Last active
March 28, 2018 21:26
-
-
Save chendaniely/b83c6990ce5ea5a1794797545d1d0251 to your computer and use it in GitHub Desktop.
Pandas replacing values
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"toc": "true" | |
}, | |
"source": [ | |
"# Table of Contents\n", | |
" <p><div class=\"lev1 toc-item\"><a href=\"#Replace-using-iloc\" data-toc-modified-id=\"Replace-using-iloc-1\"><span class=\"toc-item-num\">1 </span>Replace using iloc</a></div><div class=\"lev1 toc-item\"><a href=\"#Replace-using-loc\" data-toc-modified-id=\"Replace-using-loc-2\"><span class=\"toc-item-num\">2 </span>Replace using loc</a></div><div class=\"lev1 toc-item\"><a href=\"#Replacing-a-value-in-a-column\" data-toc-modified-id=\"Replacing-a-value-in-a-column-3\"><span class=\"toc-item-num\">3 </span>Replacing a value in a column</a></div><div class=\"lev1 toc-item\"><a href=\"#Replacing-using-replace\" data-toc-modified-id=\"Replacing-using-replace-4\"><span class=\"toc-item-num\">4 </span>Replacing using <code>replace</code></a></div><div class=\"lev2 toc-item\"><a href=\"#Replacing-multiple-values\" data-toc-modified-id=\"Replacing-multiple-values-41\"><span class=\"toc-item-num\">4.1 </span>Replacing multiple values</a></div><div class=\"lev1 toc-item\"><a href=\"#Recoding-and-crosstabing-results\" data-toc-modified-id=\"Recoding-and-crosstabing-results-5\"><span class=\"toc-item-num\">5 </span>Recoding and crosstabing results</a></div>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2018-03-28T21:25:25.838859Z", | |
"start_time": "2018-03-28T21:25:24.589587Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import seaborn as sns" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2018-03-28T21:25:25.877511Z", | |
"start_time": "2018-03-28T21:25:25.844998Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"# get the first 5 rows of tips data to make the example easier\n", | |
"tips = sns.load_dataset('tips').head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2018-03-28T21:25:25.944281Z", | |
"start_time": "2018-03-28T21:25:25.883456Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>total_bill</th>\n", | |
" <th>tip</th>\n", | |
" <th>sex</th>\n", | |
" <th>smoker</th>\n", | |
" <th>day</th>\n", | |
" <th>time</th>\n", | |
" <th>size</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>16.99</td>\n", | |
" <td>1.01</td>\n", | |
" <td>Female</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>10.34</td>\n", | |
" <td>1.66</td>\n", | |
" <td>Male</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>21.01</td>\n", | |
" <td>3.50</td>\n", | |
" <td>Male</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>23.68</td>\n", | |
" <td>3.31</td>\n", | |
" <td>Male</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>24.59</td>\n", | |
" <td>3.61</td>\n", | |
" <td>Female</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" total_bill tip sex smoker day time size\n", | |
"0 16.99 1.01 Female No Sun Dinner 2\n", | |
"1 10.34 1.66 Male No Sun Dinner 3\n", | |
"2 21.01 3.50 Male No Sun Dinner 3\n", | |
"3 23.68 3.31 Male No Sun Dinner 2\n", | |
"4 24.59 3.61 Female No Sun Dinner 4" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"tips" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Replace using iloc" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2018-03-28T21:25:26.082789Z", | |
"start_time": "2018-03-28T21:25:25.948641Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"# replace the first total_bill value\n", | |
"# replacing using iloc, by giving the 'coordinates' of the place to replace\n", | |
"tips.iloc[0, 0] = 9999" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2018-03-28T21:25:26.134611Z", | |
"start_time": "2018-03-28T21:25:26.096569Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>total_bill</th>\n", | |
" <th>tip</th>\n", | |
" <th>sex</th>\n", | |
" <th>smoker</th>\n", | |
" <th>day</th>\n", | |
" <th>time</th>\n", | |
" <th>size</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>9999.00</td>\n", | |
" <td>1.01</td>\n", | |
" <td>Female</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>10.34</td>\n", | |
" <td>1.66</td>\n", | |
" <td>Male</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>21.01</td>\n", | |
" <td>3.50</td>\n", | |
" <td>Male</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>23.68</td>\n", | |
" <td>3.31</td>\n", | |
" <td>Male</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>24.59</td>\n", | |
" <td>3.61</td>\n", | |
" <td>Female</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" total_bill tip sex smoker day time size\n", | |
"0 9999.00 1.01 Female No Sun Dinner 2\n", | |
"1 10.34 1.66 Male No Sun Dinner 3\n", | |
"2 21.01 3.50 Male No Sun Dinner 3\n", | |
"3 23.68 3.31 Male No Sun Dinner 2\n", | |
"4 24.59 3.61 Female No Sun Dinner 4" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"tips" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Replace using loc" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2018-03-28T21:25:26.149480Z", | |
"start_time": "2018-03-28T21:25:26.138688Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"# using loc\n", | |
"tips.loc[tips['size'] == 4, 'size'] = 3333" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2018-03-28T21:25:26.196048Z", | |
"start_time": "2018-03-28T21:25:26.155273Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>total_bill</th>\n", | |
" <th>tip</th>\n", | |
" <th>sex</th>\n", | |
" <th>smoker</th>\n", | |
" <th>day</th>\n", | |
" <th>time</th>\n", | |
" <th>size</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>9999.00</td>\n", | |
" <td>1.01</td>\n", | |
" <td>Female</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>10.34</td>\n", | |
" <td>1.66</td>\n", | |
" <td>Male</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>21.01</td>\n", | |
" <td>3.50</td>\n", | |
" <td>Male</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>23.68</td>\n", | |
" <td>3.31</td>\n", | |
" <td>Male</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>24.59</td>\n", | |
" <td>3.61</td>\n", | |
" <td>Female</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>3333</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" total_bill tip sex smoker day time size\n", | |
"0 9999.00 1.01 Female No Sun Dinner 2\n", | |
"1 10.34 1.66 Male No Sun Dinner 3\n", | |
"2 21.01 3.50 Male No Sun Dinner 3\n", | |
"3 23.68 3.31 Male No Sun Dinner 2\n", | |
"4 24.59 3.61 Female No Sun Dinner 3333" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"tips" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Replacing a value in a column" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2018-03-28T21:25:26.278990Z", | |
"start_time": "2018-03-28T21:25:26.204095Z" | |
} | |
}, | |
"outputs": [ | |
], | |
"source": [ | |
"# replace a value using a boolean True/False condition\n", | |
"# select rows and column in ONE .loc call; chained indexing (tips['total_bill'].loc[...])\n", | |
"# assigns to a temporary Series, raises SettingWithCopyWarning, and may not update tips\n", | |
"tips.loc[tips['total_bill'] == 23.68, 'total_bill'] = 9999" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2018-03-28T21:25:26.301751Z", | |
"start_time": "2018-03-28T21:25:26.282219Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>total_bill</th>\n", | |
" <th>tip</th>\n", | |
" <th>sex</th>\n", | |
" <th>smoker</th>\n", | |
" <th>day</th>\n", | |
" <th>time</th>\n", | |
" <th>size</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>9999.00</td>\n", | |
" <td>1.01</td>\n", | |
" <td>Female</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>10.34</td>\n", | |
" <td>1.66</td>\n", | |
" <td>Male</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>21.01</td>\n", | |
" <td>3.50</td>\n", | |
" <td>Male</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>9999.00</td>\n", | |
" <td>3.31</td>\n", | |
" <td>Male</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>24.59</td>\n", | |
" <td>3.61</td>\n", | |
" <td>Female</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>3333</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" total_bill tip sex smoker day time size\n", | |
"0 9999.00 1.01 Female No Sun Dinner 2\n", | |
"1 10.34 1.66 Male No Sun Dinner 3\n", | |
"2 21.01 3.50 Male No Sun Dinner 3\n", | |
"3 9999.00 3.31 Male No Sun Dinner 2\n", | |
"4 24.59 3.61 Female No Sun Dinner 3333" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"tips" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Replacing using `replace`" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2018-03-28T21:25:26.310599Z", | |
"start_time": "2018-03-28T21:25:26.305225Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"# replace a value in a column with a new value\n", | |
"tips['total_bill'] = tips['total_bill'].replace(9999, 4444)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2018-03-28T21:25:26.350377Z", | |
"start_time": "2018-03-28T21:25:26.320965Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>total_bill</th>\n", | |
" <th>tip</th>\n", | |
" <th>sex</th>\n", | |
" <th>smoker</th>\n", | |
" <th>day</th>\n", | |
" <th>time</th>\n", | |
" <th>size</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>4444.00</td>\n", | |
" <td>1.01</td>\n", | |
" <td>Female</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>10.34</td>\n", | |
" <td>1.66</td>\n", | |
" <td>Male</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>21.01</td>\n", | |
" <td>3.50</td>\n", | |
" <td>Male</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4444.00</td>\n", | |
" <td>3.31</td>\n", | |
" <td>Male</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>24.59</td>\n", | |
" <td>3.61</td>\n", | |
" <td>Female</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>3333</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" total_bill tip sex smoker day time size\n", | |
"0 4444.00 1.01 Female No Sun Dinner 2\n", | |
"1 10.34 1.66 Male No Sun Dinner 3\n", | |
"2 21.01 3.50 Male No Sun Dinner 3\n", | |
"3 4444.00 3.31 Male No Sun Dinner 2\n", | |
"4 24.59 3.61 Female No Sun Dinner 3333" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"tips" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Replacing multiple values" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2018-03-28T21:25:26.434558Z", | |
"start_time": "2018-03-28T21:25:26.353990Z" | |
} | |
}, | |
"outputs": [ | |
], | |
"source": [ | |
"# replacing multiple values in a column\n", | |
"# build the row mask with isin and pass the column label to the same .loc call,\n", | |
"# so the assignment is written to tips itself rather than to a temporary Series\n", | |
"tips.loc[tips['size'].isin([2, 3]), 'size'] = 42" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2018-03-28T21:25:26.463425Z", | |
"start_time": "2018-03-28T21:25:26.437918Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>total_bill</th>\n", | |
" <th>tip</th>\n", | |
" <th>sex</th>\n", | |
" <th>smoker</th>\n", | |
" <th>day</th>\n", | |
" <th>time</th>\n", | |
" <th>size</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>4444.00</td>\n", | |
" <td>1.01</td>\n", | |
" <td>Female</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>42</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>10.34</td>\n", | |
" <td>1.66</td>\n", | |
" <td>Male</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>42</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>21.01</td>\n", | |
" <td>3.50</td>\n", | |
" <td>Male</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>42</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4444.00</td>\n", | |
" <td>3.31</td>\n", | |
" <td>Male</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>42</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>24.59</td>\n", | |
" <td>3.61</td>\n", | |
" <td>Female</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>3333</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" total_bill tip sex smoker day time size\n", | |
"0 4444.00 1.01 Female No Sun Dinner 42\n", | |
"1 10.34 1.66 Male No Sun Dinner 42\n", | |
"2 21.01 3.50 Male No Sun Dinner 42\n", | |
"3 4444.00 3.31 Male No Sun Dinner 42\n", | |
"4 24.59 3.61 Female No Sun Dinner 3333" | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"tips" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Recoding and crosstabing results" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2018-03-28T21:25:26.480409Z", | |
"start_time": "2018-03-28T21:25:26.466242Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"tips = sns.load_dataset('tips').head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2018-03-28T21:25:26.510137Z", | |
"start_time": "2018-03-28T21:25:26.483823Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>total_bill</th>\n", | |
" <th>tip</th>\n", | |
" <th>sex</th>\n", | |
" <th>smoker</th>\n", | |
" <th>day</th>\n", | |
" <th>time</th>\n", | |
" <th>size</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>16.99</td>\n", | |
" <td>1.01</td>\n", | |
" <td>Female</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>10.34</td>\n", | |
" <td>1.66</td>\n", | |
" <td>Male</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>21.01</td>\n", | |
" <td>3.50</td>\n", | |
" <td>Male</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>23.68</td>\n", | |
" <td>3.31</td>\n", | |
" <td>Male</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>24.59</td>\n", | |
" <td>3.61</td>\n", | |
" <td>Female</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" total_bill tip sex smoker day time size\n", | |
"0 16.99 1.01 Female No Sun Dinner 2\n", | |
"1 10.34 1.66 Male No Sun Dinner 3\n", | |
"2 21.01 3.50 Male No Sun Dinner 3\n", | |
"3 23.68 3.31 Male No Sun Dinner 2\n", | |
"4 24.59 3.61 Female No Sun Dinner 4" | |
] | |
}, | |
"execution_count": 15, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"tips" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2018-03-28T21:25:26.629956Z", | |
"start_time": "2018-03-28T21:25:26.516110Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"tips['size_recode'] = tips['size'].replace([2, 3], 42)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2018-03-28T21:25:26.687114Z", | |
"start_time": "2018-03-28T21:25:26.636160Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>total_bill</th>\n", | |
" <th>tip</th>\n", | |
" <th>sex</th>\n", | |
" <th>smoker</th>\n", | |
" <th>day</th>\n", | |
" <th>time</th>\n", | |
" <th>size</th>\n", | |
" <th>size_recode</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>16.99</td>\n", | |
" <td>1.01</td>\n", | |
" <td>Female</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>2</td>\n", | |
" <td>42</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>10.34</td>\n", | |
" <td>1.66</td>\n", | |
" <td>Male</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>3</td>\n", | |
" <td>42</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>21.01</td>\n", | |
" <td>3.50</td>\n", | |
" <td>Male</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>3</td>\n", | |
" <td>42</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>23.68</td>\n", | |
" <td>3.31</td>\n", | |
" <td>Male</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>2</td>\n", | |
" <td>42</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>24.59</td>\n", | |
" <td>3.61</td>\n", | |
" <td>Female</td>\n", | |
" <td>No</td>\n", | |
" <td>Sun</td>\n", | |
" <td>Dinner</td>\n", | |
" <td>4</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" total_bill tip sex smoker day time size size_recode\n", | |
"0 16.99 1.01 Female No Sun Dinner 2 42\n", | |
"1 10.34 1.66 Male No Sun Dinner 3 42\n", | |
"2 21.01 3.50 Male No Sun Dinner 3 42\n", | |
"3 23.68 3.31 Male No Sun Dinner 2 42\n", | |
"4 24.59 3.61 Female No Sun Dinner 4 4" | |
] | |
}, | |
"execution_count": 17, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"tips" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2018-03-28T21:25:26.760530Z", | |
"start_time": "2018-03-28T21:25:26.698577Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th>size_recode</th>\n", | |
" <th>4</th>\n", | |
" <th>42</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>size</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>0</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"size_recode 4 42\n", | |
"size \n", | |
"2 0 2\n", | |
"3 0 2\n", | |
"4 1 0" | |
] | |
}, | |
"execution_count": 18, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"pd.crosstab(tips['size'], tips['size_recode'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.4" | |
}, | |
"toc": { | |
"colors": { | |
"hover_highlight": "#DAA520", | |
"navigate_num": "#000000", | |
"navigate_text": "#333333", | |
"running_highlight": "#FF0000", | |
"selected_highlight": "#FFD700", | |
"sidebar_border": "#EEEEEE", | |
"wrapper_background": "#FFFFFF" | |
}, | |
"moveMenuLeft": true, | |
"nav_menu": { | |
"height": "12px", | |
"width": "252px" | |
}, | |
"navigate_menu": true, | |
"number_sections": true, | |
"sideBar": true, | |
"threshold": 4, | |
"toc_cell": true, | |
"toc_section_display": "block", | |
"toc_window_display": true, | |
"widenNotebook": false | |
}, | |
"varInspector": { | |
"cols": { | |
"lenName": 16, | |
"lenType": 16, | |
"lenVar": 40 | |
}, | |
"kernels_config": { | |
"python": { | |
"delete_cmd_postfix": "", | |
"delete_cmd_prefix": "del ", | |
"library": "var_list.py", | |
"varRefreshCmd": "print(var_dic_list())" | |
}, | |
"r": { | |
"delete_cmd_postfix": ") ", | |
"delete_cmd_prefix": "rm(", | |
"library": "var_list.r", | |
"varRefreshCmd": "cat(var_dic_list()) " | |
} | |
}, | |
"types_to_exclude": [ | |
"module", | |
"function", | |
"builtin_function_or_method", | |
"instance", | |
"_Feature" | |
], | |
"window_display": false | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment