Skip to content

Instantly share code, notes, and snippets.

@aiqc
Last active May 8, 2022 15:34
Show Gist options
  • Save aiqc/d8d4b5e74a8811b3d8657c65cb3c6e7f to your computer and use it in GitHub Desktop.
Save aiqc/d8d4b5e74a8811b3d8657c65cb3c6e7f to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "db4f19d5-75f3-4e12-8a95-ae81a2f5bb4c",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "932b92cf-5501-45c4-a2f9-bc087264e812",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "91d3616b-b48a-4479-9554-9b672611e347",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/Users/layne/Desktop/fraud_detect\n"
]
}
],
"source": [
"cd '/Users/layne/Desktop/fraud_detect'"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "9b40a5de-b41f-4bca-a89a-9f4c0d6a8c4b",
"metadata": {},
"outputs": [],
"source": [
"path_test = '/Users/layne/Desktop/fraud_detect/fraudTest.csv'\n",
"path_train = '/Users/layne/Desktop/fraud_detect/fraudTrain.csv'"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "9308fb73-99c6-4bcf-8022-7b5ee3c79fb5",
"metadata": {},
"outputs": [],
"source": [
"df_test = pd.read_csv(path_test)\n",
"df_train = pd.read_csv(path_train)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "eaa09f4b-9782-467c-8b9f-931e35a76b76",
"metadata": {},
"outputs": [],
"source": [
"cols_train = df_train.columns.tolist()\n",
"cols_test = df_test.columns.tolist()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "0ed4f99f-5129-457b-b01a-b74e50aa0bbe",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cols_train == cols_test"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "414f79fe-85b2-4b69-83f5-aba0f17cfae8",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Unnamed: 0 True\n",
"trans_date_trans_time True\n",
"cc_num True\n",
"merchant True\n",
"category True\n",
"amt True\n",
"first True\n",
"last True\n",
"gender True\n",
"street True\n",
"city True\n",
"state True\n",
"zip True\n",
"lat True\n",
"long True\n",
"city_pop True\n",
"job True\n",
"dob True\n",
"trans_num True\n",
"unix_time True\n",
"merch_lat True\n",
"merch_long True\n",
"is_fraud True\n",
"dtype: bool"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_train.dtypes == df_train.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "24148ff5-c213-4661-a4fa-cd2a1ceac074",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"False\n",
"False\n",
"False\n",
"False\n",
"False\n",
"False\n",
"False\n",
"False\n",
"False\n",
"False\n",
"False\n",
"False\n",
"False\n",
"False\n",
"False\n",
"False\n",
"False\n",
"False\n",
"False\n",
"False\n",
"False\n",
"False\n",
"False\n"
]
}
],
"source": [
"for col in cols_train:\n",
" print(df_train[col].isnull().values.any())"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "8265025f-0bfd-4c57-8137-86f92912f84d",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"False\n",
"False\n",
"False\n",
"False\n",
"False\n",
"False\n",
"False\n",
"False\n",
"False\n",
"False\n",
"False\n",
"False\n",
"False\n",
"False\n",
"False\n",
"False\n",
"False\n",
"False\n",
"False\n",
"False\n",
"False\n",
"False\n",
"False\n"
]
}
],
"source": [
"for col in cols_test:\n",
" print(df_test[col].isnull().values.any())"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "f82c9713-c27f-4771-b340-4fc00b26824c",
"metadata": {},
"outputs": [],
"source": [
"df_all = pd.concat([df_train, df_test], ignore_index=True)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "da79baa2-0c4f-40d6-952b-a2f1e1a9d610",
"metadata": {},
"outputs": [],
"source": [
"del df_train\n",
"del df_test"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "bb0d6daf-8312-45ba-b71b-965542f4a853",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Unnamed: 0', 'trans_date_trans_time', 'cc_num', 'merchant', 'category',\n",
" 'amt', 'first', 'last', 'gender', 'street', 'city', 'state', 'zip',\n",
" 'lat', 'long', 'city_pop', 'job', 'dob', 'trans_num', 'unix_time',\n",
" 'merch_lat', 'merch_long', 'is_fraud'],\n",
" dtype='object')"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_all.columns"
]
},
{
"cell_type": "markdown",
"id": "e93666d5-bf7b-4f8d-a605-c888ab1061d6",
"metadata": {},
"source": [
"---"
]
},
{
"cell_type": "markdown",
"id": "4b46ba7c-1cf2-4a5b-b674-e9fe890427a3",
"metadata": {},
"source": [
"# feature engineering"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "a89aaa29-bb56-4862-8777-f927a51bb02c",
"metadata": {},
"outputs": [],
"source": [
"drop_cols = ['Unnamed: 0', 'cc_num', 'first', 'last', 'street', 'trans_num', 'unix_time']"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "31540f7c-0e31-4ae9-b487-06415eeecc1c",
"metadata": {},
"outputs": [],
"source": [
"df_all = df_all.drop(columns=drop_cols)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "7963700f-a233-4d52-b8bf-b8e779d52726",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['trans_date_trans_time', 'merchant', 'category', 'amt', 'gender',\n",
" 'city', 'state', 'zip', 'lat', 'long', 'city_pop', 'job', 'dob',\n",
" 'merch_lat', 'merch_long', 'is_fraud'],\n",
" dtype='object')"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_all.columns"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "d1cc981b-fc8b-4a02-b774-1586f2948a20",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"df_all['dob'] = 2020 - df_all['dob'].str[:4].astype(np.int64)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "7b3d1e8d-880c-4971-b753-472b297b3e97",
"metadata": {},
"outputs": [],
"source": [
"df_all = df_all.rename(columns={\"dob\":\"age\", \"amt\":\"amount\"})"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "dc10c787-0fb5-4a5f-9774-44ff93cc242c",
"metadata": {},
"outputs": [],
"source": [
"df_all['diff_lat'] = df_all['lat'] - df_all['merch_lat']"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "ba0c2e8c-4d99-4e4c-8129-98ea12a59165",
"metadata": {},
"outputs": [],
"source": [
"df_all['diff_long'] = df_all['long'] - df_all['merch_long']"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "c4b09d71-72fc-4f2a-ab70-0a0ea9f90fdc",
"metadata": {},
"outputs": [],
"source": [
"drop_cols = ['long', 'merch_long', 'lat', 'merch_lat']"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "3d32a691-a1ed-4f6c-b529-9bcc4f64389f",
"metadata": {},
"outputs": [],
"source": [
"df_all = df_all.drop(columns=drop_cols)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "66e89b00-f864-456b-9224-2fa9fc942986",
"metadata": {},
"outputs": [],
"source": [
"drop_cols = ['city', 'state']"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "09a882ca-ef81-4fba-ab1b-eadf71d718cc",
"metadata": {},
"outputs": [],
"source": [
"df_all = df_all.drop(columns=drop_cols)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "f87db741-3577-4236-ab9b-c2e6f47db12f",
"metadata": {},
"outputs": [],
"source": [
"df_all = df_all.rename(columns={\"amt\":\"amount\"})"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "6edd9c34-6eeb-4261-884b-24eff4984280",
"metadata": {},
"outputs": [],
"source": [
"df_all['month'] = pd.DatetimeIndex(df_all['trans_date_trans_time']).month"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "f3323e2c-7ab8-40bd-8dff-b646fac5332d",
"metadata": {},
"outputs": [],
"source": [
"df_all['day'] = pd.DatetimeIndex(df_all['trans_date_trans_time']).day"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "818a3c8f-f354-44ab-8551-55df79bef63a",
"metadata": {},
"outputs": [],
"source": [
"df_all['hour'] = pd.DatetimeIndex(df_all['trans_date_trans_time']).hour"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "120a1e4f-18b5-4216-a84f-51ca718dd57b",
"metadata": {},
"outputs": [],
"source": [
"df_all = df_all.drop(columns=['trans_date_trans_time'])"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "7f11a00b-be5b-497d-8063-3c3e8b044eda",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>merchant</th>\n",
" <th>category</th>\n",
" <th>amount</th>\n",
" <th>gender</th>\n",
" <th>zip</th>\n",
" <th>city_pop</th>\n",
" <th>job</th>\n",
" <th>age</th>\n",
" <th>is_fraud</th>\n",
" <th>diff_lat</th>\n",
" <th>diff_long</th>\n",
" <th>month</th>\n",
" <th>day</th>\n",
" <th>hour</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>fraud_Rippin, Kub and Mann</td>\n",
" <td>misc_net</td>\n",
" <td>4.97</td>\n",
" <td>F</td>\n",
" <td>28654</td>\n",
" <td>3495</td>\n",
" <td>Psychologist, counselling</td>\n",
" <td>32</td>\n",
" <td>0</td>\n",
" <td>0.067507</td>\n",
" <td>0.870215</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>fraud_Heller, Gutmann and Zieme</td>\n",
" <td>grocery_pos</td>\n",
" <td>107.23</td>\n",
" <td>F</td>\n",
" <td>99160</td>\n",
" <td>149</td>\n",
" <td>Special educational needs teacher</td>\n",
" <td>42</td>\n",
" <td>0</td>\n",
" <td>-0.271247</td>\n",
" <td>-0.024038</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>fraud_Lind-Buckridge</td>\n",
" <td>entertainment</td>\n",
" <td>220.11</td>\n",
" <td>M</td>\n",
" <td>83252</td>\n",
" <td>4154</td>\n",
" <td>Nature conservation officer</td>\n",
" <td>58</td>\n",
" <td>0</td>\n",
" <td>-0.969904</td>\n",
" <td>-0.107519</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>fraud_Kutch, Hermiston and Farrell</td>\n",
" <td>gas_transport</td>\n",
" <td>45.00</td>\n",
" <td>M</td>\n",
" <td>59632</td>\n",
" <td>1939</td>\n",
" <td>Patent attorney</td>\n",
" <td>53</td>\n",
" <td>0</td>\n",
" <td>-0.803731</td>\n",
" <td>0.447271</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>fraud_Keeling-Crist</td>\n",
" <td>misc_pos</td>\n",
" <td>41.96</td>\n",
" <td>M</td>\n",
" <td>24433</td>\n",
" <td>99</td>\n",
" <td>Dance movement psychotherapist</td>\n",
" <td>34</td>\n",
" <td>0</td>\n",
" <td>-0.254299</td>\n",
" <td>-0.830441</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" merchant category amount gender zip \\\n",
"0 fraud_Rippin, Kub and Mann misc_net 4.97 F 28654 \n",
"1 fraud_Heller, Gutmann and Zieme grocery_pos 107.23 F 99160 \n",
"2 fraud_Lind-Buckridge entertainment 220.11 M 83252 \n",
"3 fraud_Kutch, Hermiston and Farrell gas_transport 45.00 M 59632 \n",
"4 fraud_Keeling-Crist misc_pos 41.96 M 24433 \n",
"\n",
" city_pop job age is_fraud diff_lat \\\n",
"0 3495 Psychologist, counselling 32 0 0.067507 \n",
"1 149 Special educational needs teacher 42 0 -0.271247 \n",
"2 4154 Nature conservation officer 58 0 -0.969904 \n",
"3 1939 Patent attorney 53 0 -0.803731 \n",
"4 99 Dance movement psychotherapist 34 0 -0.254299 \n",
"\n",
" diff_long month day hour \n",
"0 0.870215 1 1 0 \n",
"1 -0.024038 1 1 0 \n",
"2 -0.107519 1 1 0 \n",
"3 0.447271 1 1 0 \n",
"4 -0.830441 1 1 0 "
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_all.head()"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "344afdf5-cad7-4edd-b709-520a95a5c71c",
"metadata": {},
"outputs": [],
"source": [
"df_all.to_parquet('fraud_unbalanced.parquet')"
]
},
{
"cell_type": "markdown",
"id": "18c30cac-fb24-45df-9816-ef7140fa23af",
"metadata": {},
"source": [
"---"
]
},
{
"cell_type": "markdown",
"id": "329a3996-f359-4fa6-867d-60bc04e8dbcc",
"metadata": {},
"source": [
"# Balance"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "347aa761-2c2b-4230-9f5d-f8ab794d5e74",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"is_fraud\n",
"0 1842743\n",
"1 9651\n",
"dtype: int64"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_all[['is_fraud']].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "1614c4b1-5f08-45b4-9558-7b0c33b01dc6",
"metadata": {},
"outputs": [],
"source": [
"dataset_real = df_all[df_all[\"is_fraud\"] == 0]"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "17ac1802-07f9-40a2-b46b-09f9df6e8a9d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1842743, 14)"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset_real.shape"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "35798ce1-5060-42fc-aecc-ec5cc8378e85",
"metadata": {},
"outputs": [],
"source": [
"dataset_fraud = df_all[df_all[\"is_fraud\"] == 1]"
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "015283d7-0ce7-4c34-b253-0306e3f494eb",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(9651, 14)"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset_fraud.shape"
]
},
{
"cell_type": "markdown",
"id": "15a1ec27-92b9-410c-a5cc-f6122fc0dcde",
"metadata": {},
"source": [
"The majority class can only be `1.3` times bigger than the minority class. Here 1.3 is an arbitrary number."
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "d9342a10-135b-4d40-9be1-93a1a46ed17c",
"metadata": {},
"outputs": [],
"source": [
"fraud_multiplied = round(int(dataset_fraud.shape[0]*1.3))"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "472a14b2-333c-47df-8b72-233697beaea8",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"12546"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fraud_multiplied"
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "7ade74bf-ab99-4926-8a2c-86abdac93edb",
"metadata": {},
"outputs": [],
"source": [
"dataset_real = dataset_real.sample(fraud_multiplied)"
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "9e97cd63-d17a-43ea-b3aa-363c0f4e8c37",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(12546, 14)"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset_real.shape"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "9c892f7c-66a7-4243-8946-d1033a718724",
"metadata": {},
"outputs": [],
"source": [
"dataset_balanced = pd.concat([dataset_real, dataset_fraud])"
]
},
{
"cell_type": "code",
"execution_count": 51,
"id": "2d46d609-a71d-41f3-8d84-c903622d553e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(22197, 14)"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset_balanced.shape"
]
},
{
"cell_type": "code",
"execution_count": 57,
"id": "e7206141-d48a-4f88-9ca2-1f8433e14d6b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['merchant', 'category', 'amount', 'gender', 'zip', 'city_pop', 'job',\n",
" 'age', 'is_fraud', 'diff_lat', 'diff_long', 'month', 'day', 'hour'],\n",
" dtype='object')"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset_balanced.columns"
]
},
{
"cell_type": "code",
"execution_count": 79,
"id": "73dc7155-6f5e-4029-94bf-6ee79654cfdf",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"merchant object\n",
"category object\n",
"amount float64\n",
"gender object\n",
"zip int64\n",
"city_pop int64\n",
"job object\n",
"age int64\n",
"is_fraud int64\n",
"diff_lat float64\n",
"diff_long float64\n",
"month int64\n",
"day int64\n",
"hour int64\n",
"dtype: object"
]
},
"execution_count": 79,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset_balanced.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 81,
"id": "ca25bcb7-f802-4704-b002-4d381d3410b3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>merchant</th>\n",
" <th>category</th>\n",
" <th>amount</th>\n",
" <th>gender</th>\n",
" <th>zip</th>\n",
" <th>city_pop</th>\n",
" <th>job</th>\n",
" <th>age</th>\n",
" <th>is_fraud</th>\n",
" <th>diff_lat</th>\n",
" <th>diff_long</th>\n",
" <th>month</th>\n",
" <th>day</th>\n",
" <th>hour</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>153551</th>\n",
" <td>fraud_Yost-Rogahn</td>\n",
" <td>personal_care</td>\n",
" <td>169.88</td>\n",
" <td>M</td>\n",
" <td>49440</td>\n",
" <td>128715</td>\n",
" <td>Historic buildings inspector/conservation officer</td>\n",
" <td>25</td>\n",
" <td>0</td>\n",
" <td>0.970016</td>\n",
" <td>0.068795</td>\n",
" <td>3</td>\n",
" <td>23</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>878897</th>\n",
" <td>fraud_Heathcote, Yost and Kertzmann</td>\n",
" <td>shopping_net</td>\n",
" <td>5.75</td>\n",
" <td>F</td>\n",
" <td>58531</td>\n",
" <td>307</td>\n",
" <td>Make</td>\n",
" <td>90</td>\n",
" <td>0</td>\n",
" <td>-0.849992</td>\n",
" <td>-0.209885</td>\n",
" <td>12</td>\n",
" <td>22</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>125739</th>\n",
" <td>fraud_Bode-Rempel</td>\n",
" <td>kids_pets</td>\n",
" <td>5.85</td>\n",
" <td>M</td>\n",
" <td>25213</td>\n",
" <td>5512</td>\n",
" <td>Exhibition designer</td>\n",
" <td>40</td>\n",
" <td>0</td>\n",
" <td>0.993371</td>\n",
" <td>-0.875224</td>\n",
" <td>3</td>\n",
" <td>10</td>\n",
" <td>22</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" merchant category amount gender \\\n",
"153551 fraud_Yost-Rogahn personal_care 169.88 M \n",
"878897 fraud_Heathcote, Yost and Kertzmann shopping_net 5.75 F \n",
"125739 fraud_Bode-Rempel kids_pets 5.85 M \n",
"\n",
" zip city_pop job \\\n",
"153551 49440 128715 Historic buildings inspector/conservation officer \n",
"878897 58531 307 Make \n",
"125739 25213 5512 Exhibition designer \n",
"\n",
" age is_fraud diff_lat diff_long month day hour \n",
"153551 25 0 0.970016 0.068795 3 23 17 \n",
"878897 90 0 -0.849992 -0.209885 12 22 2 \n",
"125739 40 0 0.993371 -0.875224 3 10 22 "
]
},
"execution_count": 81,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset_balanced.head(3)"
]
},
{
"cell_type": "markdown",
"id": "e36fdefb-2234-49b1-ba89-17a2fcaa9cae",
"metadata": {},
"source": [
"---"
]
},
{
"cell_type": "markdown",
"id": "5786fdf3-23b4-4555-8c95-527bf863ab5b",
"metadata": {},
"source": [
"# aiqc"
]
},
{
"cell_type": "code",
"execution_count": 82,
"id": "82b0e3b7-ec58-4051-93f6-a0f961da091a",
"metadata": {},
"outputs": [],
"source": [
"cols_scale = ['day','city_pop', 'age', 'diff_lat', 'diff_long', 'amount']\n",
"cols_ohe = ['merchant', 'category', 'gender', 'month', 'hour', 'job', 'zip']"
]
},
{
"cell_type": "code",
"execution_count": 83,
"id": "56efcddf-9499-495d-a1ad-f5fee42fdc29",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.preprocessing import OneHotEncoder, StandardScaler"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "700cf925-7def-4904-a68b-17c6635e3895",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/Users/layne/Desktop/AIQC\n"
]
}
],
"source": [
"cd '/Users/Layne/Desktop/AIQC'"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "4421364f-5a33-426b-b502-dca920e729d7",
"metadata": {},
"outputs": [],
"source": [
"import aiqc"
]
},
{
"cell_type": "code",
"execution_count": 94,
"id": "20a0048a-6f68-49c7-a8b3-c52a2ab83d61",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"=> Info - System overriding user input to set `sklearn_preprocess.sparse=False`.\n",
"\tThis would have generated 'scipy.sparse.csr.csr_matrix', causing Keras training to fail.\n",
"\n",
"\n",
"___/ featurecoder_index: 0 \\_________\n",
"\n",
"=> The column(s) below matched your filter(s) featurecoder filters.\n",
"\n",
"['merchant', 'category', 'gender', 'month', 'hour', 'job', 'zip']\n",
"\n",
"=> The remaining column(s) and dtype(s) are available for downstream featurecoder(s):\n",
"{'age': 'int64',\n",
" 'amount': 'float64',\n",
" 'city_pop': 'int64',\n",
" 'day': 'int64',\n",
" 'diff_lat': 'float64',\n",
" 'diff_long': 'float64'}\n",
"\n",
"\n",
"=> Info - System overriding user input to set `sklearn_preprocess.copy=False`.\n",
"\tThis saves memory when concatenating the output of many encoders.\n",
"\n",
"\n",
"___/ featurecoder_index: 1 \\_________\n",
"\n",
"=> The column(s) below matched your filter(s) featurecoder filters.\n",
"\n",
"['day', 'city_pop', 'age', 'diff_lat', 'diff_long', 'amount']\n",
"\n",
"=> Done. All feature column(s) have featurecoder(s) associated with them.\n",
"No more FeatureCoders can be added to this Encoderset.\n",
"\n"
]
}
],
"source": [
"splitset = aiqc.Pipeline.Tabular.make(\n",
" # --- Data source ---\n",
" df_or_path = dataset_balanced\n",
"\n",
" # --- Label preprocessing ---\n",
" , label_column = 'is_fraud'\n",
" , label_encoder = None\n",
"\n",
" # --- Feature preprocessing ---\n",
" , feature_cols_excluded = ['is_fraud', 'first', 'last']\n",
" , feature_encoders = [\n",
" dict(\n",
" sklearn_preprocess = OneHotEncoder()\n",
" , columns = cols_ohe\n",
" ),\n",
" dict(\n",
" sklearn_preprocess = StandardScaler()\n",
" , columns = cols_scale\n",
" ),\n",
" ]\n",
"\n",
" # --- Stratification ---\n",
" , size_test = 0.09\n",
" , size_validation = 0.17\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 101,
"id": "be617528-5be1-4870-89cf-526a1e306167",
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"from tensorflow.keras import layers as l"
]
},
{
"cell_type": "code",
"execution_count": 102,
"id": "4eedb584-3dce-4892-a88b-1ab7937f1524",
"metadata": {},
"outputs": [],
"source": [
"def fn_build(features_shape, label_shape, **hp):\n",
" m = tf.keras.models.Sequential()\n",
" m.add(l.Input(shape=features_shape))\n",
" m.add(l.Dense(hp['neuron_count'], activation='relu', kernel_initializer='he_uniform'))\n",
" m.add(l.Dropout(hp['dropout']))\n",
" m.add(l.Dense(hp['neuron_count'], activation='relu', kernel_initializer='he_uniform'))\n",
" m.add(l.Dropout(hp['dropout']))\n",
" m.add(l.Dense(units=label_shape[0], activation='sigmoid', kernel_initializer='glorot_uniform'))\n",
" return m"
]
},
{
"cell_type": "code",
"execution_count": 103,
"id": "aaee1cbf-8e8b-447e-ad4e-d445cbb19b1f",
"metadata": {},
"outputs": [],
"source": [
"def fn_train(model, loser, optimizer, samples_train, samples_evaluate, **hp):\n",
" model.compile(\n",
" loss=loser\n",
" , optimizer=optimizer\n",
" , metrics=['accuracy']\n",
" )\n",
" model.fit(\n",
" samples_train['features'], samples_train['labels']\n",
" , validation_data = (samples_evaluate['features'], samples_evaluate['labels'])\n",
" , verbose = 0\n",
" , batch_size = hp['batch_size']\n",
" , epochs = hp['epochs']\n",
" , callbacks = [tf.keras.callbacks.History()]\n",
" )\n",
" return model"
]
},
{
"cell_type": "code",
"execution_count": 104,
"id": "d8717d7a-b6c1-466c-8d09-d93d42807108",
"metadata": {},
"outputs": [],
"source": [
"hyperparameters = dict(\n",
" neuron_count=[60], \n",
" epochs = [10],\n",
" batch_size = [3],\n",
" dropout = [0.3, 0.4]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 108,
"id": "9f40b94f-ff18-4412-be8d-6c50713822ed",
"metadata": {},
"outputs": [],
"source": [
"queue = aiqc.Experiment.make(\n",
" # --- Analysis type ---\n",
" library = \"keras\"\n",
" , analysis_type = \"classification_binary\"\n",
"\n",
" # --- Model functions ---\n",
" , fn_build = fn_build\n",
" , fn_train = fn_train\n",
" , fn_lose = None #auto\n",
" , fn_optimize = None #auto\n",
" , fn_predict = None #auto\n",
"\n",
" # --- Training options ---\n",
" , repeat_count = 2\n",
" , permute_count = 3\n",
" , hyperparameters = hyperparameters\n",
"\n",
" # --- Data source ---\n",
" , splitset_id = splitset.id\n",
"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 109,
"id": "b8dc0b6c-77b4-428d-9ffa-398c73594de1",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/layne/.pyenv/versions/3.7.12/envs/aiqc_dev/lib/python3.7/site-packages/jupyter_dash/jupyter_app.py:139: UserWarning:\n",
"\n",
"The 'environ['werkzeug.server.shutdown']' function is deprecated and will be removed in Werkzeug 2.1.\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"📊 AIQC Tracker http://127.0.0.1:9991 📊\n"
]
}
],
"source": [
"from aiqc.lab import Tracker\n",
"app = Tracker()\n",
"app.start()"
]
},
{
"cell_type": "code",
"execution_count": 110,
"id": "a99a4f26-128b-46b9-ae33-1a15ba621bdb",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"🔮 Training Models 🔮: 25%|█████████ | 1/4 [1:16:25<3:49:16, 4585.56s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Queue was gracefully interrupted.\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"queue.run_jobs()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "5e977b94-bad4-47b6-95db-ccec1998b3b7",
"metadata": {},
"outputs": [],
"source": [
"predictor = aiqc.Predictor.get_by_id(33)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "a06cec6d-786c-4fed-aefd-7fafc1d14e26",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"name": "amount",
"type": "box",
"x": [
3.308984925970435,
3.3493398930877447,
3.2744919564574957
]
},
{
"name": "category=shopping_net",
"type": "box",
"x": [
0.4514044728130102,
0.4568766262382269,
0.4489722456783056
]
},
{
"name": "category=shopping_pos",
"type": "box",
"x": [
0.31098209880292416,
0.2717477288097143,
0.3281372394412756
]
},
{
"name": "category=misc_net",
"type": "box",
"x": [
0.20811662636697292,
0.19702242873609066,
0.2008549328893423
]
},
{
"name": "category=grocery_pos",
"type": "box",
"x": [
0.16108024679124355,
0.18406141363084316,
0.17419015429913998
]
},
{
"name": "gender=F",
"type": "box",
"x": [
0.13460165821015835,
0.1347148772329092,
0.13639475964009762
]
},
{
"name": "gender=M",
"type": "box",
"x": [
0.06523300521075726,
0.07392674498260021,
0.07772778533399105
]
},
{
"name": "category=entertainment",
"type": "box",
"x": [
0.06499864719808102,
0.06474841199815273,
0.06395175121724606
]
},
{
"name": "hour=20",
"type": "box",
"x": [
0.05624440871179104,
0.05221535079181194,
0.05536658130586147
]
}
],
"layout": {
"autosize": true,
"margin": {
"l": 150
},
"showlegend": false,
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"font": {
"color": "#FAFAFA",
"family": "Avenir",
"size": 13
},
"hoverlabel": {
"bgcolor": "#122536",
"font": {
"family": "Avenir",
"size": 15
}
},
"hovermode": "closest",
"paper_bgcolor": "#182d41",
"plot_bgcolor": "#182d41",
"title": {
"font": {
"family": "Avenir"
},
"pad": {
"b": 50,
"t": 20
},
"x": 0.05,
"y": 0.95
}
}
},
"title": {
"text": "Feature Importance <sub>(feature.id:11, permute_count:3, top_n:9)</sub><br><br>"
},
"xaxis": {
"autorange": true,
"gridcolor": "#2c3c4a",
"nticks": 15,
"range": [
-0.13095823489129543,
3.532513478770852
],
"tickangle": 45,
"title": {
"text": "Importance<br><sup>[permuted column loss - training loss]</sup>"
},
"type": "linear"
},
"yaxis": {
"autorange": true,
"range": [
-0.5,
8.5
],
"type": "category"
}
}
},
"image/png": "",
"text/html": [
"<div> <div id=\"ca09dcbe-312b-42bf-a547-7340d63b0988\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"ca09dcbe-312b-42bf-a547-7340d63b0988\")) { Plotly.newPlot( \"ca09dcbe-312b-42bf-a547-7340d63b0988\", [{\"name\":\"amount\",\"type\":\"box\",\"x\":[3.308984925970435,3.3493398930877447,3.2744919564574957]},{\"name\":\"category=shopping_net\",\"type\":\"box\",\"x\":[0.4514044728130102,0.4568766262382269,0.4489722456783056]},{\"name\":\"category=shopping_pos\",\"type\":\"box\",\"x\":[0.31098209880292416,0.2717477288097143,0.3281372394412756]},{\"name\":\"category=misc_net\",\"type\":\"box\",\"x\":[0.20811662636697292,0.19702242873609066,0.2008549328893423]},{\"name\":\"category=grocery_pos\",\"type\":\"box\",\"x\":[0.16108024679124355,0.18406141363084316,0.17419015429913998]},{\"name\":\"gender=F\",\"type\":\"box\",\"x\":[0.13460165821015835,0.1347148772329092,0.13639475964009762]},{\"name\":\"gender=M\",\"type\":\"box\",\"x\":[0.06523300521075726,0.07392674498260021,0.07772778533399105]},{\"name\":\"category=entertainment\",\"type\":\"box\",\"x\":[0.06499864719808102,0.06474841199815273,0.06395175121724606]},{\"name\":\"hour=20\",\"type\":\"box\",\"x\":[0.05624440871179104,0.05221535079181194,0.05536658130586147]}], 
{\"height\":525,\"margin\":{\"l\":150},\"showlegend\":false,\"template\":{\"data\":{\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"choropleth\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"type\":\"choropleth\"}],\"contour\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"type\":\"contour\"}],\"contourcarpet\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"type\":\"contourcarpet\"}],\"heatmap\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"type\":\"heatmap\"}],\"heatmapgl\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c1
79e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"type\":\"heatmapgl\"}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"histogram2d\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"type\":\"histogram2d\"}],\"histogram2dcontour\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"type\":\"histogram2dcontour\"}],\"mesh3d\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"type\":\"mesh3d\"}],\"parcoords\":[{\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"type\":\"parcoords\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}],\"scatter\":[{\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"type\":\"scatter\"}],\"scatter3d\":[{\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"type\":\"scatter3d\"}],\"scattercarpet\":[{\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"type\":\"scattercarpet\"}],\"scattergeo\":[{\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"type\":\"scattergeo\"}],\"scattergl\":[{\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"type\":\"scattergl\"}],\"scatterma
pbox\":[{\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"type\":\"scattermapbox\"}],\"scatterpolar\":[{\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"type\":\"scatterpolar\"}],\"scatterpolargl\":[{\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"type\":\"scatterpolargl\"}],\"scatterternary\":[{\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"type\":\"scatterternary\"}],\"surface\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"type\":\"surface\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}]},\"layout\":{\"font\":{\"color\":\"#FAFAFA\",\"family\":\"Avenir\",\"size\":13},\"hoverlabel\":{\"bgcolor\":\"#122536\",\"font\":{\"family\":\"Avenir\",\"size\":15}},\"hovermode\":\"closest\",\"paper_bgcolor\":\"#182d41\",\"plot_bgcolor\":\"#182d41\",\"title\":{\"font\":{\"family\":\"Avenir\"},\"pad\":{\"b\":50,\"t\":20},\"x\":0.05,\"y\":0.95}}},\"title\":{\"text\":\"Feature Importance <sub>(feature.id:11, permute_count:3, top_n:9)</sub><br><br>\"},\"xaxis\":{\"gridcolor\":\"#2c3c4a\",\"nticks\":15,\"tickangle\":45,\"title\":{\"text\":\"Importance<br><sup>[permuted column loss - training loss]</sup>\"}}}, {\"responsive\": true} ).then(function(){\n",
" \n",
"var gd = document.getElementById('ca09dcbe-312b-42bf-a547-7340d63b0988');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"predictor.predictions[0].plot_feature_importance(top_n=9)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ff12e706-461c-4649-b705-1b0ea77a9a26",
"metadata": {},
"outputs": [],
"source": [
"# `queue` was never bound in this kernel session; the previously saved run of this cell raised NameError.\n",
"# Assumes `predictor` (used by the feature-importance cell) is an AIQC Predictor whose `job`\n",
"# links back to the Queue that trained it -- TODO confirm against the cell that creates `predictor`.\n",
"queue = predictor.job.queue\n",
"queue.plot_performance()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3067d1ab-e6dc-40c4-9d3c-1f293c5b483c",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment