{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Categorical Encoding.ipynb",
"provenance": [],
"collapsed_sections": [],
"authorship_tag": "ABX9TyPxp4DnWs7o6MmCborBmiSx",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
""
]
},
{
"cell_type": "code",
"source": [
"!pip install tensorflow_addons\n",
"!pip install category_encoders"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "dV7sBRyA0laj",
"outputId": "e75f1c7f-ddc6-4c36-a2d0-41ac2cd8c0e7"
},
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Requirement already satisfied: tensorflow_addons in /usr/local/lib/python3.7/dist-packages (0.16.1)\n",
"Requirement already satisfied: typeguard>=2.7 in /usr/local/lib/python3.7/dist-packages (from tensorflow_addons) (2.7.1)\n",
"Requirement already satisfied: category_encoders in /usr/local/lib/python3.7/dist-packages (2.4.0)\n",
"Requirement already satisfied: patsy>=0.5.1 in /usr/local/lib/python3.7/dist-packages (from category_encoders) (0.5.2)\n",
"Requirement already satisfied: statsmodels>=0.9.0 in /usr/local/lib/python3.7/dist-packages (from category_encoders) (0.10.2)\n",
"Requirement already satisfied: scipy>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from category_encoders) (1.4.1)\n",
"Requirement already satisfied: scikit-learn>=0.20.0 in /usr/local/lib/python3.7/dist-packages (from category_encoders) (1.0.2)\n",
"Requirement already satisfied: pandas>=0.21.1 in /usr/local/lib/python3.7/dist-packages (from category_encoders) (1.3.5)\n",
"Requirement already satisfied: numpy>=1.14.0 in /usr/local/lib/python3.7/dist-packages (from category_encoders) (1.21.6)\n",
"Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.21.1->category_encoders) (2.8.2)\n",
"Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.21.1->category_encoders) (2022.1)\n",
"Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from patsy>=0.5.1->category_encoders) (1.15.0)\n",
"Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.7/dist-packages (from scikit-learn>=0.20.0->category_encoders) (1.1.0)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn>=0.20.0->category_encoders) (3.1.0)\n"
]
}
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"id": "2INjkFXMDzMg"
},
"outputs": [],
"source": [
"import warnings\n",
"warnings.filterwarnings(\"ignore\")"
]
},
{
"cell_type": "code",
"source": [
"import pandas as pd\n",
"import requests\n",
"from zipfile import ZipFile\n",
"\n",
"dataset_url = 'https://storage.googleapis.com/kaggle-data-sets/225/498/bundle/archive.zip?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-kaggle-com%40kaggle-161607.iam.gserviceaccount.com%2F20220430%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20220430T104007Z&X-Goog-Expires=259199&X-Goog-SignedHeaders=host&X-Goog-Signature=1ced48204b7889c650880becabe6f5a825ff8d6d346832d811cb4a928586f097ff2edd3f976109f5c05b3bcb7c93c644196bbf71417ed9038f23296c2310fda6539c5349471f856435c1ee13f0345cefd37aca6de9f39f454486106353681c949830c5629f62ed7551beb1e16dda1f011b4c54f9c1943e2607629e5b6849373b923fd595fdcb63a6e7a61a0d98c3753ffdafaeb4506efafe45948cb2dc577c2df8d0cf6d195c88077f050e024ffb50f3f66b0f1fa4b0c1fe3ac7c5185aa0af2907d179847e8eec2d4996428fa2b97b93c9d19247827213c65fb142e4d5f3ce20f3ba0b7fa3a45a55a17f8b975f2204e77fcc9edaff701c1c7d29e459ce6f4e25'\n",
"download_filename = 'download.zip'\n",
"content_filename = 'adult.csv'\n",
"\n",
"req = requests.get(dataset_url)\n",
"with open(download_filename, 'wb') as output_file:\n",
" output_file.write(req.content)\n",
"print('Download completed!\\n')\n",
"\n",
"zf = ZipFile(download_filename)\n",
"data = pd.read_csv(zf.open(content_filename)).dropna()\n",
"data['occupation'] = data['occupation'].replace({'?': 'Unknown'})\n",
"data['workclass'] = data['workclass'].replace({'?': 'Unknown'})\n",
"data"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 458
},
"id": "GttbPkK5EDQf",
"outputId": "3c819a95-cd2c-410e-cd11-5affbd49822c"
},
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Download completed!\n",
"\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
" age workclass fnlwgt education education.num marital.status \\\n",
"0 90 Unknown 77053 HS-grad 9 Widowed \n",
"1 82 Private 132870 HS-grad 9 Widowed \n",
"2 66 Unknown 186061 Some-college 10 Widowed \n",
"3 54 Private 140359 7th-8th 4 Divorced \n",
"4 41 Private 264663 Some-college 10 Separated \n",
"... ... ... ... ... ... ... \n",
"32556 22 Private 310152 Some-college 10 Never-married \n",
"32557 27 Private 257302 Assoc-acdm 12 Married-civ-spouse \n",
"32558 40 Private 154374 HS-grad 9 Married-civ-spouse \n",
"32559 58 Private 151910 HS-grad 9 Widowed \n",
"32560 22 Private 201490 HS-grad 9 Never-married \n",
"\n",
" occupation relationship race sex capital.gain \\\n",
"0 Unknown Not-in-family White Female 0 \n",
"1 Exec-managerial Not-in-family White Female 0 \n",
"2 Unknown Unmarried Black Female 0 \n",
"3 Machine-op-inspct Unmarried White Female 0 \n",
"4 Prof-specialty Own-child White Female 0 \n",
"... ... ... ... ... ... \n",
"32556 Protective-serv Not-in-family White Male 0 \n",
"32557 Tech-support Wife White Female 0 \n",
"32558 Machine-op-inspct Husband White Male 0 \n",
"32559 Adm-clerical Unmarried White Female 0 \n",
"32560 Adm-clerical Own-child White Male 0 \n",
"\n",
" capital.loss hours.per.week native.country income \n",
"0 4356 40 United-States <=50K \n",
"1 4356 18 United-States <=50K \n",
"2 4356 40 United-States <=50K \n",
"3 3900 40 United-States <=50K \n",
"4 3900 40 United-States <=50K \n",
"... ... ... ... ... \n",
"32556 0 40 United-States <=50K \n",
"32557 0 38 United-States <=50K \n",
"32558 0 40 United-States >50K \n",
"32559 0 40 United-States <=50K \n",
"32560 0 20 United-States <=50K \n",
"\n",
"[32561 rows x 15 columns]"
],
"text/html": [
"\n",
"
\n", " | age | \n", "workclass | \n", "fnlwgt | \n", "education | \n", "education.num | \n", "marital.status | \n", "occupation | \n", "relationship | \n", "race | \n", "sex | \n", "capital.gain | \n", "capital.loss | \n", "hours.per.week | \n", "native.country | \n", "income | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "90 | \n", "Unknown | \n", "77053 | \n", "HS-grad | \n", "9 | \n", "Widowed | \n", "Unknown | \n", "Not-in-family | \n", "White | \n", "Female | \n", "0 | \n", "4356 | \n", "40 | \n", "United-States | \n", "<=50K | \n", "
1 | \n", "82 | \n", "Private | \n", "132870 | \n", "HS-grad | \n", "9 | \n", "Widowed | \n", "Exec-managerial | \n", "Not-in-family | \n", "White | \n", "Female | \n", "0 | \n", "4356 | \n", "18 | \n", "United-States | \n", "<=50K | \n", "
2 | \n", "66 | \n", "Unknown | \n", "186061 | \n", "Some-college | \n", "10 | \n", "Widowed | \n", "Unknown | \n", "Unmarried | \n", "Black | \n", "Female | \n", "0 | \n", "4356 | \n", "40 | \n", "United-States | \n", "<=50K | \n", "
3 | \n", "54 | \n", "Private | \n", "140359 | \n", "7th-8th | \n", "4 | \n", "Divorced | \n", "Machine-op-inspct | \n", "Unmarried | \n", "White | \n", "Female | \n", "0 | \n", "3900 | \n", "40 | \n", "United-States | \n", "<=50K | \n", "
4 | \n", "41 | \n", "Private | \n", "264663 | \n", "Some-college | \n", "10 | \n", "Separated | \n", "Prof-specialty | \n", "Own-child | \n", "White | \n", "Female | \n", "0 | \n", "3900 | \n", "40 | \n", "United-States | \n", "<=50K | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
32556 | \n", "22 | \n", "Private | \n", "310152 | \n", "Some-college | \n", "10 | \n", "Never-married | \n", "Protective-serv | \n", "Not-in-family | \n", "White | \n", "Male | \n", "0 | \n", "0 | \n", "40 | \n", "United-States | \n", "<=50K | \n", "
32557 | \n", "27 | \n", "Private | \n", "257302 | \n", "Assoc-acdm | \n", "12 | \n", "Married-civ-spouse | \n", "Tech-support | \n", "Wife | \n", "White | \n", "Female | \n", "0 | \n", "0 | \n", "38 | \n", "United-States | \n", "<=50K | \n", "
32558 | \n", "40 | \n", "Private | \n", "154374 | \n", "HS-grad | \n", "9 | \n", "Married-civ-spouse | \n", "Machine-op-inspct | \n", "Husband | \n", "White | \n", "Male | \n", "0 | \n", "0 | \n", "40 | \n", "United-States | \n", ">50K | \n", "
32559 | \n", "58 | \n", "Private | \n", "151910 | \n", "HS-grad | \n", "9 | \n", "Widowed | \n", "Adm-clerical | \n", "Unmarried | \n", "White | \n", "Female | \n", "0 | \n", "0 | \n", "40 | \n", "United-States | \n", "<=50K | \n", "
32560 | \n", "22 | \n", "Private | \n", "201490 | \n", "HS-grad | \n", "9 | \n", "Never-married | \n", "Adm-clerical | \n", "Own-child | \n", "White | \n", "Male | \n", "0 | \n", "0 | \n", "20 | \n", "United-States | \n", "<=50K | \n", "
32561 rows × 15 columns
\n", "\n", " | age | \n", "fnlwgt | \n", "education.num | \n", "capital.gain | \n", "capital.loss | \n", "hours.per.week | \n", "workclass_Federal-gov | \n", "workclass_Local-gov | \n", "workclass_Never-worked | \n", "workclass_Private | \n", "... | \n", "native.country_Portugal | \n", "native.country_Puerto-Rico | \n", "native.country_Scotland | \n", "native.country_South | \n", "native.country_Taiwan | \n", "native.country_Thailand | \n", "native.country_Trinadad&Tobago | \n", "native.country_United-States | \n", "native.country_Vietnam | \n", "native.country_Yugoslavia | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "90 | \n", "77053 | \n", "9 | \n", "0 | \n", "4356 | \n", "40 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "
1 | \n", "82 | \n", "132870 | \n", "9 | \n", "0 | \n", "4356 | \n", "18 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "
2 | \n", "66 | \n", "186061 | \n", "10 | \n", "0 | \n", "4356 | \n", "40 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "
3 | \n", "54 | \n", "140359 | \n", "4 | \n", "0 | \n", "3900 | \n", "40 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "
4 | \n", "41 | \n", "264663 | \n", "10 | \n", "0 | \n", "3900 | \n", "40 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
32556 | \n", "22 | \n", "310152 | \n", "10 | \n", "0 | \n", "0 | \n", "40 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "
32557 | \n", "27 | \n", "257302 | \n", "12 | \n", "0 | \n", "0 | \n", "38 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "
32558 | \n", "40 | \n", "154374 | \n", "9 | \n", "0 | \n", "0 | \n", "40 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "
32559 | \n", "58 | \n", "151910 | \n", "9 | \n", "0 | \n", "0 | \n", "40 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "
32560 | \n", "22 | \n", "201490 | \n", "9 | \n", "0 | \n", "0 | \n", "20 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "
32561 rows × 108 columns
\n", "\n", " | intercept | \n", "age | \n", "workclass_0 | \n", "workclass_1 | \n", "workclass_2 | \n", "workclass_3 | \n", "workclass_4 | \n", "workclass_5 | \n", "workclass_6 | \n", "workclass_7 | \n", "... | \n", "native.country_31 | \n", "native.country_32 | \n", "native.country_33 | \n", "native.country_34 | \n", "native.country_35 | \n", "native.country_36 | \n", "native.country_37 | \n", "native.country_38 | \n", "native.country_39 | \n", "native.country_40 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
9281 | \n", "1 | \n", "35 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
31884 | \n", "1 | \n", "28 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
31580 | \n", "1 | \n", "55 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
18489 | \n", "1 | \n", "33 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
21111 | \n", "1 | \n", "39 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
13123 | \n", "1 | \n", "90 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
19648 | \n", "1 | \n", "36 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
9845 | \n", "1 | \n", "26 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
10799 | \n", "1 | \n", "44 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
2732 | \n", "1 | \n", "39 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
29304 rows × 101 columns
\n", "\n", " | age | \n", "workclass_0 | \n", "workclass_1 | \n", "workclass_2 | \n", "workclass_3 | \n", "fnlwgt | \n", "education_0 | \n", "education_1 | \n", "education_2 | \n", "education_3 | \n", "... | \n", "sex_1 | \n", "capital.gain | \n", "capital.loss | \n", "hours.per.week | \n", "native.country_0 | \n", "native.country_1 | \n", "native.country_2 | \n", "native.country_3 | \n", "native.country_4 | \n", "native.country_5 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
9281 | \n", "35 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "241126 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "40 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "
31884 | \n", "28 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "90547 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "1 | \n", "0 | \n", "0 | \n", "23 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "
31580 | \n", "55 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "70088 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "40 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "
18489 | \n", "33 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "182423 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "40 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "
21111 | \n", "39 | \n", "0 | \n", "1 | \n", "1 | \n", "0 | \n", "163057 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "99 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
13123 | \n", "90 | \n", "0 | \n", "1 | \n", "0 | \n", "1 | \n", "282095 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "40 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "
19648 | \n", "36 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "279721 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "40 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "
9845 | \n", "26 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "51961 | \n", "0 | \n", "1 | \n", "1 | \n", "1 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "51 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "
10799 | \n", "44 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "115323 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "40 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "
2732 | \n", "39 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "224531 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "7298 | \n", "0 | \n", "40 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "
29304 rows × 36 columns
\n", "\n", " | col_0 | \n", "col_1 | \n", "col_2 | \n", "col_3 | \n", "col_4 | \n", "col_5 | \n", "col_6 | \n", "col_7 | \n", "col_8 | \n", "col_9 | \n", "... | \n", "col_16 | \n", "col_17 | \n", "col_18 | \n", "col_19 | \n", "age | \n", "fnlwgt | \n", "education.num | \n", "capital.gain | \n", "capital.loss | \n", "hours.per.week | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
9281 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "... | \n", "0 | \n", "2 | \n", "1 | \n", "1 | \n", "35 | \n", "241126 | \n", "14 | \n", "0 | \n", "0 | \n", "40 | \n", "
31884 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "1 | \n", "3 | \n", "1 | \n", "28 | \n", "90547 | \n", "9 | \n", "0 | \n", "0 | \n", "23 | \n", "
31580 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "... | \n", "0 | \n", "2 | \n", "1 | \n", "1 | \n", "55 | \n", "70088 | \n", "14 | \n", "0 | \n", "0 | \n", "40 | \n", "
18489 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "2 | \n", "4 | \n", "0 | \n", "33 | \n", "182423 | \n", "9 | \n", "0 | \n", "0 | \n", "40 | \n", "
21111 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "1 | \n", "1 | \n", "1 | \n", "39 | \n", "163057 | \n", "10 | \n", "0 | \n", "0 | \n", "99 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
13123 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "... | \n", "0 | \n", "2 | \n", "1 | \n", "2 | \n", "90 | \n", "282095 | \n", "10 | \n", "0 | \n", "0 | \n", "40 | \n", "
19648 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "... | \n", "0 | \n", "2 | \n", "2 | \n", "2 | \n", "36 | \n", "279721 | \n", "9 | \n", "0 | \n", "0 | \n", "40 | \n", "
9845 | \n", "0 | \n", "1 | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "2 | \n", "2 | \n", "0 | \n", "26 | \n", "51961 | \n", "8 | \n", "0 | \n", "0 | \n", "51 | \n", "
10799 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "... | \n", "0 | \n", "2 | \n", "1 | \n", "1 | \n", "44 | \n", "115323 | \n", "14 | \n", "0 | \n", "0 | \n", "40 | \n", "
2732 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "1 | \n", "0 | \n", "... | \n", "0 | \n", "2 | \n", "2 | \n", "1 | \n", "39 | \n", "224531 | \n", "9 | \n", "7298 | \n", "0 | \n", "40 | \n", "
29304 rows × 26 columns
\n", "\n", " | age | \n", "workclass_0 | \n", "workclass_1 | \n", "fnlwgt | \n", "education_0 | \n", "education_1 | \n", "education_2 | \n", "education.num | \n", "marital.status_0 | \n", "marital.status_1 | \n", "... | \n", "relationship_1 | \n", "race_0 | \n", "race_1 | \n", "sex_0 | \n", "capital.gain | \n", "capital.loss | \n", "hours.per.week | \n", "native.country_0 | \n", "native.country_1 | \n", "native.country_2 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
9281 | \n", "35 | \n", "0 | \n", "2 | \n", "241126 | \n", "0 | \n", "2 | \n", "0 | \n", "14 | \n", "1 | \n", "1 | \n", "... | \n", "1 | \n", "0 | \n", "1 | \n", "2 | \n", "0 | \n", "0 | \n", "40 | \n", "0 | \n", "0 | \n", "1 | \n", "
31884 | \n", "28 | \n", "0 | \n", "2 | \n", "90547 | \n", "0 | \n", "0 | \n", "1 | \n", "9 | \n", "1 | \n", "1 | \n", "... | \n", "2 | \n", "0 | \n", "2 | \n", "1 | \n", "0 | \n", "0 | \n", "23 | \n", "0 | \n", "0 | \n", "1 | \n", "
31580 | \n", "55 | \n", "0 | \n", "2 | \n", "70088 | \n", "0 | \n", "2 | \n", "0 | \n", "14 | \n", "1 | \n", "1 | \n", "... | \n", "1 | \n", "0 | \n", "1 | \n", "2 | \n", "0 | \n", "0 | \n", "40 | \n", "0 | \n", "0 | \n", "1 | \n", "
18489 | \n", "33 | \n", "0 | \n", "2 | \n", "182423 | \n", "0 | \n", "0 | \n", "1 | \n", "9 | \n", "0 | \n", "2 | \n", "... | \n", "2 | \n", "0 | \n", "2 | \n", "2 | \n", "0 | \n", "0 | \n", "40 | \n", "0 | \n", "0 | \n", "1 | \n", "
21111 | \n", "39 | \n", "1 | \n", "2 | \n", "163057 | \n", "0 | \n", "0 | \n", "2 | \n", "10 | \n", "0 | \n", "2 | \n", "... | \n", "1 | \n", "0 | \n", "1 | \n", "2 | \n", "0 | \n", "0 | \n", "99 | \n", "0 | \n", "0 | \n", "1 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
13123 | \n", "90 | \n", "1 | \n", "1 | \n", "282095 | \n", "0 | \n", "0 | \n", "2 | \n", "10 | \n", "1 | \n", "1 | \n", "... | \n", "1 | \n", "0 | \n", "1 | \n", "2 | \n", "0 | \n", "0 | \n", "40 | \n", "0 | \n", "0 | \n", "1 | \n", "
19648 | \n", "36 | \n", "0 | \n", "2 | \n", "279721 | \n", "0 | \n", "0 | \n", "1 | \n", "9 | \n", "1 | \n", "1 | \n", "... | \n", "1 | \n", "0 | \n", "1 | \n", "2 | \n", "0 | \n", "0 | \n", "40 | \n", "0 | \n", "0 | \n", "1 | \n", "
9845 | \n", "26 | \n", "0 | \n", "2 | \n", "51961 | \n", "0 | \n", "3 | \n", "2 | \n", "8 | \n", "1 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "2 | \n", "2 | \n", "0 | \n", "0 | \n", "51 | \n", "0 | \n", "0 | \n", "1 | \n", "
10799 | \n", "44 | \n", "0 | \n", "2 | \n", "115323 | \n", "0 | \n", "2 | \n", "0 | \n", "14 | \n", "1 | \n", "1 | \n", "... | \n", "1 | \n", "0 | \n", "1 | \n", "2 | \n", "0 | \n", "0 | \n", "40 | \n", "0 | \n", "0 | \n", "1 | \n", "
2732 | \n", "39 | \n", "0 | \n", "2 | \n", "224531 | \n", "0 | \n", "0 | \n", "1 | \n", "9 | \n", "1 | \n", "1 | \n", "... | \n", "1 | \n", "0 | \n", "1 | \n", "2 | \n", "7298 | \n", "0 | \n", "40 | \n", "0 | \n", "0 | \n", "1 | \n", "
29304 rows × 23 columns
\n", "