├── CONTRIBUTING.md
├── Code
│   ├── %prun.ipynb
│   ├── %timeit.ipynb
│   ├── Convert normal Distribution.ipynb
│   ├── CustomDataGen_Keras.ipynb
│   ├── Date Parser.ipynb
│   ├── Decision Tree Plot.ipynb
│   ├── Dummy Classifier.ipynb
│   ├── Extract E-mails from text.ipynb
│   ├── FeatureSelection_SelectFromModel.ipynb
│   ├── Funnel_charts.ipynb
│   ├── Hack of the day - Time series.ipynb
│   ├── HoD_history.ipynb
│   ├── HoD_stratify.ipynb
│   ├── Image Augmentation - Article Shoot.ipynb
│   ├── Pandas Apply.ipynb
│   ├── Pandas_boolean indexing.ipynb
│   ├── Pandas_conditional_formatting.ipynb
│   ├── Read multiple datasets.ipynb
│   ├── Removing emojis from text.ipynb
│   ├── Resizing images.ipynb
│   ├── Running Linux Commands in Jupyter Notebook.ipynb
│   ├── Save_sklearn_model.ipynb
│   ├── Styling pandas.ipynb
│   ├── args.ipynb
│   ├── av_hack.ipynb
│   ├── binning_data.ipynb
│   ├── column_transformer.ipynb
│   ├── conditional_selection_of_rows.ipynb
│   ├── convert_string_to_characters.ipynb
│   ├── first and last name extraction.ipynb
│   ├── generator vs list.ipynb
│   ├── groupby_in_pandas.ipynb
│   ├── heatmap_over_code.ipynb
│   ├── hide_print.ipynb
│   ├── interactive plot - plotly.ipynb
│   ├── interactive_notebook.ipynb
│   ├── invert_dictionary.ipynb
│   ├── iterative_imputer.ipynb
│   ├── knnImputer.ipynb
│   ├── list_of_lists_to_list.ipynb
│   ├── magic function - %who.ipynb
│   ├── make_regression.ipynb
│   ├── map python.ipynb
│   ├── melt().ipynb
│   ├── merge_pdf.ipynb
│   ├── pandarellel.ipynb
│   ├── pandas bar plot.ipynb
│   ├── pandas profiling.ipynb
│   ├── pandas_crosstab.ipynb
│   ├── pandas_pivot_table.ipynb
│   ├── pandas_read_html_files.ipynb
│   ├── pdb_pythonDebugger.ipynb
│   ├── plot_confusion_matrix.ipynb
│   ├── pretty print.ipynb
│   ├── print zenofpython.ipynb
│   ├── pyforest.ipynb
│   ├── read_csv_ParseDate.ipynb
│   ├── run python script.ipynb
│   ├── select_dtype.ipynb
│   ├── split_pdf_pages.ipynb
│   ├── str.split().ipynb
│   ├── tqdm.ipynb
│   └── write python script.ipynb
├── Data
│   ├── AirQualityUCI.csv
│   ├── data_cleaned.csv
│   ├── dummy_pdf.pdf
│   ├── dummy_pdf_page1.pdf
│   ├── dummy_pdf_page2.pdf
│   ├── ind-ban-comment.csv
│   ├── loan_train.csv
│   └── muti_input_nn.png
├── LICENSE
└── README.md
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contribution Guidelines
2 |
3 | This project is created and maintained by Analytics Vidhya. The core idea behind these data science hacks is to help the data science community write better and more efficient code. If you would like to contribute your own hack, make sure to follow these guidelines.
4 |
5 | ### Hack idea
6 | 1. Your hack must have at least one of the following characteristics:
7 |
8 |     1. It must save time.
9 |     1. It must require fewer lines of code.
10 |     1. It must provide more functionality.
11 |     1. It must use a better tool or library than an existing one.
12 |
13 | 1. The hack must be related to data science, machine learning, data engineering, or any related field.
14 |
15 |
16 | ### Code and submission steps
17 | 1. Create a pull request describing what your data science hack does.
18 | 1. Add all the necessary files in the same format as used in this project.
19 | 1. The code must include proper and relevant comments.
20 | 1. Each PR will first be reviewed and then approved accordingly.
21 |
--------------------------------------------------------------------------------
/Code/%prun.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "slideshow": {
7 | "slide_type": "slide"
8 | }
9 | },
10 | "source": [
11 | "# Magic Command - %prun"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 1,
17 | "metadata": {
18 | "slideshow": {
19 | "slide_type": "fragment"
20 | }
21 | },
22 | "outputs": [],
23 | "source": [
24 | "# import pandas\n",
25 | "import pandas as pd"
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": 2,
31 | "metadata": {
32 | "slideshow": {
33 | "slide_type": "fragment"
34 | }
35 | },
36 | "outputs": [],
37 | "source": [
38 | "#Import dataset\n",
39 | "loan = pd.read_csv('../Data/loan_train.csv', index_col = 'Loan_ID')"
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": 3,
45 | "metadata": {
46 | "slideshow": {
47 | "slide_type": "fragment"
48 | }
49 | },
50 | "outputs": [],
51 | "source": [
52 | "# Define function\n",
53 | "def missing(x):\n",
54 | " return sum(x.isnull())"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": 16,
60 | "metadata": {
61 | "slideshow": {
62 | "slide_type": "slide"
63 | }
64 | },
65 | "outputs": [
66 | {
67 | "name": "stdout",
68 | "output_type": "stream",
69 | "text": [
70 | "Missing values per column\n",
71 | " "
72 | ]
73 | }
74 | ],
75 | "source": [
76 | "# Apply per column\n",
77 | "\n",
78 | "print('Missing values per column')\n",
79 | "%prun loan.apply(missing, axis = 0).head()"
80 | ]
81 | },
82 | {
83 | "cell_type": "code",
84 | "execution_count": 15,
85 | "metadata": {
86 | "slideshow": {
87 | "slide_type": "slide"
88 | }
89 | },
90 | "outputs": [
91 | {
92 | "name": "stdout",
93 | "output_type": "stream",
94 | "text": [
95 | "Missing values per row\n",
96 | " "
97 | ]
98 | }
99 | ],
100 | "source": [
101 | "# Apply per row\n",
102 | "print('Missing values per row')\n",
103 | "%prun loan.apply(missing, axis = 1).head()"
104 | ]
105 | },
106 | {
107 | "cell_type": "code",
108 | "execution_count": null,
109 | "metadata": {
110 | "slideshow": {
111 | "slide_type": "skip"
112 | }
113 | },
114 | "outputs": [],
115 | "source": []
116 | }
117 | ],
118 | "metadata": {
119 | "kernelspec": {
120 | "display_name": "Python 3",
121 | "language": "python",
122 | "name": "python3"
123 | },
124 | "language_info": {
125 | "codemirror_mode": {
126 | "name": "ipython",
127 | "version": 3
128 | },
129 | "file_extension": ".py",
130 | "mimetype": "text/x-python",
131 | "name": "python",
132 | "nbconvert_exporter": "python",
133 | "pygments_lexer": "ipython3",
134 | "version": "3.7.1"
135 | }
136 | },
137 | "nbformat": 4,
138 | "nbformat_minor": 2
139 | }
140 |
--------------------------------------------------------------------------------
/Code/Date Parser.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import datetime\n",
10 | "import dateutil.parser"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 2,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "input_date = '10th May 1996'\n",
20 | "parsed_date = dateutil.parser.parse(input_date)"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 3,
26 | "metadata": {},
27 | "outputs": [
28 | {
29 | "name": "stdout",
30 | "output_type": "stream",
31 | "text": [
32 | "10-05-1996\n"
33 | ]
34 | }
35 | ],
36 | "source": [
37 | "output_date = datetime.datetime.strftime(parsed_date, '%d-%m-%Y')\n",
38 | "\n",
39 | "print(output_date)"
40 | ]
41 | }
42 | ],
43 | "metadata": {
44 | "kernelspec": {
45 | "display_name": "Python 3",
46 | "language": "python",
47 | "name": "python3"
48 | },
49 | "language_info": {
50 | "codemirror_mode": {
51 | "name": "ipython",
52 | "version": 3
53 | },
54 | "file_extension": ".py",
55 | "mimetype": "text/x-python",
56 | "name": "python",
57 | "nbconvert_exporter": "python",
58 | "pygments_lexer": "ipython3",
59 | "version": "3.7.3"
60 | }
61 | },
62 | "nbformat": 4,
63 | "nbformat_minor": 2
64 | }
65 |
--------------------------------------------------------------------------------
/Code/Dummy Classifier.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Sklearn's Dummy Classifier"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 28,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import pandas as pd\n",
17 | "from sklearn.dummy import DummyClassifier"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 29,
23 | "metadata": {},
24 | "outputs": [
25 | {
26 | "data": {
27 | "text/html": [
28 |       "(HTML table: first five rows of loan_train.csv; same content as the text/plain output below)"
145 | ],
146 | "text/plain": [
147 | " Loan_ID Gender Married Dependents Education Self_Employed \\\n",
148 | "0 LP001002 Male No 0 Graduate No \n",
149 | "1 LP001003 Male Yes 1 Graduate No \n",
150 | "2 LP001005 Male Yes 0 Graduate Yes \n",
151 | "3 LP001006 Male Yes 0 Not Graduate No \n",
152 | "4 LP001008 Male No 0 Graduate No \n",
153 | "\n",
154 | " ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term \\\n",
155 | "0 5849 0.0 NaN 360.0 \n",
156 | "1 4583 1508.0 128.0 360.0 \n",
157 | "2 3000 0.0 66.0 360.0 \n",
158 | "3 2583 2358.0 120.0 360.0 \n",
159 | "4 6000 0.0 141.0 360.0 \n",
160 | "\n",
161 | " Credit_History Property_Area Loan_Status \n",
162 | "0 1.0 Urban Y \n",
163 | "1 1.0 Rural N \n",
164 | "2 1.0 Urban Y \n",
165 | "3 1.0 Urban Y \n",
166 | "4 1.0 Urban Y "
167 | ]
168 | },
169 | "execution_count": 29,
170 | "metadata": {},
171 | "output_type": "execute_result"
172 | }
173 | ],
174 | "source": [
175 | "#import data\n",
176 | "data = pd.read_csv('../Data/loan_train.csv')\n",
177 | "data.head()"
178 | ]
179 | },
180 | {
181 | "cell_type": "code",
182 | "execution_count": 30,
183 | "metadata": {},
184 | "outputs": [],
185 | "source": [
186 | "# Dependent and independent variables\n",
187 | "\n",
188 | "X = data.drop(\"Loan_Status\", axis = 1)\n",
189 | "y = data[\"Loan_Status\"]"
190 | ]
191 | },
192 | {
193 | "cell_type": "code",
194 | "execution_count": 31,
195 | "metadata": {},
196 | "outputs": [
197 | {
198 | "data": {
199 | "text/plain": [
200 | "DummyClassifier(constant=None, random_state=None, strategy='most_frequent')"
201 | ]
202 | },
203 | "execution_count": 31,
204 | "metadata": {},
205 | "output_type": "execute_result"
206 | }
207 | ],
208 | "source": [
209 | "# Dummy classifier with strategy as \"most_frequent\"\n",
210 | "dummy_clf = DummyClassifier(strategy=\"most_frequent\")\n",
211 | "dummy_clf.fit(X, y)"
212 | ]
213 | },
214 | {
215 | "cell_type": "code",
216 | "execution_count": 32,
217 | "metadata": {},
218 | "outputs": [
219 | {
220 | "data": {
221 | "text/plain": [
222 | "array(['Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
223 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
224 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
225 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
226 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
227 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
228 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
229 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
230 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
231 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
232 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
233 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
234 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
235 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
236 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
237 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
238 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
239 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
240 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
241 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
242 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
243 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
244 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
245 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
246 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
247 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
248 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
249 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
250 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
251 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
252 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
253 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
254 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
255 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
256 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
257 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
258 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
259 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
260 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
261 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
262 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
263 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
264 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
265 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
266 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
267 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
268 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
269 |        "       'Y', 'Y', 'Y'], dtype='<U1')"
21 |       "(HTML table: first five rows of AirQualityUCI.csv; same content as the text/plain output below)"
89 | ],
90 | "text/plain": [
91 | " Date Time CO(GT) PT08.S1(CO) NMHC(GT)\n",
92 | "0 10/03/2004 18.00.00 2 6 1360\n",
93 | "1 10/03/2004 19.00.00 2 1292 112\n",
94 | "2 10/03/2004 20.00.00 2 2 1402\n",
95 | "3 10/03/2004 21.00.00 2 2 1376\n",
96 | "4 10/03/2004 22.00.00 1 6 1272"
97 | ]
98 | },
99 | "execution_count": 3,
100 | "metadata": {},
101 | "output_type": "execute_result"
102 | }
103 | ],
104 | "source": [
105 | "data = pd.read_csv('AirQualityUCI.csv')\n",
106 | "data.head()"
107 | ]
108 | },
109 | {
110 | "cell_type": "code",
111 | "execution_count": 4,
112 | "metadata": {},
113 | "outputs": [
114 | {
115 | "data": {
116 | "text/plain": [
117 | "Date object\n",
118 | "Time object\n",
119 | "CO(GT) int64\n",
120 | "PT08.S1(CO) int64\n",
121 | "NMHC(GT) int64\n",
122 | "dtype: object"
123 | ]
124 | },
125 | "execution_count": 4,
126 | "metadata": {},
127 | "output_type": "execute_result"
128 | }
129 | ],
130 | "source": [
131 | "data.dtypes"
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "execution_count": 5,
137 | "metadata": {},
138 | "outputs": [],
139 | "source": [
140 | "data['date_time'] = data['Date'] + ' ' + data['Time']\n",
141 | "data['date_time'] = pd.to_datetime(data['date_time'],format='%d/%m/%Y %H.%M.%S')"
142 | ]
143 | },
144 | {
145 | "cell_type": "code",
146 | "execution_count": 6,
147 | "metadata": {},
148 | "outputs": [
149 | {
150 | "data": {
151 | "text/html": [
152 |       "(HTML table: first five rows of new_df with the extracted date-time features; same content as the text/plain output below)"
245 | ],
246 | "text/plain": [
247 | " date_time year month dayofweek quarter hour CO(GT) \\\n",
248 | "0 2004-03-10 18:00:00 2004 3 2 1 18 2 \n",
249 | "1 2004-03-10 19:00:00 2004 3 2 1 19 2 \n",
250 | "2 2004-03-10 20:00:00 2004 3 2 1 20 2 \n",
251 | "3 2004-03-10 21:00:00 2004 3 2 1 21 2 \n",
252 | "4 2004-03-10 22:00:00 2004 3 2 1 22 1 \n",
253 | "\n",
254 | " PT08.S1(CO) NMHC(GT) \n",
255 | "0 6 1360 \n",
256 | "1 1292 112 \n",
257 | "2 2 1402 \n",
258 | "3 2 1376 \n",
259 | "4 6 1272 "
260 | ]
261 | },
262 | "execution_count": 6,
263 | "metadata": {},
264 | "output_type": "execute_result"
265 | }
266 | ],
267 | "source": [
268 | "new_df = pd.DataFrame({\"date_time\": data['date_time'],\n",
269 | " \"year\": data['date_time'].dt.year,\n",
270 | " \"month\": data['date_time'].dt.month,\n",
271 | " \"dayofweek\": data['date_time'].dt.dayofweek,\n",
272 | " \"quarter\": data['date_time'].dt.quarter,\n",
273 | " \"hour\": data['date_time'].dt.hour,\n",
274 | " \"CO(GT)\": data['CO(GT)'],\n",
275 | " \"PT08.S1(CO)\": data['PT08.S1(CO)'],\n",
276 | " \"NMHC(GT)\": data['NMHC(GT)']\n",
277 | " })\n",
278 | "new_df.head()"
279 | ]
280 | },
281 | {
282 | "cell_type": "code",
283 | "execution_count": null,
284 | "metadata": {},
285 | "outputs": [],
286 | "source": []
287 | }
288 | ],
289 | "metadata": {
290 | "kernelspec": {
291 | "display_name": "Python 3",
292 | "language": "python",
293 | "name": "python3"
294 | },
295 | "language_info": {
296 | "codemirror_mode": {
297 | "name": "ipython",
298 | "version": 3
299 | },
300 | "file_extension": ".py",
301 | "mimetype": "text/x-python",
302 | "name": "python",
303 | "nbconvert_exporter": "python",
304 | "pygments_lexer": "ipython3",
305 | "version": "3.7.1"
306 | }
307 | },
308 | "nbformat": 4,
309 | "nbformat_minor": 2
310 | }
311 |
--------------------------------------------------------------------------------
/Code/HoD_history.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 2,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "data = pd.read_csv('loan_train.csv')"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 3,
24 | "metadata": {},
25 | "outputs": [
26 | {
27 | "data": {
28 | "text/html": [
29 |       "(HTML table: first five rows of loan_train.csv; same content as the text/plain output below)"
146 | ],
147 | "text/plain": [
148 | " Loan_ID Gender Married Dependents Education Self_Employed \\\n",
149 | "0 LP001002 Male No 0 Graduate No \n",
150 | "1 LP001003 Male Yes 1 Graduate No \n",
151 | "2 LP001005 Male Yes 0 Graduate Yes \n",
152 | "3 LP001006 Male Yes 0 Not Graduate No \n",
153 | "4 LP001008 Male No 0 Graduate No \n",
154 | "\n",
155 | " ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term \\\n",
156 | "0 5849 0.0 NaN 360.0 \n",
157 | "1 4583 1508.0 128.0 360.0 \n",
158 | "2 3000 0.0 66.0 360.0 \n",
159 | "3 2583 2358.0 120.0 360.0 \n",
160 | "4 6000 0.0 141.0 360.0 \n",
161 | "\n",
162 | " Credit_History Property_Area Loan_Status \n",
163 | "0 1.0 Urban Y \n",
164 | "1 1.0 Rural N \n",
165 | "2 1.0 Urban Y \n",
166 | "3 1.0 Urban Y \n",
167 | "4 1.0 Urban Y "
168 | ]
169 | },
170 | "execution_count": 3,
171 | "metadata": {},
172 | "output_type": "execute_result"
173 | }
174 | ],
175 | "source": [
176 | "data.head()"
177 | ]
178 | },
179 | {
180 | "cell_type": "markdown",
181 | "metadata": {},
182 | "source": [
183 | "#### Use %history - get history of commands used previously"
184 | ]
185 | },
186 | {
187 | "cell_type": "code",
188 | "execution_count": 12,
189 | "metadata": {},
190 | "outputs": [
191 | {
192 | "name": "stdout",
193 | "output_type": "stream",
194 | "text": [
195 | "import pandas as pd\n",
196 | "data = pd.read_csv('loan_train.csv')\n",
197 | "data.head()\n",
198 | "%history\n",
199 | "% history -n\n",
200 | "%history -n\n",
201 | "%history -n -o\n",
202 | "%history -n -t\n",
203 | "# prints\n",
204 | "%history -o\n",
205 | "# list of all commands\n",
206 | "%history\n",
207 | "# Prints line numbers for each command \n",
208 | "%history -n\n",
209 | "# list of all commands\n",
210 | "%history\n"
211 | ]
212 | }
213 | ],
214 | "source": [
215 | "# list of all commands\n",
216 | "%history"
217 | ]
218 | },
219 | {
220 | "cell_type": "raw",
221 | "metadata": {},
222 | "source": [
223 | "# Prints line numbers for each command \n",
224 | "%history -n"
225 | ]
226 | },
227 | {
228 | "cell_type": "code",
229 | "execution_count": null,
230 | "metadata": {},
231 | "outputs": [],
232 | "source": [
233 | "# prints the command as well as output \n",
234 | "%history -o"
235 | ]
236 | },
237 | {
238 | "cell_type": "code",
239 | "execution_count": null,
240 | "metadata": {},
241 | "outputs": [],
242 | "source": [
243 |     "# prints translated history - converts it into valid Python commands before execution\n",
244 | "# eg - %history --> get_ipython().run_line_magic('history', '')\n",
245 | "%history -n -t"
246 | ]
247 | },
248 | {
249 | "cell_type": "code",
250 | "execution_count": null,
251 | "metadata": {},
252 | "outputs": [],
253 | "source": []
254 | }
255 | ],
256 | "metadata": {
257 | "kernelspec": {
258 | "display_name": "Python 3",
259 | "language": "python",
260 | "name": "python3"
261 | },
262 | "language_info": {
263 | "codemirror_mode": {
264 | "name": "ipython",
265 | "version": 3
266 | },
267 | "file_extension": ".py",
268 | "mimetype": "text/x-python",
269 | "name": "python",
270 | "nbconvert_exporter": "python",
271 | "pygments_lexer": "ipython3",
272 | "version": "3.7.1"
273 | }
274 | },
275 | "nbformat": 4,
276 | "nbformat_minor": 2
277 | }
278 |
--------------------------------------------------------------------------------
/Code/HoD_stratify.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "#import libraries\n",
10 | "import pandas as pd\n",
11 | "import numpy as np\n"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 2,
17 | "metadata": {},
18 | "outputs": [
19 | {
20 | "data": {
21 | "text/plain": [
22 | "(891, 25)"
23 | ]
24 | },
25 | "execution_count": 2,
26 | "metadata": {},
27 | "output_type": "execute_result"
28 | }
29 | ],
30 | "source": [
31 | "#import dataset\n",
32 | "data = pd.read_csv('data_cleaned.csv')\n",
33 | "data.shape"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": 3,
39 | "metadata": {},
40 | "outputs": [],
41 | "source": [
42 |     "# Separate dependent and independent variables\n",
43 | "x = data.drop(['Survived'], axis = 1)\n",
44 | "y = data['Survived']"
45 | ]
46 | },
47 | {
48 | "cell_type": "code",
49 | "execution_count": 4,
50 | "metadata": {},
51 | "outputs": [],
52 | "source": [
53 | "# import train_test_split\n",
54 | "from sklearn.model_selection import train_test_split"
55 | ]
56 | },
57 | {
58 | "cell_type": "markdown",
59 | "metadata": {},
60 | "source": [
61 | "## Without stratify"
62 | ]
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": 5,
67 | "metadata": {},
68 | "outputs": [],
69 | "source": [
70 | "train_x, test_x, train_y, test_y = train_test_split(x, y, random_state = 45)"
71 | ]
72 | },
73 | {
74 | "cell_type": "code",
75 | "execution_count": 6,
76 | "metadata": {},
77 | "outputs": [
78 | {
79 | "name": "stdout",
80 | "output_type": "stream",
81 | "text": [
82 | "0 0.60479\n",
83 | "1 0.39521\n",
84 | "Name: Survived, dtype: float64\n",
85 | "0 0.650224\n",
86 | "1 0.349776\n",
87 | "Name: Survived, dtype: float64\n"
88 | ]
89 | }
90 | ],
91 | "source": [
92 | "print(train_y.value_counts(normalize=True))\n",
93 | "print(test_y.value_counts(normalize=True))"
94 | ]
95 | },
96 | {
97 | "cell_type": "markdown",
98 | "metadata": {},
99 | "source": [
100 | "## With Stratify"
101 | ]
102 | },
103 | {
104 | "cell_type": "code",
105 | "execution_count": 7,
106 | "metadata": {},
107 | "outputs": [],
108 | "source": [
109 |     "# With Stratify\n",
110 | "train_X, test_X, train_Y, test_Y = train_test_split(x, y, random_state = 56, stratify = y)"
111 | ]
112 | },
113 | {
114 | "cell_type": "code",
115 | "execution_count": 8,
116 | "metadata": {},
117 | "outputs": [
118 | {
119 | "name": "stdout",
120 | "output_type": "stream",
121 | "text": [
122 | "0 0.616766\n",
123 | "1 0.383234\n",
124 | "Name: Survived, dtype: float64\n",
125 | "0 0.61435\n",
126 | "1 0.38565\n",
127 | "Name: Survived, dtype: float64\n"
128 | ]
129 | }
130 | ],
131 | "source": [
132 | "print(train_Y.value_counts(normalize = True))\n",
133 | "print(test_Y.value_counts(normalize = True))"
134 | ]
135 | },
136 | {
137 | "cell_type": "code",
138 | "execution_count": null,
139 | "metadata": {},
140 | "outputs": [],
141 | "source": []
142 | }
143 | ],
144 | "metadata": {
145 | "kernelspec": {
146 | "display_name": "Python 3",
147 | "language": "python",
148 | "name": "python3"
149 | },
150 | "language_info": {
151 | "codemirror_mode": {
152 | "name": "ipython",
153 | "version": 3
154 | },
155 | "file_extension": ".py",
156 | "mimetype": "text/x-python",
157 | "name": "python",
158 | "nbconvert_exporter": "python",
159 | "pygments_lexer": "ipython3",
160 | "version": "3.7.1"
161 | }
162 | },
163 | "nbformat": 4,
164 | "nbformat_minor": 2
165 | }
166 |
--------------------------------------------------------------------------------
/Code/Pandas Apply.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Pandas Apply"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "# import pandas\n",
17 | "import pandas as pd"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 3,
23 | "metadata": {},
24 | "outputs": [],
25 | "source": [
26 | "#Import dataset\n",
27 | "loan = pd.read_csv('../Data/loan_train.csv', index_col = 'Loan_ID')"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 4,
33 | "metadata": {},
34 | "outputs": [],
35 | "source": [
36 | "# Define function\n",
37 | "def missing(x):\n",
38 | " return sum(x.isnull())"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": 5,
44 | "metadata": {},
45 | "outputs": [
46 | {
47 | "name": "stdout",
48 | "output_type": "stream",
49 | "text": [
50 | "Missing values per column\n"
51 | ]
52 | },
53 | {
54 | "data": {
55 | "text/plain": [
56 | "Gender 13\n",
57 | "Married 3\n",
58 | "Dependents 15\n",
59 | "Education 0\n",
60 | "Self_Employed 32\n",
61 | "dtype: int64"
62 | ]
63 | },
64 | "execution_count": 5,
65 | "metadata": {},
66 | "output_type": "execute_result"
67 | }
68 | ],
69 | "source": [
70 | "# Apply per column\n",
71 | "print('Missing values per column')\n",
72 | "loan.apply(missing, axis = 0).head()"
73 | ]
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": 6,
78 | "metadata": {},
79 | "outputs": [
80 | {
81 | "name": "stdout",
82 | "output_type": "stream",
83 | "text": [
84 | "Missing values per row\n"
85 | ]
86 | },
87 | {
88 | "data": {
89 | "text/plain": [
90 | "Loan_ID\n",
91 | "LP001002 1\n",
92 | "LP001003 0\n",
93 | "LP001005 0\n",
94 | "LP001006 0\n",
95 | "LP001008 0\n",
96 | "dtype: int64"
97 | ]
98 | },
99 | "execution_count": 6,
100 | "metadata": {},
101 | "output_type": "execute_result"
102 | }
103 | ],
104 | "source": [
105 | "# Apply per row\n",
106 | "print('Missing values per row')\n",
107 | "loan.apply(missing, axis = 1).head()"
108 | ]
109 | },
110 | {
111 | "cell_type": "code",
112 | "execution_count": null,
113 | "metadata": {},
114 | "outputs": [],
115 | "source": []
116 | }
117 | ],
118 | "metadata": {
119 | "kernelspec": {
120 | "display_name": "Python 3",
121 | "language": "python",
122 | "name": "python3"
123 | },
124 | "language_info": {
125 | "codemirror_mode": {
126 | "name": "ipython",
127 | "version": 3
128 | },
129 | "file_extension": ".py",
130 | "mimetype": "text/x-python",
131 | "name": "python",
132 | "nbconvert_exporter": "python",
133 | "pygments_lexer": "ipython3",
134 | "version": "3.7.1"
135 | }
136 | },
137 | "nbformat": 4,
138 | "nbformat_minor": 2
139 | }
140 |
--------------------------------------------------------------------------------
/Code/Pandas_boolean indexing.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Pandas Boolean Indexing"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "# import pandas\n",
17 | "import pandas as pd"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 6,
23 | "metadata": {},
24 | "outputs": [
25 | {
26 | "data": {
27 | "text/html": [
28 |       "(HTML table: first five rows of loan_train.csv indexed by Loan_ID; same content as the text/plain output below)"
154 | ],
155 | "text/plain": [
156 | " Gender Married Dependents Education Self_Employed \\\n",
157 | "Loan_ID \n",
158 | "LP001002 Male No 0 Graduate No \n",
159 | "LP001003 Male Yes 1 Graduate No \n",
160 | "LP001005 Male Yes 0 Graduate Yes \n",
161 | "LP001006 Male Yes 0 Not Graduate No \n",
162 | "LP001008 Male No 0 Graduate No \n",
163 | "\n",
164 | " ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term \\\n",
165 | "Loan_ID \n",
166 | "LP001002 5849 0.0 NaN 360.0 \n",
167 | "LP001003 4583 1508.0 128.0 360.0 \n",
168 | "LP001005 3000 0.0 66.0 360.0 \n",
169 | "LP001006 2583 2358.0 120.0 360.0 \n",
170 | "LP001008 6000 0.0 141.0 360.0 \n",
171 | "\n",
172 | " Credit_History Property_Area Loan_Status \n",
173 | "Loan_ID \n",
174 | "LP001002 1.0 Urban Y \n",
175 | "LP001003 1.0 Rural N \n",
176 | "LP001005 1.0 Urban Y \n",
177 | "LP001006 1.0 Urban Y \n",
178 | "LP001008 1.0 Urban Y "
179 | ]
180 | },
181 | "execution_count": 6,
182 | "metadata": {},
183 | "output_type": "execute_result"
184 | }
185 | ],
186 | "source": [
187 | "#Import dataset\n",
188 | "loan = pd.read_csv('../Data/loan_train.csv', index_col = 'Loan_ID')\n",
189 | "loan.head()"
190 | ]
191 | },
192 | {
193 | "cell_type": "code",
194 | "execution_count": 7,
195 | "metadata": {},
196 | "outputs": [
197 | {
198 | "data": {
199 | "text/html": [
200 |       "(HTML table: filtered rows showing Gender, Education and Loan_Status; same content as the text/plain output below)"
263 | ],
264 | "text/plain": [
265 | " Gender Education Loan_Status\n",
266 | "Loan_ID \n",
267 | "LP001155 Female Not Graduate Y\n",
268 | "LP001669 Female Not Graduate Y\n",
269 | "LP001692 Female Not Graduate Y\n",
270 | "LP001908 Female Not Graduate Y\n",
271 | "LP002300 Female Not Graduate Y"
272 | ]
273 | },
274 | "execution_count": 7,
275 | "metadata": {},
276 | "output_type": "execute_result"
277 | }
278 | ],
279 | "source": [
280 |     "# We want a list of all female applicants who are not graduates and got a loan.\n",
281 | "\n",
282 | "loan.loc[(loan['Gender'] == 'Female') & (loan['Education'] == 'Not Graduate') & (loan['Loan_Status'] == 'Y'),\n",
283 | " [\"Gender\",\"Education\",\"Loan_Status\"]].head()\n"
284 | ]
285 | },
286 | {
287 | "cell_type": "code",
288 | "execution_count": null,
289 | "metadata": {},
290 | "outputs": [],
291 | "source": []
292 | }
293 | ],
294 | "metadata": {
295 | "kernelspec": {
296 | "display_name": "Python 3",
297 | "language": "python",
298 | "name": "python3"
299 | },
300 | "language_info": {
301 | "codemirror_mode": {
302 | "name": "ipython",
303 | "version": 3
304 | },
305 | "file_extension": ".py",
306 | "mimetype": "text/x-python",
307 | "name": "python",
308 | "nbconvert_exporter": "python",
309 | "pygments_lexer": "ipython3",
310 | "version": "3.7.1"
311 | }
312 | },
313 | "nbformat": 4,
314 | "nbformat_minor": 2
315 | }
316 |
--------------------------------------------------------------------------------
/Code/Removing emojis from text.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Removing Emojis from Text"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 4,
13 | "metadata": {},
14 | "outputs": [
15 | {
16 | "name": "stdout",
17 | "output_type": "stream",
18 | "text": [
19 | "Raw tweet: Hi 😂! Have a nice weekend 💕👭\n",
20 | "Preprocessed tweet: Hi ! Have a nice weekend \n"
21 | ]
22 | }
23 | ],
24 | "source": [
25 | "text= \"Hi 😂! Have a nice weekend 💕👭\"\n",
26 | "preprocessed_text=text.encode('ascii', 'ignore').decode('ascii')\n",
27 | "\n",
28 | "print(\"Raw tweet:\",text) #with emoji\n",
29 | "print(\"Preprocessed tweet:\",preprocessed_text) # no emoji"
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": null,
35 | "metadata": {},
36 | "outputs": [],
37 | "source": []
38 | }
39 | ],
40 | "metadata": {
41 | "kernelspec": {
42 | "display_name": "Python 3",
43 | "language": "python",
44 | "name": "python3"
45 | },
46 | "language_info": {
47 | "codemirror_mode": {
48 | "name": "ipython",
49 | "version": 3
50 | },
51 | "file_extension": ".py",
52 | "mimetype": "text/x-python",
53 | "name": "python",
54 | "nbconvert_exporter": "python",
55 | "pygments_lexer": "ipython3",
56 | "version": "3.7.1"
57 | }
58 | },
59 | "nbformat": 4,
60 | "nbformat_minor": 2
61 | }
62 |
--------------------------------------------------------------------------------
/Code/Running Linux Commands in Jupyter Notebook.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "---\n",
8 | "\n",
9 | "## Running Linux Commands in Jupyter Notebook\n",
10 | "\n",
11 | "---"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 3,
17 | "metadata": {},
18 | "outputs": [
19 | {
20 | "name": "stdout",
21 | "output_type": "stream",
22 | "text": [
23 | " 606 14292 113629\r\n"
24 | ]
25 | }
26 | ],
27 | "source": [
28 | "# get the number of lines, word count and character count\n",
29 | "!cat 'ind-ban-comment.csv' | wc "
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 4,
35 | "metadata": {},
36 | "outputs": [
37 | {
38 | "name": "stdout",
39 | "output_type": "stream",
40 | "text": [
41 | "Batsman,Batsman_Name,Bowler,Bowler_Name,Commentary,Detail,Dismissed,Id,Isball,Isboundary,Iswicket,Over,Runs,Timestamp\r\n",
42 | "28994,Mohammed Shami,63881,Mustafizur Rahman,\"OUT! Bowled! 5-fer to finish a tremendous last over. His 4th 5-wicket haul in ODIs. Around off, Shami moves across to paddle but misses the ball hits his pads and goes onto hit the stumps. 2 wickets and just 3 runs from the final over. Top notch from Mustafizur. INDIA FINISH WITH 314/9 FROM THEIR 50 OVERS.\",W,28994,346,True,,1,49.6,0,2019-07-02 13:18:47\r\n",
43 | "5132,Bhuvneshwar Kumar,63881,Mustafizur Rahman,\"WIDE AND RUN OUT! Slower delivery outside off, it is on the wrong side of the tramline. Shami comes for a run. Bhuvi was slow though. Rahim throws it to Mustafizur, who hits the stumps at the bowler's end. The umpire takes it upstairs but Bhuvneshwar had started to walk back even before the replays rolled in.\",W+wd,5132,344,True,,1,49.6,1,2019-07-02 13:17:28\r\n",
44 | "28994,Mohammed Shami,63881,Mustafizur Rahman,\"Back of a length ball on off, Shami pulls it to deep mid-wicket and gets a single.\",,,343,True,,,49.5,1,2019-07-02 13:16:03\r\n",
45 | "5132,Bhuvneshwar Kumar,63881,Mustafizur Rahman,\"Just 1 run off the 4 balls. This is World Class stuff. Good length ball on off, Kumar punches it to the side of the pitch and gets a single.\",,,342,True,,,49.4,1,2019-07-02 13:15:17\r\n",
46 | "3676,MS Dhoni,63881,Mustafizur Rahman,\"OUT! No Dhoni magic in the last over. Slower bouncer away from the body, Dhoni looks to flat-bat it straight over the bowler's head. It comes off the upper half of his bat and goes to the left of mid on. Shakib moves across to his left hand side and takes an easy catch.\",W,3676,340,True,,1,49.3,0,2019-07-02 13:13:39\r\n",
47 | "3676,MS Dhoni,63881,Mustafizur Rahman,\"Another dot. Bangladesh will take this. Short and slow, Dhoni pulls it to Shakib at short mid-wicket.\",,,339,True,,,49.2,0,2019-07-02 13:12:47\r\n",
48 | "cat: write error: Broken pipe\r\n"
49 | ]
50 | }
51 | ],
52 | "source": [
53 | "# get the top 7 rows\n",
54 | "!cat 'ind-ban-comment.csv' | head -7"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": null,
60 | "metadata": {},
61 | "outputs": [],
62 | "source": []
63 | }
64 | ],
65 | "metadata": {
66 | "kernelspec": {
67 | "display_name": "Python 3",
68 | "language": "python",
69 | "name": "python3"
70 | },
71 | "language_info": {
72 | "codemirror_mode": {
73 | "name": "ipython",
74 | "version": 3
75 | },
76 | "file_extension": ".py",
77 | "mimetype": "text/x-python",
78 | "name": "python",
79 | "nbconvert_exporter": "python",
80 | "pygments_lexer": "ipython3",
81 | "version": "3.7.3"
82 | }
83 | },
84 | "nbformat": 4,
85 | "nbformat_minor": 2
86 | }
87 |
--------------------------------------------------------------------------------
/Code/Save_sklearn_model.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Save Machine Learning Model using Pickle"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 16,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "#import libraries\n",
17 | "import pandas as pd\n",
18 | "import numpy as np\n",
19 | "import sklearn "
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 17,
25 | "metadata": {},
26 | "outputs": [
27 | {
28 | "data": {
29 | "text/html": [
30 |       "(HTML table: first five rows of data_cleaned.csv; same content as the text/plain output below)"
196 | ],
197 | "text/plain": [
198 | " Survived Age Fare Pclass_1 Pclass_2 Pclass_3 Sex_female \\\n",
199 | "0 0 22.0 7.2500 0 0 1 0 \n",
200 | "1 1 38.0 71.2833 1 0 0 1 \n",
201 | "2 1 26.0 7.9250 0 0 1 1 \n",
202 | "3 1 35.0 53.1000 1 0 0 1 \n",
203 | "4 0 35.0 8.0500 0 0 1 0 \n",
204 | "\n",
205 | " Sex_male SibSp_0 SibSp_1 ... Parch_0 Parch_1 Parch_2 Parch_3 \\\n",
206 | "0 1 0 1 ... 1 0 0 0 \n",
207 | "1 0 0 1 ... 1 0 0 0 \n",
208 | "2 0 1 0 ... 1 0 0 0 \n",
209 | "3 0 0 1 ... 1 0 0 0 \n",
210 | "4 1 1 0 ... 1 0 0 0 \n",
211 | "\n",
212 | " Parch_4 Parch_5 Parch_6 Embarked_C Embarked_Q Embarked_S \n",
213 | "0 0 0 0 0 0 1 \n",
214 | "1 0 0 0 1 0 0 \n",
215 | "2 0 0 0 0 0 1 \n",
216 | "3 0 0 0 0 0 1 \n",
217 | "4 0 0 0 0 0 1 \n",
218 | "\n",
219 | "[5 rows x 25 columns]"
220 | ]
221 | },
222 | "execution_count": 17,
223 | "metadata": {},
224 | "output_type": "execute_result"
225 | }
226 | ],
227 | "source": [
228 | "#import dataset\n",
229 | "data = pd.read_csv('../Data/data_cleaned.csv')\n",
230 | "data.head()"
231 | ]
232 | },
233 | {
234 | "cell_type": "markdown",
235 | "metadata": {},
236 | "source": [
237 | "## Build ML model"
238 | ]
239 | },
240 | {
241 | "cell_type": "code",
242 | "execution_count": 18,
243 | "metadata": {},
244 | "outputs": [
245 | {
246 | "data": {
247 | "text/plain": [
248 | "KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',\n",
249 | " metric_params=None, n_jobs=None, n_neighbors=3, p=2,\n",
250 | " weights='uniform')"
251 | ]
252 | },
253 | "execution_count": 18,
254 | "metadata": {},
255 | "output_type": "execute_result"
256 | }
257 | ],
258 | "source": [
259 | "# Define Dependent and independent variable\n",
260 | "X = data.drop('Survived', axis = 1)\n",
261 | "y = data['Survived']\n",
262 | "\n",
263 | "# Split train and test data\n",
264 | "from sklearn.model_selection import train_test_split\n",
265 | "train_X, test_X, train_y, test_y = train_test_split(X, y, test_size = 0.3, random_state = 142)\n",
266 | "\n",
267 | "# import KNeighborsClassifier model \n",
268 | "from sklearn.neighbors import KNeighborsClassifier \n",
269 | "knn = KNeighborsClassifier(n_neighbors = 3) \n",
270 | " \n",
271 | "# train model \n",
272 | "knn.fit(train_X, train_y) "
273 | ]
274 | },
275 | {
276 | "cell_type": "markdown",
277 | "metadata": {},
278 | "source": [
279 | "## Save the model"
280 | ]
281 | },
282 | {
283 | "cell_type": "code",
284 | "execution_count": 19,
285 | "metadata": {},
286 | "outputs": [
287 | {
288 | "data": {
289 | "text/plain": [
290 | "array([0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0,\n",
291 | " 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0,\n",
292 | " 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1,\n",
293 | " 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1,\n",
294 | " 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n",
295 | " 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0,\n",
296 | " 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,\n",
297 | " 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,\n",
298 | " 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1,\n",
299 | " 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0,\n",
300 | " 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0,\n",
301 | " 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1,\n",
302 | " 0, 1, 1, 1], dtype=int64)"
303 | ]
304 | },
305 | "execution_count": 19,
306 | "metadata": {},
307 | "output_type": "execute_result"
308 | }
309 | ],
310 | "source": [
311 | "import pickle\n",
312 | "\n",
313 | "# Save the KNN model using pickle\n",
314 | "saved_model = pickle.dumps(knn)\n",
315 | "\n",
316 | "# Load the saved KNN model\n",
317 | "load_model = pickle.loads(saved_model) \n",
318 | "\n",
319 | "# Make predictions using the loaded pickled model \n",
320 | "load_model.predict(test_X) "
321 | ]
322 | },
323 | {
324 | "cell_type": "code",
325 | "execution_count": null,
326 | "metadata": {},
327 | "outputs": [],
328 | "source": []
329 | }
330 | ],
331 | "metadata": {
332 | "kernelspec": {
333 | "display_name": "Python 3",
334 | "language": "python",
335 | "name": "python3"
336 | },
337 | "language_info": {
338 | "codemirror_mode": {
339 | "name": "ipython",
340 | "version": 3
341 | },
342 | "file_extension": ".py",
343 | "mimetype": "text/x-python",
344 | "name": "python",
345 | "nbconvert_exporter": "python",
346 | "pygments_lexer": "ipython3",
347 | "version": "3.7.1"
348 | }
349 | },
350 | "nbformat": 4,
351 | "nbformat_minor": 2
352 | }
353 |
--------------------------------------------------------------------------------
/Code/args.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Let us understand *args in Python"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 7,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "# Function to calculate averages\n",
17 |     "# We will pass a variable number of arguments using *args\n",
18 | "\n",
19 |     "def calculate_average(*num):\n",
20 | " add = sum(num)\n",
21 | " count = len(num)\n",
22 | "\n",
23 | " \n",
24 | " average = (add / count) \n",
25 | "\n",
26 | " \n",
27 | " print(\"The average of values is\", average)"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 13,
33 | "metadata": {},
34 | "outputs": [
35 | {
36 | "name": "stdout",
37 | "output_type": "stream",
38 | "text": [
39 | "The average of values is 7.5\n"
40 | ]
41 | }
42 | ],
43 | "source": [
44 | "# Calculate average of 2 values\n",
45 | "\n",
46 | "calculate_average(7,8)"
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": 14,
52 | "metadata": {},
53 | "outputs": [
54 | {
55 | "name": "stdout",
56 | "output_type": "stream",
57 | "text": [
58 | "The average of values is 8.25\n"
59 | ]
60 | }
61 | ],
62 | "source": [
63 |     "# Calculate average of 4 values\n",
64 | "\n",
65 | "calculate_average(3,8,9,13)"
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "execution_count": 15,
71 | "metadata": {},
72 | "outputs": [
73 | {
74 | "name": "stdout",
75 | "output_type": "stream",
76 | "text": [
77 | "The average of values is 58.07692307692308\n"
78 | ]
79 | }
80 | ],
81 | "source": [
82 |     "# Calculate average of 13 values\n",
83 | "\n",
84 | "calculate_average(3,8,9,10,4,5,6,11,22,32,100,200,345)"
85 | ]
86 | },
87 | {
88 | "cell_type": "code",
89 | "execution_count": 16,
90 | "metadata": {},
91 | "outputs": [
92 | {
93 | "name": "stdout",
94 | "output_type": "stream",
95 | "text": [
96 | "The average of values is 8.25\n"
97 | ]
98 | }
99 | ],
100 | "source": [
101 | "calculate_average(3,8,9,13)"
102 | ]
103 | }
104 | ],
105 | "metadata": {
106 | "kernelspec": {
107 | "display_name": "Python 3",
108 | "language": "python",
109 | "name": "python3"
110 | },
111 | "language_info": {
112 | "codemirror_mode": {
113 | "name": "ipython",
114 | "version": 3
115 | },
116 | "file_extension": ".py",
117 | "mimetype": "text/x-python",
118 | "name": "python",
119 | "nbconvert_exporter": "python",
120 | "pygments_lexer": "ipython3",
121 | "version": "3.7.1"
122 | }
123 | },
124 | "nbformat": 4,
125 | "nbformat_minor": 2
126 | }
127 |
--------------------------------------------------------------------------------
/Code/av_hack.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {
7 | "colab": {
8 | "base_uri": "https://localhost:8080/",
9 | "height": 122
10 | },
11 | "colab_type": "code",
12 | "id": "XG57bzU6cJ2U",
13 | "outputId": "26641faf-bc75-4bc5-9616-080905d329e0"
14 | },
15 | "outputs": [],
16 | "source": [
17 | "!pip install tokenizers"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 6,
23 | "metadata": {
24 | "colab": {
25 | "base_uri": "https://localhost:8080/",
26 | "height": 336
27 | },
28 | "colab_type": "code",
29 | "id": "iQ17w2Q5azCt",
30 | "outputId": "2c5c5f7f-4b84-41d4-f87b-745f003e1b78"
31 | },
32 | "outputs": [
33 | {
34 | "name": "stdout",
35 | "output_type": "stream",
36 | "text": [
37 | "Unique id: [39, 68, 75, 297, 0, 220, 39, 78, 86, 257, 264, 220, 88, 274, 30]\n",
38 | "Tokens: ['H', 'e', 'l', 'lo', '!', 'Ġ', 'H', 'o', 'w', 'Ġa', 're', 'Ġ', 'y', 'ou', '?']\n",
39 | "Merges: [(0, 1), (1, 2), (2, 3), (3, 5), (5, 6), (6, 7), (7, 8), (8, 9), (9, 10), (10, 12), (12, 14), (14, 15), (15, 16), (16, 18), (18, 19)]\n"
40 | ]
41 | }
42 | ],
43 | "source": [
44 | "#importing different tokenizers\n",
45 | "from tokenizers import (ByteLevelBPETokenizer,\n",
46 | " BPETokenizer,\n",
47 | " SentencePieceBPETokenizer,\n",
48 | " BertWordPieceTokenizer)\n",
49 | "#instantiate tokenizer\n",
50 | "tokenizer = ByteLevelBPETokenizer()\n",
51 | "\n",
52 | "#train with our own data\n",
53 | "tokenizer.train([\"sample.txt\"], vocab_size=20000)\n",
54 | "\n",
55 | "#Lets tokenize the sentence\n",
56 | "output = tokenizer.encode(\"Hello! How are you?\")\n",
57 | "\n",
58 | "print(\"Unique id:\",output.ids)\n",
59 | "print(\"Tokens:\",output.tokens)\n",
60 | "print(\"Merges:\",output.offsets)"
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": 7,
66 | "metadata": {
67 | "colab": {
68 | "base_uri": "https://localhost:8080/",
69 | "height": 163
70 | },
71 | "colab_type": "code",
72 | "id": "9qKgNFrabgCm",
73 | "outputId": "e6ed2a40-e822-4ff2-d0f6-af4569c1deb5"
74 | },
75 | "outputs": [
76 | {
77 | "data": {
78 | "text/plain": [
79 | "'H'"
80 | ]
81 | },
82 | "execution_count": 7,
83 | "metadata": {},
84 | "output_type": "execute_result"
85 | }
86 | ],
87 | "source": [
88 | "#converting integer back to character\n",
89 | "output.original_str[output.offsets[0]]"
90 | ]
91 | },
92 | {
93 | "cell_type": "code",
94 | "execution_count": 0,
95 | "metadata": {
96 | "colab": {},
97 | "colab_type": "code",
98 | "id": "xqE2EH9Otv0w"
99 | },
100 | "outputs": [],
101 | "source": []
102 | }
103 | ],
104 | "metadata": {
105 | "colab": {
106 | "name": "av hack.ipynb",
107 | "provenance": []
108 | },
109 | "kernelspec": {
110 | "display_name": "Python 3",
111 | "language": "python",
112 | "name": "python3"
113 | },
114 | "language_info": {
115 | "codemirror_mode": {
116 | "name": "ipython",
117 | "version": 3
118 | },
119 | "file_extension": ".py",
120 | "mimetype": "text/x-python",
121 | "name": "python",
122 | "nbconvert_exporter": "python",
123 | "pygments_lexer": "ipython3",
124 | "version": "3.7.1"
125 | }
126 | },
127 | "nbformat": 4,
128 | "nbformat_minor": 1
129 | }
130 |
--------------------------------------------------------------------------------
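Note on /Code/av_hack.ipynb — the notebook trains a ByteLevelBPETokenizer on a local sample.txt, which is assumed to exist next to the notebook. A hedged sketch of the round trip from ids back to text; decode() is available on the tokenizers library's tokenizer objects, though exact import names have shifted across library versions:

    # Assumes `tokenizer` and `output` from the training cell above
    decoded = tokenizer.decode(output.ids)
    print(decoded)   # should closely reproduce "Hello! How are you?"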
/Code/column_transformer.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Sklearn ColumnTransformer"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 52,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import pandas as pd\n",
17 | "from sklearn.compose import ColumnTransformer"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 53,
23 | "metadata": {},
24 | "outputs": [
25 | {
26 | "data": {
27 | "text/html": [
28 | "\n",
29 | "\n",
42 | "
\n",
43 | " \n",
44 | " \n",
45 | " | \n",
46 | " Loan_ID | \n",
47 | " Gender | \n",
48 | " Married | \n",
49 | " Dependents | \n",
50 | " Education | \n",
51 | " Self_Employed | \n",
52 | " ApplicantIncome | \n",
53 | " CoapplicantIncome | \n",
54 | " LoanAmount | \n",
55 | " Loan_Amount_Term | \n",
56 | " Credit_History | \n",
57 | " Property_Area | \n",
58 | " Loan_Status | \n",
59 | "
\n",
60 | " \n",
61 | " \n",
62 | " \n",
63 | " 0 | \n",
64 | " LP001002 | \n",
65 | " Male | \n",
66 | " No | \n",
67 | " 0 | \n",
68 | " Graduate | \n",
69 | " No | \n",
70 | " 5849 | \n",
71 | " 0.0 | \n",
72 | " NaN | \n",
73 | " 360.0 | \n",
74 | " 1.0 | \n",
75 | " Urban | \n",
76 | " Y | \n",
77 | "
\n",
78 | " \n",
79 | " 1 | \n",
80 | " LP001003 | \n",
81 | " Male | \n",
82 | " Yes | \n",
83 | " 1 | \n",
84 | " Graduate | \n",
85 | " No | \n",
86 | " 4583 | \n",
87 | " 1508.0 | \n",
88 | " 128.0 | \n",
89 | " 360.0 | \n",
90 | " 1.0 | \n",
91 | " Rural | \n",
92 | " N | \n",
93 | "
\n",
94 | " \n",
95 | " 2 | \n",
96 | " LP001005 | \n",
97 | " Male | \n",
98 | " Yes | \n",
99 | " 0 | \n",
100 | " Graduate | \n",
101 | " Yes | \n",
102 | " 3000 | \n",
103 | " 0.0 | \n",
104 | " 66.0 | \n",
105 | " 360.0 | \n",
106 | " 1.0 | \n",
107 | " Urban | \n",
108 | " Y | \n",
109 | "
\n",
110 | " \n",
111 | " 3 | \n",
112 | " LP001006 | \n",
113 | " Male | \n",
114 | " Yes | \n",
115 | " 0 | \n",
116 | " Not Graduate | \n",
117 | " No | \n",
118 | " 2583 | \n",
119 | " 2358.0 | \n",
120 | " 120.0 | \n",
121 | " 360.0 | \n",
122 | " 1.0 | \n",
123 | " Urban | \n",
124 | " Y | \n",
125 | "
\n",
126 | " \n",
127 | " 4 | \n",
128 | " LP001008 | \n",
129 | " Male | \n",
130 | " No | \n",
131 | " 0 | \n",
132 | " Graduate | \n",
133 | " No | \n",
134 | " 6000 | \n",
135 | " 0.0 | \n",
136 | " 141.0 | \n",
137 | " 360.0 | \n",
138 | " 1.0 | \n",
139 | " Urban | \n",
140 | " Y | \n",
141 | "
\n",
142 | " \n",
143 | "
\n",
144 | "
"
145 | ],
146 | "text/plain": [
147 | " Loan_ID Gender Married Dependents Education Self_Employed \\\n",
148 | "0 LP001002 Male No 0 Graduate No \n",
149 | "1 LP001003 Male Yes 1 Graduate No \n",
150 | "2 LP001005 Male Yes 0 Graduate Yes \n",
151 | "3 LP001006 Male Yes 0 Not Graduate No \n",
152 | "4 LP001008 Male No 0 Graduate No \n",
153 | "\n",
154 | " ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term \\\n",
155 | "0 5849 0.0 NaN 360.0 \n",
156 | "1 4583 1508.0 128.0 360.0 \n",
157 | "2 3000 0.0 66.0 360.0 \n",
158 | "3 2583 2358.0 120.0 360.0 \n",
159 | "4 6000 0.0 141.0 360.0 \n",
160 | "\n",
161 | " Credit_History Property_Area Loan_Status \n",
162 | "0 1.0 Urban Y \n",
163 | "1 1.0 Rural N \n",
164 | "2 1.0 Urban Y \n",
165 | "3 1.0 Urban Y \n",
166 | "4 1.0 Urban Y "
167 | ]
168 | },
169 | "execution_count": 53,
170 | "metadata": {},
171 | "output_type": "execute_result"
172 | }
173 | ],
174 | "source": [
175 | "# import dataset\n",
176 | "data = pd.read_csv('../Data/loan_train.csv')\n",
177 | "data.head()"
178 | ]
179 | },
180 | {
181 | "cell_type": "code",
182 | "execution_count": 54,
183 | "metadata": {},
184 | "outputs": [],
185 | "source": [
186 | "# rows having null values removed for simplicity purpose\n",
187 | "data.dropna(inplace = True)"
188 | ]
189 | },
190 | {
191 | "cell_type": "code",
192 | "execution_count": 55,
193 | "metadata": {},
194 | "outputs": [],
195 | "source": [
196 | "# Dependent and independent variables\n",
197 | "X = data.drop('Loan_Status', axis = 1)\n",
198 | "y = data['Loan_Status']"
199 | ]
200 | },
201 | {
202 | "cell_type": "code",
203 | "execution_count": 56,
204 | "metadata": {},
205 | "outputs": [],
206 | "source": [
207 | "# Apply StandardScalar transformation to numeric features\n",
208 | "# Apply OneHotEncoder transformation to categorical features\n",
209 | "\n",
210 | "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
211 | "\n",
212 | "transformers = [(\"num\", StandardScaler(), ['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount']),\n",
213 | " (\"cat\", OneHotEncoder(handle_unknown='ignore'), ['Gender', 'Married', 'Self_Employed', 'Property_Area'])]\n",
214 | "\n",
215 | "transformer = ColumnTransformer(transformers = transformers)"
216 | ]
217 | },
218 | {
219 | "cell_type": "code",
220 | "execution_count": 57,
221 | "metadata": {},
222 | "outputs": [
223 | {
224 | "name": "stderr",
225 | "output_type": "stream",
226 | "text": [
227 | "C:\\Users\\ram\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\data.py:625: DataConversionWarning: Data with input dtype int64, float64 were all converted to float64 by StandardScaler.\n",
228 | " return self.partial_fit(X, y)\n",
229 | "C:\\Users\\ram\\Anaconda3\\lib\\site-packages\\sklearn\\base.py:462: DataConversionWarning: Data with input dtype int64, float64 were all converted to float64 by StandardScaler.\n",
230 | " return self.fit(X, **fit_params).transform(X)\n"
231 | ]
232 | },
233 | {
234 | "data": {
235 | "text/plain": [
236 | "array([-0.13796959, -0.02795204, -0.20808917, 0. , 1. ,\n",
237 | " 0. , 1. , 1. , 0. , 1. ,\n",
238 | " 0. , 0. ])"
239 | ]
240 | },
241 | "execution_count": 57,
242 | "metadata": {},
243 | "output_type": "execute_result"
244 | }
245 | ],
246 | "source": [
247 | "X = transformer.fit_transform(X)\n",
248 | "X[0]"
249 | ]
250 | },
251 | {
252 | "cell_type": "code",
253 | "execution_count": null,
254 | "metadata": {},
255 | "outputs": [],
256 | "source": []
257 | },
258 | {
259 | "cell_type": "code",
260 | "execution_count": null,
261 | "metadata": {},
262 | "outputs": [],
263 | "source": []
264 | }
265 | ],
266 | "metadata": {
267 | "kernelspec": {
268 | "display_name": "Python 3",
269 | "language": "python",
270 | "name": "python3"
271 | },
272 | "language_info": {
273 | "codemirror_mode": {
274 | "name": "ipython",
275 | "version": 3
276 | },
277 | "file_extension": ".py",
278 | "mimetype": "text/x-python",
279 | "name": "python",
280 | "nbconvert_exporter": "python",
281 | "pygments_lexer": "ipython3",
282 | "version": "3.7.1"
283 | }
284 | },
285 | "nbformat": 4,
286 | "nbformat_minor": 2
287 | }
288 |
--------------------------------------------------------------------------------
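Note on /Code/column_transformer.ipynb — by default ColumnTransformer drops every column that is not named in the transformers list (Loan_ID, Dependents, etc. above). A minimal sketch of keeping the remaining columns with remainder='passthrough':

    from sklearn.compose import ColumnTransformer
    from sklearn.preprocessing import StandardScaler

    # remainder='passthrough' appends the untouched columns after the transformed ones
    transformer = ColumnTransformer(
        transformers=[('num', StandardScaler(), ['ApplicantIncome', 'LoanAmount'])],
        remainder='passthrough')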
/Code/convert_string_to_characters.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "colab_type": "text",
7 | "id": "QsmXYpdG-peE"
8 | },
9 | "source": [
10 | "## Easiest way to convert a string to characters\n"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 1,
16 | "metadata": {
17 | "colab": {
18 | "base_uri": "https://localhost:8080/",
19 | "height": 34
20 | },
21 | "colab_type": "code",
22 | "id": "xqE2EH9Otv0w",
23 | "outputId": "ec5fb143-cf7a-4cfc-9f6a-9a9b01211639"
24 | },
25 | "outputs": [
26 | {
27 | "name": "stdout",
28 | "output_type": "stream",
29 | "text": [
30 | "['S', 'p', 'o', 'r', 't', 's', ' ', 'A', 'n', 'a', 'l', 'y', 't', 'i', 'c', 's', ' ', 'c', 'a', 'n', ' ', 'b', 'e', ' ', 'a', ' ', 'g', 'a', 'm', 'e', ' ', 'c', 'h', 'a', 'n', 'g', 'e', 'r']\n"
31 | ]
32 | }
33 | ],
34 | "source": [
35 | "text='Sports Analytics can be a game changer'\n",
36 | "characters=list(text)\n",
37 | "print(characters)"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": null,
43 | "metadata": {},
44 | "outputs": [],
45 | "source": []
46 | }
47 | ],
48 | "metadata": {
49 | "colab": {
50 | "name": "av hack.ipynb",
51 | "provenance": []
52 | },
53 | "kernelspec": {
54 | "display_name": "Python 3",
55 | "language": "python",
56 | "name": "python3"
57 | },
58 | "language_info": {
59 | "codemirror_mode": {
60 | "name": "ipython",
61 | "version": 3
62 | },
63 | "file_extension": ".py",
64 | "mimetype": "text/x-python",
65 | "name": "python",
66 | "nbconvert_exporter": "python",
67 | "pygments_lexer": "ipython3",
68 | "version": "3.7.1"
69 | }
70 | },
71 | "nbformat": 4,
72 | "nbformat_minor": 1
73 | }
74 |
--------------------------------------------------------------------------------
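Note on /Code/convert_string_to_characters.ipynb — the inverse operation is a one-liner as well:

    # join the character list back into the original string
    text_again = ''.join(characters)
    print(text_again)   # 'Sports Analytics can be a game changer'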
/Code/first and last name extraction.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 6,
15 | "metadata": {},
16 | "outputs": [
17 | {
18 | "data": {
19 | "text/html": [
20 | "\n",
21 | "\n",
34 | "
\n",
35 | " \n",
36 | " \n",
37 | " | \n",
38 | " name | \n",
39 | "
\n",
40 | " \n",
41 | " \n",
42 | " \n",
43 | " 0 | \n",
44 | " Elise Mccann | \n",
45 | "
\n",
46 | " \n",
47 | " 1 | \n",
48 | " Aiden Berger | \n",
49 | "
\n",
50 | " \n",
51 | " 2 | \n",
52 | " Elle Kelley | \n",
53 | "
\n",
54 | " \n",
55 | "
\n",
56 | "
"
57 | ],
58 | "text/plain": [
59 | " name\n",
60 | "0 Elise Mccann\n",
61 | "1 Aiden Berger\n",
62 | "2 Elle Kelley"
63 | ]
64 | },
65 | "execution_count": 6,
66 | "metadata": {},
67 | "output_type": "execute_result"
68 | }
69 | ],
70 | "source": [
71 | "# create a dataframe\n",
72 | "df = pd.DataFrame({'name':['Elise Mccann', 'Aiden Berger', 'Elle Kelley']})\n",
73 | "df"
74 | ]
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": 7,
79 | "metadata": {},
80 | "outputs": [
81 | {
82 | "data": {
83 | "text/html": [
84 | "\n",
85 | "\n",
98 | "
\n",
99 | " \n",
100 | " \n",
101 | " | \n",
102 | " name | \n",
103 | " first_name | \n",
104 | " last_name | \n",
105 | "
\n",
106 | " \n",
107 | " \n",
108 | " \n",
109 | " 0 | \n",
110 | " Elise Mccann | \n",
111 | " Elise | \n",
112 | " Mccann | \n",
113 | "
\n",
114 | " \n",
115 | " 1 | \n",
116 | " Aiden Berger | \n",
117 | " Aiden | \n",
118 | " Berger | \n",
119 | "
\n",
120 | " \n",
121 | " 2 | \n",
122 | " Elle Kelley | \n",
123 | " Elle | \n",
124 | " Kelley | \n",
125 | "
\n",
126 | " \n",
127 | "
\n",
128 | "
"
129 | ],
130 | "text/plain": [
131 | " name first_name last_name\n",
132 | "0 Elise Mccann Elise Mccann\n",
133 | "1 Aiden Berger Aiden Berger\n",
134 | "2 Elle Kelley Elle Kelley"
135 | ]
136 | },
137 | "execution_count": 7,
138 | "metadata": {},
139 | "output_type": "execute_result"
140 | }
141 | ],
142 | "source": [
143 | "# extract first name and last name\n",
144 | "df['first_name'] = df['name'].str.split(' ', expand = True)[0]\n",
145 | "df['last_name'] = df['name'].str.split(' ', expand = True)[1]\n",
146 | "\n",
147 | "df"
148 | ]
149 | },
150 | {
151 | "cell_type": "code",
152 | "execution_count": null,
153 | "metadata": {},
154 | "outputs": [],
155 | "source": []
156 | }
157 | ],
158 | "metadata": {
159 | "kernelspec": {
160 | "display_name": "Python 3",
161 | "language": "python",
162 | "name": "python3"
163 | },
164 | "language_info": {
165 | "codemirror_mode": {
166 | "name": "ipython",
167 | "version": 3
168 | },
169 | "file_extension": ".py",
170 | "mimetype": "text/x-python",
171 | "name": "python",
172 | "nbconvert_exporter": "python",
173 | "pygments_lexer": "ipython3",
174 | "version": "3.7.1"
175 | }
176 | },
177 | "nbformat": 4,
178 | "nbformat_minor": 2
179 | }
180 |
--------------------------------------------------------------------------------
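Note on /Code/first and last name extraction.ipynb — splitting on every space breaks for names with more than two words. A small sketch using the n parameter of str.split so that everything after the first space lands in last_name:

    # split at most once: column 0 is the first name, column 1 is the rest
    parts = df['name'].str.split(' ', n=1, expand=True)
    df['first_name'], df['last_name'] = parts[0], parts[1]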
/Code/generator vs list.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Python List Comprehension v/s Python Generators"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 49,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "from sys import getsizeof"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 50,
22 | "metadata": {},
23 | "outputs": [],
24 | "source": [
25 | "# list comprehension\n",
26 | "list_comp = [i**3 for i in range(10000)]\n",
27 | "# generator expression\n",
28 | "gen_comp = (i**3 for i in range(10000))"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 51,
34 | "metadata": {},
35 | "outputs": [
36 | {
37 | "name": "stdout",
38 | "output_type": "stream",
39 | "text": [
40 | "Size of list is : 87624\n"
41 | ]
42 | }
43 | ],
44 | "source": [
45 | "# size of list comprehension\n",
46 | "a = getsizeof(list_comp)\n",
47 | "print(\"Size of list is : \", a)"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": 52,
53 | "metadata": {},
54 | "outputs": [
55 | {
56 | "name": "stdout",
57 | "output_type": "stream",
58 | "text": [
59 | "Size of generator is : 120\n"
60 | ]
61 | }
62 | ],
63 | "source": [
64 | "#size of generator expression\n",
65 | "b = getsizeof(gen_comp)\n",
66 | "print(\"Size of generator is : \", b)"
67 | ]
68 | },
69 | {
70 | "cell_type": "code",
71 | "execution_count": 53,
72 | "metadata": {},
73 | "outputs": [
74 | {
75 | "name": "stdout",
76 | "output_type": "stream",
77 | "text": [
78 | "9.14460999999983\n"
79 | ]
80 | }
81 | ],
82 | "source": [
83 | "# time taken by list comprehension\n",
84 | "import timeit\n",
85 | "print(timeit.timeit('''list_com = [i for i in range(100) if i % 2 == 0]'''))"
86 | ]
87 | },
88 | {
89 | "cell_type": "code",
90 | "execution_count": 54,
91 | "metadata": {},
92 | "outputs": [
93 | {
94 | "name": "stdout",
95 | "output_type": "stream",
96 | "text": [
97 | "0.7809511999998904\n"
98 | ]
99 | }
100 | ],
101 | "source": [
102 | "# time taken by generator expression \n",
103 | "print(timeit.timeit('''list_com = (i for i in range(100) if i % 2 == 0)'''))"
104 | ]
105 | },
106 | {
107 | "cell_type": "code",
108 | "execution_count": null,
109 | "metadata": {},
110 | "outputs": [],
111 | "source": []
112 | }
113 | ],
114 | "metadata": {
115 | "kernelspec": {
116 | "display_name": "Python 3",
117 | "language": "python",
118 | "name": "python3"
119 | },
120 | "language_info": {
121 | "codemirror_mode": {
122 | "name": "ipython",
123 | "version": 3
124 | },
125 | "file_extension": ".py",
126 | "mimetype": "text/x-python",
127 | "name": "python",
128 | "nbconvert_exporter": "python",
129 | "pygments_lexer": "ipython3",
130 | "version": "3.7.1"
131 | }
132 | },
133 | "nbformat": 4,
134 | "nbformat_minor": 2
135 | }
136 |
--------------------------------------------------------------------------------
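Note on /Code/generator vs list.ipynb — the trade-off for the smaller memory footprint is that a generator can only be consumed once:

    gen_comp = (i**3 for i in range(5))
    print(sum(gen_comp))   # 100 - cubes of 0..4
    print(sum(gen_comp))   # 0   - the generator is already exhausted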
/Code/groupby_in_pandas.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Understand Groupby in Pandas"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 14,
13 | "metadata": {},
14 | "outputs": [
15 | {
16 | "data": {
17 | "text/html": [
18 | "\n",
19 | "\n",
32 | "
\n",
33 | " \n",
34 | " \n",
35 | " | \n",
36 | " Loan_ID | \n",
37 | " Gender | \n",
38 | " Married | \n",
39 | " Dependents | \n",
40 | " Education | \n",
41 | " Self_Employed | \n",
42 | " ApplicantIncome | \n",
43 | " CoapplicantIncome | \n",
44 | " LoanAmount | \n",
45 | " Loan_Amount_Term | \n",
46 | " Credit_History | \n",
47 | " Property_Area | \n",
48 | " Loan_Status | \n",
49 | "
\n",
50 | " \n",
51 | " \n",
52 | " \n",
53 | " 0 | \n",
54 | " LP001002 | \n",
55 | " Male | \n",
56 | " No | \n",
57 | " 0 | \n",
58 | " Graduate | \n",
59 | " No | \n",
60 | " 5849 | \n",
61 | " 0.0 | \n",
62 | " NaN | \n",
63 | " 360.0 | \n",
64 | " 1.0 | \n",
65 | " Urban | \n",
66 | " Y | \n",
67 | "
\n",
68 | " \n",
69 | " 1 | \n",
70 | " LP001003 | \n",
71 | " Male | \n",
72 | " Yes | \n",
73 | " 1 | \n",
74 | " Graduate | \n",
75 | " No | \n",
76 | " 4583 | \n",
77 | " 1508.0 | \n",
78 | " 128.0 | \n",
79 | " 360.0 | \n",
80 | " 1.0 | \n",
81 | " Rural | \n",
82 | " N | \n",
83 | "
\n",
84 | " \n",
85 | " 2 | \n",
86 | " LP001005 | \n",
87 | " Male | \n",
88 | " Yes | \n",
89 | " 0 | \n",
90 | " Graduate | \n",
91 | " Yes | \n",
92 | " 3000 | \n",
93 | " 0.0 | \n",
94 | " 66.0 | \n",
95 | " 360.0 | \n",
96 | " 1.0 | \n",
97 | " Urban | \n",
98 | " Y | \n",
99 | "
\n",
100 | " \n",
101 | " 3 | \n",
102 | " LP001006 | \n",
103 | " Male | \n",
104 | " Yes | \n",
105 | " 0 | \n",
106 | " Not Graduate | \n",
107 | " No | \n",
108 | " 2583 | \n",
109 | " 2358.0 | \n",
110 | " 120.0 | \n",
111 | " 360.0 | \n",
112 | " 1.0 | \n",
113 | " Urban | \n",
114 | " Y | \n",
115 | "
\n",
116 | " \n",
117 | " 4 | \n",
118 | " LP001008 | \n",
119 | " Male | \n",
120 | " No | \n",
121 | " 0 | \n",
122 | " Graduate | \n",
123 | " No | \n",
124 | " 6000 | \n",
125 | " 0.0 | \n",
126 | " 141.0 | \n",
127 | " 360.0 | \n",
128 | " 1.0 | \n",
129 | " Urban | \n",
130 | " Y | \n",
131 | "
\n",
132 | " \n",
133 | "
\n",
134 | "
"
135 | ],
136 | "text/plain": [
137 | " Loan_ID Gender Married Dependents Education Self_Employed \\\n",
138 | "0 LP001002 Male No 0 Graduate No \n",
139 | "1 LP001003 Male Yes 1 Graduate No \n",
140 | "2 LP001005 Male Yes 0 Graduate Yes \n",
141 | "3 LP001006 Male Yes 0 Not Graduate No \n",
142 | "4 LP001008 Male No 0 Graduate No \n",
143 | "\n",
144 | " ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term \\\n",
145 | "0 5849 0.0 NaN 360.0 \n",
146 | "1 4583 1508.0 128.0 360.0 \n",
147 | "2 3000 0.0 66.0 360.0 \n",
148 | "3 2583 2358.0 120.0 360.0 \n",
149 | "4 6000 0.0 141.0 360.0 \n",
150 | "\n",
151 | " Credit_History Property_Area Loan_Status \n",
152 | "0 1.0 Urban Y \n",
153 | "1 1.0 Rural N \n",
154 | "2 1.0 Urban Y \n",
155 | "3 1.0 Urban Y \n",
156 | "4 1.0 Urban Y "
157 | ]
158 | },
159 | "execution_count": 14,
160 | "metadata": {},
161 | "output_type": "execute_result"
162 | }
163 | ],
164 | "source": [
165 | "#import dataset\n",
166 | "import pandas as pd\n",
167 | "\n",
168 | "df = pd.read_csv('../Data/loan_train.csv')\n",
169 | "df.head()"
170 | ]
171 | },
172 | {
173 | "cell_type": "code",
174 | "execution_count": 15,
175 | "metadata": {},
176 | "outputs": [
177 | {
178 | "data": {
179 | "text/html": [
180 | "\n",
181 | "\n",
194 | "
\n",
195 | " \n",
196 | " \n",
197 | " | \n",
198 | " ApplicantIncome | \n",
199 | "
\n",
200 | " \n",
201 | " Gender | \n",
202 | " | \n",
203 | "
\n",
204 | " \n",
205 | " \n",
206 | " \n",
207 | " Female | \n",
208 | " 4643.473214 | \n",
209 | "
\n",
210 | " \n",
211 | " Male | \n",
212 | " 5446.460123 | \n",
213 | "
\n",
214 | " \n",
215 | "
\n",
216 | "
"
217 | ],
218 | "text/plain": [
219 | " ApplicantIncome\n",
220 | "Gender \n",
221 | "Female 4643.473214\n",
222 | "Male 5446.460123"
223 | ]
224 | },
225 | "execution_count": 15,
226 | "metadata": {},
227 | "output_type": "execute_result"
228 | }
229 | ],
230 | "source": [
231 | "# Average Income of males and females\n",
232 | "df.groupby(['Gender'])[['ApplicantIncome']].mean()\n"
233 | ]
234 | },
235 | {
236 | "cell_type": "code",
237 | "execution_count": 16,
238 | "metadata": {},
239 | "outputs": [
240 | {
241 | "data": {
242 | "text/html": [
243 | "\n",
244 | "\n",
257 | "
\n",
258 | " \n",
259 | " \n",
260 | " | \n",
261 | " LoanAmount | \n",
262 | "
\n",
263 | " \n",
264 | " Property_Area | \n",
265 | " | \n",
266 | "
\n",
267 | " \n",
268 | " \n",
269 | " \n",
270 | " Rural | \n",
271 | " 152.260116 | \n",
272 | "
\n",
273 | " \n",
274 | " Semiurban | \n",
275 | " 145.504386 | \n",
276 | "
\n",
277 | " \n",
278 | " Urban | \n",
279 | " 142.198953 | \n",
280 | "
\n",
281 | " \n",
282 | "
\n",
283 | "
"
284 | ],
285 | "text/plain": [
286 | " LoanAmount\n",
287 | "Property_Area \n",
288 | "Rural 152.260116\n",
289 | "Semiurban 145.504386\n",
290 | "Urban 142.198953"
291 | ]
292 | },
293 | "execution_count": 16,
294 | "metadata": {},
295 | "output_type": "execute_result"
296 | }
297 | ],
298 | "source": [
299 | "# Average loan amount for different property areas like urban, rural\n",
300 | "df.groupby(['Property_Area'])[['LoanAmount']].mean()"
301 | ]
302 | },
303 | {
304 | "cell_type": "code",
305 | "execution_count": 17,
306 | "metadata": {},
307 | "outputs": [
308 | {
309 | "data": {
310 | "text/html": [
311 | "\n",
312 | "\n",
325 | "
\n",
326 | " \n",
327 | " \n",
328 | " | \n",
329 | " Loan_Status | \n",
330 | "
\n",
331 | " \n",
332 | " Education | \n",
333 | " | \n",
334 | "
\n",
335 | " \n",
336 | " \n",
337 | " \n",
338 | " Graduate | \n",
339 | " 480 | \n",
340 | "
\n",
341 | " \n",
342 | " Not Graduate | \n",
343 | " 134 | \n",
344 | "
\n",
345 | " \n",
346 | "
\n",
347 | "
"
348 | ],
349 | "text/plain": [
350 | " Loan_Status\n",
351 | "Education \n",
352 | "Graduate 480\n",
353 | "Not Graduate 134"
354 | ]
355 | },
356 | "execution_count": 17,
357 | "metadata": {},
358 | "output_type": "execute_result"
359 | }
360 | ],
361 | "source": [
362 | "# Compare loan status of different education backgrounds\n",
363 | "df.groupby(['Education'])[['Loan_Status']].count()"
364 | ]
365 | },
366 | {
367 | "cell_type": "code",
368 | "execution_count": null,
369 | "metadata": {},
370 | "outputs": [],
371 | "source": []
372 | }
373 | ],
374 | "metadata": {
375 | "kernelspec": {
376 | "display_name": "Python 3",
377 | "language": "python",
378 | "name": "python3"
379 | },
380 | "language_info": {
381 | "codemirror_mode": {
382 | "name": "ipython",
383 | "version": 3
384 | },
385 | "file_extension": ".py",
386 | "mimetype": "text/x-python",
387 | "name": "python",
388 | "nbconvert_exporter": "python",
389 | "pygments_lexer": "ipython3",
390 | "version": "3.7.1"
391 | }
392 | },
393 | "nbformat": 4,
394 | "nbformat_minor": 2
395 | }
396 |
--------------------------------------------------------------------------------
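Note on /Code/groupby_in_pandas.ipynb — groupby is not limited to a single statistic; agg() computes several at once:

    # mean, median and count of applicant income per gender in one call
    df.groupby('Gender')['ApplicantIncome'].agg(['mean', 'median', 'count'])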
/Code/hide_print.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import os\n",
10 | "import sys"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 2,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "class HiddenPrints:\n",
20 | " def __enter__(self):\n",
21 | " self._original_stdout = sys.stdout\n",
22 | " sys.stdout = open(os.devnull, 'w')\n",
23 | "\n",
24 | " def __exit__(self, exc_type, exc_val, exc_tb):\n",
25 | " sys.stdout.close()\n",
26 | " sys.stdout = self._original_stdout"
27 | ]
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": 4,
32 | "metadata": {},
33 | "outputs": [
34 | {
35 | "name": "stdout",
36 | "output_type": "stream",
37 | "text": [
38 | "This will be printed\n"
39 | ]
40 | }
41 | ],
42 | "source": [
43 | "with HiddenPrints():\n",
44 | " print(\"This print is hidden\")\n",
45 | "\n",
46 | "print(\"This will be printed\")"
47 | ]
48 | }
49 | ],
50 | "metadata": {
51 | "kernelspec": {
52 | "display_name": "Python 3",
53 | "language": "python",
54 | "name": "python3"
55 | },
56 | "language_info": {
57 | "codemirror_mode": {
58 | "name": "ipython",
59 | "version": 3
60 | },
61 | "file_extension": ".py",
62 | "mimetype": "text/x-python",
63 | "name": "python",
64 | "nbconvert_exporter": "python",
65 | "pygments_lexer": "ipython3",
66 | "version": "3.7.3"
67 | }
68 | },
69 | "nbformat": 4,
70 | "nbformat_minor": 2
71 | }
72 |
--------------------------------------------------------------------------------
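Note on /Code/hide_print.ipynb — the standard library offers the same effect without a custom context manager, via contextlib.redirect_stdout:

    import io
    from contextlib import redirect_stdout

    with redirect_stdout(io.StringIO()):
        print('This print is hidden')
    print('This will be printed')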
/Code/interactive_notebook.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from IPython.core.interactiveshell import InteractiveShell\n",
10 | "InteractiveShell.ast_node_interactivity = \"all\""
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 11,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "import pandas as pd\n",
20 | "data = pd.read_csv('loan_train.csv')"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 13,
26 | "metadata": {},
27 | "outputs": [
28 | {
29 | "data": {
30 | "text/plain": [
31 | "(614, 13)"
32 | ]
33 | },
34 | "execution_count": 13,
35 | "metadata": {},
36 | "output_type": "execute_result"
37 | },
38 | {
39 | "data": {
40 | "text/html": [
41 | "\n",
42 | "\n",
55 | "
\n",
56 | " \n",
57 | " \n",
58 | " | \n",
59 | " Loan_ID | \n",
60 | " Gender | \n",
61 | " Married | \n",
62 | " Dependents | \n",
63 | " Education | \n",
64 | " Self_Employed | \n",
65 | " ApplicantIncome | \n",
66 | " CoapplicantIncome | \n",
67 | " LoanAmount | \n",
68 | " Loan_Amount_Term | \n",
69 | " Credit_History | \n",
70 | " Property_Area | \n",
71 | " Loan_Status | \n",
72 | "
\n",
73 | " \n",
74 | " \n",
75 | " \n",
76 | " 0 | \n",
77 | " LP001002 | \n",
78 | " Male | \n",
79 | " No | \n",
80 | " 0 | \n",
81 | " Graduate | \n",
82 | " No | \n",
83 | " 5849 | \n",
84 | " 0.0 | \n",
85 | " NaN | \n",
86 | " 360.0 | \n",
87 | " 1.0 | \n",
88 | " Urban | \n",
89 | " Y | \n",
90 | "
\n",
91 | " \n",
92 | " 1 | \n",
93 | " LP001003 | \n",
94 | " Male | \n",
95 | " Yes | \n",
96 | " 1 | \n",
97 | " Graduate | \n",
98 | " No | \n",
99 | " 4583 | \n",
100 | " 1508.0 | \n",
101 | " 128.0 | \n",
102 | " 360.0 | \n",
103 | " 1.0 | \n",
104 | " Rural | \n",
105 | " N | \n",
106 | "
\n",
107 | " \n",
108 | " 2 | \n",
109 | " LP001005 | \n",
110 | " Male | \n",
111 | " Yes | \n",
112 | " 0 | \n",
113 | " Graduate | \n",
114 | " Yes | \n",
115 | " 3000 | \n",
116 | " 0.0 | \n",
117 | " 66.0 | \n",
118 | " 360.0 | \n",
119 | " 1.0 | \n",
120 | " Urban | \n",
121 | " Y | \n",
122 | "
\n",
123 | " \n",
124 | " 3 | \n",
125 | " LP001006 | \n",
126 | " Male | \n",
127 | " Yes | \n",
128 | " 0 | \n",
129 | " Not Graduate | \n",
130 | " No | \n",
131 | " 2583 | \n",
132 | " 2358.0 | \n",
133 | " 120.0 | \n",
134 | " 360.0 | \n",
135 | " 1.0 | \n",
136 | " Urban | \n",
137 | " Y | \n",
138 | "
\n",
139 | " \n",
140 | " 4 | \n",
141 | " LP001008 | \n",
142 | " Male | \n",
143 | " No | \n",
144 | " 0 | \n",
145 | " Graduate | \n",
146 | " No | \n",
147 | " 6000 | \n",
148 | " 0.0 | \n",
149 | " 141.0 | \n",
150 | " 360.0 | \n",
151 | " 1.0 | \n",
152 | " Urban | \n",
153 | " Y | \n",
154 | "
\n",
155 | " \n",
156 | "
\n",
157 | "
"
158 | ],
159 | "text/plain": [
160 | " Loan_ID Gender Married Dependents Education Self_Employed \\\n",
161 | "0 LP001002 Male No 0 Graduate No \n",
162 | "1 LP001003 Male Yes 1 Graduate No \n",
163 | "2 LP001005 Male Yes 0 Graduate Yes \n",
164 | "3 LP001006 Male Yes 0 Not Graduate No \n",
165 | "4 LP001008 Male No 0 Graduate No \n",
166 | "\n",
167 | " ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term \\\n",
168 | "0 5849 0.0 NaN 360.0 \n",
169 | "1 4583 1508.0 128.0 360.0 \n",
170 | "2 3000 0.0 66.0 360.0 \n",
171 | "3 2583 2358.0 120.0 360.0 \n",
172 | "4 6000 0.0 141.0 360.0 \n",
173 | "\n",
174 | " Credit_History Property_Area Loan_Status \n",
175 | "0 1.0 Urban Y \n",
176 | "1 1.0 Rural N \n",
177 | "2 1.0 Urban Y \n",
178 | "3 1.0 Urban Y \n",
179 | "4 1.0 Urban Y "
180 | ]
181 | },
182 | "execution_count": 13,
183 | "metadata": {},
184 | "output_type": "execute_result"
185 | },
186 | {
187 | "data": {
188 | "text/plain": [
189 | "Loan_ID object\n",
190 | "Gender object\n",
191 | "Married object\n",
192 | "Dependents object\n",
193 | "Education object\n",
194 | "Self_Employed object\n",
195 | "ApplicantIncome int64\n",
196 | "CoapplicantIncome float64\n",
197 | "LoanAmount float64\n",
198 | "Loan_Amount_Term float64\n",
199 | "Credit_History float64\n",
200 | "Property_Area object\n",
201 | "Loan_Status object\n",
202 | "dtype: object"
203 | ]
204 | },
205 | "execution_count": 13,
206 | "metadata": {},
207 | "output_type": "execute_result"
208 | }
209 | ],
210 | "source": [
211 | "data.shape\n",
212 | "data.head()\n",
213 | "data.dtypes"
214 | ]
215 | },
216 | {
217 | "cell_type": "code",
218 | "execution_count": null,
219 | "metadata": {},
220 | "outputs": [],
221 | "source": []
222 | }
223 | ],
224 | "metadata": {
225 | "kernelspec": {
226 | "display_name": "Python 3",
227 | "language": "python",
228 | "name": "python3"
229 | },
230 | "language_info": {
231 | "codemirror_mode": {
232 | "name": "ipython",
233 | "version": 3
234 | },
235 | "file_extension": ".py",
236 | "mimetype": "text/x-python",
237 | "name": "python",
238 | "nbconvert_exporter": "python",
239 | "pygments_lexer": "ipython3",
240 | "version": "3.7.1"
241 | }
242 | },
243 | "nbformat": 4,
244 | "nbformat_minor": 2
245 | }
246 |
--------------------------------------------------------------------------------
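Note on /Code/interactive_notebook.ipynb — setting ast_node_interactivity to 'all' makes every standalone expression in a cell display its result. The IPython default can be restored at any time:

    from IPython.core.interactiveshell import InteractiveShell
    InteractiveShell.ast_node_interactivity = 'last_expr'   # the default: show only the last expression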
/Code/invert_dictionary.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Inverting a Dictionary"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 15,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "# Test Dictionary\n",
17 | "my_dict = {'Name':20,\n",
18 | " 'Age' : 3,\n",
19 | " 'attendance' : 75,\n",
20 | " 'roll no': 'H405'}"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 16,
26 | "metadata": {},
27 | "outputs": [
28 | {
29 | "name": "stdout",
30 | "output_type": "stream",
31 | "text": [
32 | "{'Name': 20, 'Age': 3, 'attendance': 75, 'roll no': 'H405'}\n"
33 | ]
34 | }
35 | ],
36 | "source": [
37 | "# Print Dictionary\n",
38 | "print(my_dict)"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": 17,
44 | "metadata": {},
45 | "outputs": [
46 | {
47 | "data": {
48 | "text/plain": [
49 | "{20: 'Name', 3: 'Age', 75: 'attendance', 'H405': 'roll no'}"
50 | ]
51 | },
52 | "execution_count": 17,
53 | "metadata": {},
54 | "output_type": "execute_result"
55 | }
56 | ],
57 | "source": [
58 | "# invert dictionary\n",
59 | "new_dict = {v:k for k,v in my_dict.items()}\n",
60 | "new_dict"
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": null,
66 | "metadata": {},
67 | "outputs": [],
68 | "source": []
69 | }
70 | ],
71 | "metadata": {
72 | "kernelspec": {
73 | "display_name": "Python 3",
74 | "language": "python",
75 | "name": "python3"
76 | },
77 | "language_info": {
78 | "codemirror_mode": {
79 | "name": "ipython",
80 | "version": 3
81 | },
82 | "file_extension": ".py",
83 | "mimetype": "text/x-python",
84 | "name": "python",
85 | "nbconvert_exporter": "python",
86 | "pygments_lexer": "ipython3",
87 | "version": "3.7.1"
88 | }
89 | },
90 | "nbformat": 4,
91 | "nbformat_minor": 2
92 | }
93 |
--------------------------------------------------------------------------------
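Note on /Code/invert_dictionary.ipynb — the comprehension only works cleanly when every value is hashable and unique; duplicate values silently collapse to the key seen last:

    d = {'a': 1, 'b': 1}
    print({v: k for k, v in d.items()})   # {1: 'b'} - the 'a' entry is lost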
/Code/iterative_imputer.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "iterative_imputer.ipynb",
7 | "provenance": []
8 | },
9 | "kernelspec": {
10 | "name": "python3",
11 | "display_name": "Python 3"
12 | }
13 | },
14 | "cells": [
15 | {
16 | "cell_type": "markdown",
17 | "metadata": {
18 | "id": "x-FLsxTrgR3R",
19 | "colab_type": "text"
20 | },
21 | "source": [
22 | "## Impute missing values using IterativeImputer"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "metadata": {
28 | "id": "JqsZhhMUZnxX",
29 | "colab_type": "code",
30 | "colab": {}
31 | },
32 | "source": [
33 | "import numpy as np\n",
34 | "\n",
35 | "# Since this estimator is experimental, we need to import it explicitly\n",
36 | "from sklearn.experimental import enable_iterative_imputer \n",
37 | "from sklearn.impute import IterativeImputer"
38 | ],
39 | "execution_count": 0,
40 | "outputs": []
41 | },
42 | {
43 | "cell_type": "code",
44 | "metadata": {
45 | "id": "EwRy35OTZ-ea",
46 | "colab_type": "code",
47 | "colab": {}
48 | },
49 | "source": [
50 | "# Make a test dataset\n",
51 | "data = [[1, 2, 3, 4, np.nan],\n",
52 | " [5, 6, 7, 8, 11],\n",
53 | " [9, 1, 12, 2, 3],\n",
54 | " [4, np.nan, 5, 6, 7],\n",
55 | " [4, 8, 1, 2, 3]]"
56 | ],
57 | "execution_count": 0,
58 | "outputs": []
59 | },
60 | {
61 | "cell_type": "code",
62 | "metadata": {
63 | "id": "dpkWWrFpeHQ0",
64 | "colab_type": "code",
65 | "colab": {
66 | "base_uri": "https://localhost:8080/",
67 | "height": 102
68 | },
69 | "outputId": "3b09b267-461e-4a61-d9d9-2c91f87adbe5"
70 | },
71 | "source": [
72 | "# Impute missing values using iterative imputer\n",
73 | "iter_imp = IterativeImputer(random_state= 34)\n",
74 | "iter_imp.fit_transform(data)"
75 | ],
76 | "execution_count": 19,
77 | "outputs": [
78 | {
79 | "output_type": "execute_result",
80 | "data": {
81 | "text/plain": [
82 | "array([[ 1. , 2. , 3. , 4. , 2.78144391],\n",
83 | " [ 5. , 6. , 7. , 8. , 11. ],\n",
84 | " [ 9. , 1. , 12. , 2. , 3. ],\n",
85 | " [ 4. , 4.25776595, 5. , 6. , 7. ],\n",
86 | " [ 4. , 8. , 1. , 2. , 3. ]])"
87 | ]
88 | },
89 | "metadata": {
90 | "tags": []
91 | },
92 | "execution_count": 19
93 | }
94 | ]
95 | },
96 | {
97 | "cell_type": "code",
98 | "metadata": {
99 | "id": "W2KTJygufkhi",
100 | "colab_type": "code",
101 | "colab": {}
102 | },
103 | "source": [
104 | ""
105 | ],
106 | "execution_count": 0,
107 | "outputs": []
108 | }
109 | ]
110 | }
--------------------------------------------------------------------------------
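Note on /Code/iterative_imputer.ipynb — IterativeImputer models each column with BayesianRidge by default; the estimator parameter swaps in any other regressor. A hedged sketch:

    from sklearn.experimental import enable_iterative_imputer  # still required, the estimator is experimental
    from sklearn.impute import IterativeImputer
    from sklearn.ensemble import RandomForestRegressor

    # tree-based imputation instead of the default linear model
    iter_imp = IterativeImputer(estimator=RandomForestRegressor(n_estimators=10, random_state=34),
                                random_state=34)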
/Code/list_of_lists_to_list.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import itertools"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 2,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "nested_list = [[1], [2, 3], [4, 5, 6]]"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 3,
24 | "metadata": {},
25 | "outputs": [
26 | {
27 | "name": "stdout",
28 | "output_type": "stream",
29 | "text": [
30 | "[1, 2, 3, 4, 5, 6]\n"
31 | ]
32 | }
33 | ],
34 | "source": [
35 | "flat_list = list(itertools.chain.from_iterable(nested_list))\n",
36 | "\n",
37 | "print(flat_list)"
38 | ]
39 | }
40 | ],
41 | "metadata": {
42 | "kernelspec": {
43 | "display_name": "Python 3",
44 | "language": "python",
45 | "name": "python3"
46 | },
47 | "language_info": {
48 | "codemirror_mode": {
49 | "name": "ipython",
50 | "version": 3
51 | },
52 | "file_extension": ".py",
53 | "mimetype": "text/x-python",
54 | "name": "python",
55 | "nbconvert_exporter": "python",
56 | "pygments_lexer": "ipython3",
57 | "version": "3.7.3"
58 | }
59 | },
60 | "nbformat": 4,
61 | "nbformat_minor": 2
62 | }
63 |
--------------------------------------------------------------------------------
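Note on /Code/list_of_lists_to_list.ipynb — the same flattening works without an import, as a nested comprehension:

    flat_list = [item for sublist in nested_list for item in sublist]
    print(flat_list)   # [1, 2, 3, 4, 5, 6]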
/Code/magic function - %who.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 3,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "var_1 = 4\n",
10 | "var_2 = \"hat\""
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 4,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "my_list = [22,33,44,55,66,98]"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 5,
25 | "metadata": {},
26 | "outputs": [],
27 | "source": [
28 | "my_dict = {'India': 'Delhi', 'Indonesia': 'Jakarta', 'China': 'Beijing'}"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 6,
34 | "metadata": {},
35 | "outputs": [
36 | {
37 | "name": "stdout",
38 | "output_type": "stream",
39 | "text": [
40 | "Variable Type Data/Info\n",
41 | "----------------------------\n",
42 | "my_dict dict n=3\n",
43 | "my_list list n=6\n",
44 | "var_1 int 4\n",
45 | "var_2 str hat\n"
46 | ]
47 | }
48 | ],
49 | "source": [
50 | "%whos"
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": null,
56 | "metadata": {},
57 | "outputs": [],
58 | "source": []
59 | }
60 | ],
61 | "metadata": {
62 | "kernelspec": {
63 | "display_name": "Python 3",
64 | "language": "python",
65 | "name": "python3"
66 | },
67 | "language_info": {
68 | "codemirror_mode": {
69 | "name": "ipython",
70 | "version": 3
71 | },
72 | "file_extension": ".py",
73 | "mimetype": "text/x-python",
74 | "name": "python",
75 | "nbconvert_exporter": "python",
76 | "pygments_lexer": "ipython3",
77 | "version": "3.7.1"
78 | }
79 | },
80 | "nbformat": 4,
81 | "nbformat_minor": 2
82 | }
83 |
--------------------------------------------------------------------------------
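Note on /Code/magic function - %who.ipynb — a few related IPython magics are worth knowing alongside %whos:

    %who          # prints the bare variable names defined above
    %who str      # filters by type, here only var_2
    names = %who_ls   # returns the names as a Python list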
/Code/make_regression.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 121,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from sklearn import linear_model, datasets\n",
10 | "import matplotlib.pyplot as plt"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 126,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "X, y, coef = datasets.make_regression(n_samples=1000, n_features=1,\n",
20 | " n_informative=1, noise=10,\n",
21 | " coef=True, random_state=0)"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 127,
27 | "metadata": {},
28 | "outputs": [
29 | {
30 | "data": {
31 | "text/plain": [
32 | ""
33 | ]
34 | },
35 | "execution_count": 127,
36 | "metadata": {},
37 | "output_type": "execute_result"
38 | },
39 | {
40 | "data": {
41 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAD8CAYAAAB6paOMAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAHHlJREFUeJzt3X+Q3PV93/Hn+1aLvCKpVxQlQYvOIh4ix7IMsm+QOup0bGwiAgbOcjBoICFNZjSe2o2dphqk4KlEi6vr3CTYqd2MlZhpPFB+OMiHCLRnu5InKTOyOfkkZBldrIKNtGKMUjilRltxunv3j9099va+e7d3++P76/WY0ejuu9+972dB833f9/15f94fc3dERCS9esIegIiIhEuBQEQk5RQIRERSToFARCTlFAhERFJOgUBEJOUUCEREUk6BQEQk5RQIRERSbknYA2jG5Zdf7qtXrw57GCIisXL48OF/cPcV850Xi0CwevVqRkZGwh6GiEismNlPmzlPqSERkZRTIBARSTkFAhGRlFMgEBFJOQUCEZGUi0XVkIhI2gyNFhkcHuPMeImV+RzbN6+hf32hI9dSIBARiZih0SI79x2jNDEJQHG8xM59xwA6EgyUGhIRiZjB4bHpIFBVmphkcHisI9fTE4GISATUpoIa7SR/ZrzUkWsrEIiIhKw+FdTIynyuI9dXakhEJGRBqaB6uWyG7ZvXdOT6eiIQEQnZXCkfA1UNiYgkTX1paH5ZljfOT8w6r5DP8dyO6zs+HgUCEZEuCioNzfYY2YwxMfn2NHEnU0H1NEcgItJFQfMBE1POpZcsoZDPYZSfBPZsWdexVFA9PRGIiHRRo/mAc6UJjuz6jS6PpkyBQESkBQttBbEyn6MYEAw6VRrajJZTQ2a2yswOmtmLZnbczD5bOX6ZmX3bzH5c+Xt55biZ2Z+Z2Ukze8HMPtDqGEREwlDN9xcri8CK4yX+8PEjrN7xDJsGDjA0Wpz1nu2b15DLZmYc6+Z8QBBzb7SGrckfYHYFcIW7/8DMfhE4DPQDvwu87u4DZrYDWO7u95rZTcC/Bm4CNgBfcvcNc12jr6/PtVWliIQp6Df/weGxwN/uq3LZDJ/4YIGDJ87OeB/QlYZyZnbY3fvmPa/VQBBw4aeAL1f+fMjdX60Ei++6+xoz+2rl60cr549Vz2v0MxUIRCRMQSt/c9nMvIvAoLwOoPYum8tmujYR3GwgaGvVkJmtBtYD3wN+uXpzr/z9S5XTCsCpmredrhwTEYmkRk3gMmbzvrf+V+1ONo9brLYFAjP7BeBJ4HPu/o9znRpwbNZjiZltM7MRMxs5e/Zsu4YpIrJgjSp9Jt1n5ftb+XlhaUsgMLMs5SDwiLvvqxz+WSUlVJ1HeK1y/DSwqubtVwJn6n+mu+919z5371uxYkU7hikisiiNKnoK+Ryf+ODCExphVggFaUfVkAFfA1509z+teWk/cE/l63uAp2qO/06lemgjcG6u+QERkbAFVfpkM8brb17g4UOvNHxftofIVQgFaccTwSbgt4HrzexI5c9NwABwg5n9GLih8j3As8BLwEngL4B/1YYxiIjMa2i0yKaBA1w1R3ln0HkAe7asm175u3xZFhxKE1NzXu/SpdkZ7+v2iuFmtb1qqBNUNSQirWpU+VN/Yw46z4C7NvbyQP86ADYNHJizbLT2fS8P3Ny2z7BQzVYNaWWxiKTCXNs/1gaC3fuPzzrPgYcPvcLfHH2Vc6WJhjuI1YvaXEAjCgQikgqNKnVqjw+NFhkvzW4HXTXXa/WiOBfQiLqPikgqNPrtvPZ4q/X91WUFUZ0LaERPBCKSCts3rwmcI9i+ec10+4hm8v5B8rksu29dG5sbfz0FAhFJvOqNvroaeNKdQk3fn2Y2jp/LpUuXxDYIgAKBiCRcfRVQdTXwh9+zgvufPh64ReRCRW2l8EIpEIhIot3/9OwqoNLE5JwLwYIsXdLDhYvB6wbiUh3UiAKBiCRGbavod+ayTExO8eZbi0/51GoUBAxiUx3UiAKBiCRCfQpoIaWerXCI9fwAKBCISEzVbxTz5oWLLU34VpnBEjMmpt5eNpbLZli6pCcwuBRinhYCBQIRiaH63/4XW/YZ5MFPXgvM3kEMZlcXxWnR2FwUCEQkdoLaRbTD3Rt7p9M8jdI93dhistsUCEQkdtpdrllo8qbev76QiBt/PbWYEJHYyS/Ltu1nVdM7SbzBN0uBQERiZWi0yM//38W2/bwo7iHcbQoEIhIZzWwcMzg8NqOipx2K46U5N6pJOs0RiEgkBFUC7dx3bPr16iRtp7bSqr1e2tJECgQiEgmNNo7Zvf84Fy5OdaRKqF7QRjVpoNSQiERCo7UA46WJRQWB6kIvC3gt6FhV3BvILYYCgYiEbmi0OOfNeaGWZXt4bsf1/GTgZh6849oZm8d/8Y5reXng5oYrguPeQG4xlBoSka6p3QCmdl+ANy9cbGvuf2LKGRotTtf9B6V65tqoJm30RCAiXVGdDK6mgCa9fOsvjpfa3iBuYtLnLQntX19gz5Z1M54W4rS9ZDvpiUBEuqJTbSEaaSbXn9SVwgulJwIR6Yp2NoZrRhpz/YulJwIRaauh0eKsLSCXL8ti0LE1APXSmutfLAUCEWmbodEi2//6KBOTM2/57dgXGMopjOB9wmb6QO87lfJZAKWGRKRtBofHZgWBdmomCAAceumNjo0hiRQIRKRtuj0P0Ei1Ikmao0AgIm3R7kVhrchYVEYSD5ojEJEFqd8ruNrLf3B4rGuTwfPZumFV2EOIFQUCEWnaXB1Cu9Gjp77yKJfN8IHed3LopTeYdCdjxtYNq3igf13Hx5IkbQkEZvYQ8DHgNXd/X+XYZcDjwGrgJ8An3f0NMzPgS8BNwHngd939B+0Yh4h01u79xwM7hH7u8SMdv3Yum+ETHyxw8MTZxO0ZHLZ2PRH8V+DLwNdrju0A/qe7D5jZjsr39wK/CVxd+bMB+PPK3yISYUOjxba3gpjLF++4NpEbxUdRWwKBu/+tma2uO3wb8KHK138FfJdyILgN+Lq7O3DIzPJmdoW7v9qOsYhIZ9z/9PGuXauQz6n9Qxd1smrol6s398rfv1Q5XgBO1Zx3unJMRCKsXYvC5pPNmFYFd1kYk8VBdV2zig3MbBuwDaC3t7fTYxJJrUZVQPWvd8ullyzRk0CXdTIQ/Kya8jGzK4DXKsdPA7W1XVcCZ+rf7O57gb0AfX19UalKE0mUuaqA+tcX+PzQMR4+9EpXx3Sui/MQUtbJ1NB+4J7K1/cAT9Uc/x0r2wic0/yASDga7RM8ODwWShAAdQ0NQ7vKRx+lPDF8uZmdBnYBA8ATZvb7wCvA7ZXTn6VcOnqScvnov2zHGERk4RrV/hfHS10JAtkeY2Lq7Qd+dQ0NR7uqhrY2eOkjAec68Ol2XFdEWrMyn+tof6C7N/Zy8MTZwGsUKvMRKhENn1YWiyRU0CQwMGvP4E7uE9D3rsvoe9dlDfcGVoloNCgQiCRQ0CTwH33jKJM1aZhq
h85OVmIMDo/x3I7rp7/Wb/7RpEAgkkBBk8C1QaBbqnMQ+s0/2tSGWiSButEArhmqAIoHBQKRhBkaLRKFdvyqAIoPpYZEEqS6Z3AIWaAZli/LsuuWtUoHxYQCgUiCdHrP4Pnksj3s2fJ+BYCYUSAQSZAw5wbu3tirDWFiSoFAJKZq1wm8I9vDhYtToW4V+ej3TikQxJQCgUgM1a8TKE1MhTyit9clSPyoakgkhoLWCYQtE4VSJVkUBQKRGAp7LiDI1g2rAo9L9Ck1JBIj1XmBsJIwhXxueh7g0e+dYtKdjBlbN6zS/ECMKRCIREztJHB+WRZ3GC9N0GOEuj7AYHqB2AP963TjTxAFApEIqZ8Ert0nOOwgcNfGXq0PSCgFApEIieIkcEHdQhNPgUAkQqLSLA7KTwEP3nGtAkAKqGpIJEKi0q1TqaB00ROBSAiCdg/rX19g++Y1bP/G0Rn7+HabUkHpo0Ag0mVBu4ft3HcMKG/gct83jzHxVvfnCXLZDHu2rFMASCHzGCwL7+vr85GRkbCHIdIWmwYOdHTD+IUy0PaRCWVmh929b77z9EQg0mVRmhAu5HPTewpLemmyWKTLojQhrB3EBPREINJxQ6NF7n/6+IzFYVHgoFSQAAoEIh1V3ToyzF3DGilE5MlEwqfUkEgH3f/08UgEgfoG0dpYXmopEIh0yNBoMRLpoFw2w10beynkcxjlJwGViUotpYZEFql+UdiH37OCvzn6KuOl8G7+1cVgQYvVRBpRIBBZhKHR4owVwMXxEg8feiXkUTF909eNXxZCqSGRRdi574VQ20AEWZbtUQCQRVEgEFmgodFiJDaLr5XtMf7jlveHPQyJqdACgZndaGZjZnbSzHaENQ6RhRocHgt7CDMU8jkGb79GTwOyaKHMEZhZBvgKcANwGnjezPa7+4/CGI/IQkShT1A+l2X3rWt185e2CGuy+DrgpLu/BGBmjwG3AQoEEgnViqDieImM2fQm7ZMhNWnUjV86KaxAUABO1Xx/GtgQ0lhEZqivCKre/MMIAmoNLd0QViCoX+gI5dYnb59gtg3YBtDb29uNMYkAsHv/8UhUBGmDGOmWsALBaWBVzfdXAmdqT3D3vcBeKO9H0L2hSdqFuSCsykDtoaVrwqoaeh642syuMrNLgDuB/SGNRQQop4Q2DRwIexhAdFpVSzqE8kTg7hfN7DPAMJABHnL342GMRdJraLTI7v3HQ30C6DFYuiQzvW0lqCGcdF9o6wjc/Vl3/zV3f7e7fyGscUg6VSeEw04DTTns2bJODeEkVOo1JKk0ODwWmQlh9QaSsCkQSCoErQuIAqWAJAoUCCTxhkaL7Nx3bDoPH5UgcOklGT0JSCSo6Zwk3uDw2IzJ2CjIZowvfHxd2MMQAfREIClwJgK9gWppoZhEjQKBJEZ9OejyZVl23bKWlflcJBrFgRaKSTQpNSSJEFQO+sb5Cbb/9VFW/9NcYE+TMGihmESRAoEkQqNy0IlJ57n//TphTA9ne2aGHy0Uk6hSIJDYGxotRib1U7Xp3ZcxePs1WigmsaA5Aomd2jUBPVZenRsVGTO2bljFA/3liiDd+CUOFAgkVurXBEQlCNy9sXf65i8SNwoEEitRWxOgncMkCTRHILESpTUB2R5TEJBE0BOBRFJQb6BCPkd+WZY3zoe/cQzAxJQzODymQCCxp0AgkdNoz+CwKoPmalIXpScUkcVSakgiJyp7BkN5JfCffLJcBhpEC8QkCRQIJHLC3iym1l0be+lfX2D75jXkspkZr2mBmCSFUkMSKUOjxbCHMG3Tuy+btR5gcHiMM+MlVqpxnCSIAoFExtBokX/zxJGwhzFrUViVdhKTpFIgkMj4430vhLpArJDPqTOopJLmCCQyzk9MhXZt5fslzfREIKmnjWIk7RQIpKuqC8VqJ1yhPAnbaXdv7OXgibOa7BWpo0AgXfP5oWM8cuiV6b0BiuMl/vDxI13ZK6CQz6kpnEgDmiOQrhgaLc4IAlXdmhs+/9bFSJWmikSJAoF0xeDwWCi7hFW9cX6CnfuOKRiIBFAgkLYZGi2yaeAAV+14hk0DB6Zvup8fOtb1PkH5XHbWsdLEZFfmIkTiRnME0hb1G8YUx0vs3HeMrxz8MT9+7c2uj+dcgzYVahInMpueCKQt7n/6+KwNY0oTk6EEgeXLsg2bwalJnMhsCgTSsqHRYih7BORzWTI9NuNYNmPsumWtmsSJLIBSQ9KyMPLu1T2Cg9Yl1K4NUJM4kfmZN9hwo6k3m90O7AZ+HbjO3UdqXtsJ/D4wCfyBuw9Xjt8IfAnIAH/p7gPzXaevr89HRkbmO026oP7G++H3rODhQ690fRw/Gbi569cUiRszO+zuffOd1+oTwQ+BLcBX6y7+XuBOYC2wEviOmf1a5eWvADcAp4HnzWy/u/+oxXFIFwRNCIcRBBptEiMii9NSIHD3FwHMrP6l24DH3P0C8LKZnQSuq7x20t1fqrzvscq5CgQxMDg8NmtCuNuyGVOeX6TNOjVHUAAO1Xx/unIM4FTd8Q0dGoMswlw597BLL5cvy7LrlrXK84u02byBwMy+A/xKwEv3uftTjd4WcMwJrlIKnKQws23ANoDe3t75hilt0GgtAJQ3ZVmZz4WygbzmA0Q6a95A4O4fXcTPPQ2sqvn+SuBM5etGx+uvuxfYC+XJ4kWMQRYoKPVTmpjkj544yucePxIY3Ttt+bLZK4RFpL06tY5gP3CnmS01s6uAq4HvA88DV5vZVWZ2CeUJ5f0dGoMsUKPUz2Slsqzb0bi6JkBEOqulQGBmHzez08A/A54xs2EAdz8OPEF5Evh/AJ9290l3vwh8BhgGXgSeqJwrERCFVbfVp45CPsfgb12j+QCRLmhpHUG3aB1Bd9TPEYRFeweLtEe31hFIglR/+x4cHgtlUrgq7OokkbRRIBCg/DSwe/9xxht07eymKKSoRNJEgUAYGi2y/RtHmZgKP02oxnAi3adAIAwOj0UiCBTUGE4kFAoE0rWcvNG4BFUTxCLhUSBImPnaMgedZwbtLh7L57J87JorOHji7IyxALMqk5QOEgmXAkGCNGoRMfLT12fckD/8nhU8ebg4fV47g0Cz6R3tEyASHVpHkCCbBg6EWvap9I5ItGgdQQLNl/YJs/5e6R2R+NKexTFRTfsUx0s4b6d9hkaL0+d0sv6+fm/gHis3hDPKTwJ7tqxTekckpvREEBONOoMODo9N34C3b17TkRYR+VyW3beuVV5fJKEUCGKiUdqn9ninWkSYlX+2bvwiyaTUUEw0SvvUH+9fX+C5HdeTz7Wvj//4+fDbTohI5+iJICaC0j49wKvnSqze8cz0sUKlPPTNty627drq/SOSbAoEMVGb9jkzXuId2R5KE1OzluoWx0s8cuiVtm0io2ogkeRTIIiR2jz9r+58puF57VwZomogkeTTHEEMDY0W6UaPuIyZgoBICigQxNDg8FhXrjMZg1XnItI6pYZionZVcbtuzxkzptzpMQu86Rc0SSySCnoiiIH6VcXNWL4sSy6bafh6NmP8ySev4eWBm9m6YRV
W97omiUXSQ08EETBfD6GgVcVzMWDXLWun31scL83YC2D5siy7bllL//oCQ6NFnjxcnBFgDPjEB7WATCQtFAhC1qh1NLxdMrqQZnIG3LWxd/q9zbSDrg8yDhw8cbbpa4pIvCk1FLK5eghVNbugq5DP8eAd1/JA/7qmr99M6woRSTYFgpA16glUeyPevnnNnPl+eHsvgIWmc5ptXSEiyaVAEKKh0eKsSdqqHrPpFtP96wvs2TL3b/mLndgNCjKaKBZJFwWCEA0OjzWsApp0n7HfQP/6QsNyznwuu+iJ3WqQKeRz2ltAJKU0WdxF9dVB87WKbma/gVw2w+5b17Y0LrWYFkk3BYIuCaoOqi3pbOTMeGlGAMkvy7J0SQ/nShPaIEZE2kKBoEsalWnOJ78sOyOAvHF+glw2w4N3XKsAICJtoTmCLllMOaYB7sxbXioi0goFgi5ZTDnmXRt7OVcK3h1Mdf4i0i4tBQIzGzSzE2b2gpl908zyNa/tNLOTZjZmZptrjt9YOXbSzHa0cv04aWYtQK18LssD/etU5y8iHdfqE8G3gfe5+/uBvwd2ApjZe4E7gbXAjcB/MbOMmWWArwC/CbwX2Fo5NxWWLmnuP3dtJZDq/EWk01qaLHb3b9V8ewj4rcrXtwGPufsF4GUzOwlcV3ntpLu/BGBmj1XO/VEr44i6+oqhuWTMZtTx129RqUohEWm3dlYN/R7weOXrAuXAUHW6cgzgVN3xDW0cQyQ12z00l80ELuZSnb+IdNK8gcDMvgP8SsBL97n7U5Vz7gMuAo9U3xZwvhOcigqsojSzbcA2gN7e3vmGGWnNTOzWPwmIiHTLvIHA3T861+tmdg/wMeAj7tPbXJ0GVtWcdiVwpvJ1o+P1190L7AXo6+uL9J6J8+0n0Mwq4il3BQERCUWrVUM3AvcCt7r7+ZqX9gN3mtlSM7sKuBr4PvA8cLWZXWVml1CeUN7fyhjCVr97WHU/gWqPIGiuYkhVQCISllbnCL4MLAW+bWYAh9z9U+5+3MyeoDwJfBH4tLtPApjZZ4BhIAM85O7HWxxDV9X/9v/mhYsNF3wFTfgGtZZQFZCIhMk8YNPyqOnr6/ORkZGOXmO+9E71nGarfwB+MnDzoq8lItIqMzvs7n3znadeQzS3XSQsbO/gjDXaaUBVQCISLWoxQXPbRcLC2jpMxuBJS0QE9EQANL7BF8dLvHvns0y6U8jnyGV7OD8x1dTPbLSJjIhI1OiJgLkrdqq/2RfHS00HAU3+ikicKBCw8IZwc8nnsloYJiKxotQQs8s7W3Hp0iUKAiISK3oiqOhfX+C5Hde3nNvXPgEiEjcKBHVaTRNphbCIxI0CQZ3+9QX2bFk3/WRQXQ9QyOe4e2MvhXwOozwXkM3MXCugSWIRiSPNEQRodsGXVgiLSBIoELRAK4RFJAmUGhIRSTkFAhGRlFMgEBFJOQUCEZGUUyAQEUk5BQIRkZRLdPmo6vxFROaX2EDQ7K5jIiJpl9jUULO7jomIpF1iA0GjLqDqDioiMlNiA0GjLqDqDioiMlNiA0FQO2l1BxURmS2xk8W1u46pakhEpLHEBgJQd1ARkWYkNjUkIiLNUSAQEUk5BQIRkZRTIBARSTkFAhGRlDN3D3sM8zKzs8BP2/TjLgf+oU0/Kwr0eaItaZ8HkveZkvx53uXuK+Z7QywCQTuZ2Yi794U9jnbR54m2pH0eSN5n0udRakhEJPUUCEREUi6NgWBv2ANoM32eaEva54HkfabUf57UzRGIiMhMaXwiEBGRGqkLBGb2H8zsBTM7YmbfMrOVYY+pVWY2aGYnKp/rm2aWD3tMrTCz283suJlNmVlsqznM7EYzGzOzk2a2I+zxtMrMHjKz18zsh2GPpVVmtsrMDprZi5V/a58Ne0ytMrN3mNn3zexo5TPd3/R705YaMrN/4u7/WPn6D4D3uvunQh5WS8zsN4AD7n7RzP4TgLvfG/KwFs3Mfh2YAr4K/Ft3Hwl5SAtmZhng74EbgNPA88BWd/9RqANrgZn9C+DnwNfd/X1hj6cVZnYFcIW7/8DMfhE4DPTH/P+PAZe6+8/NLAv8L+Cz7n5ovvem7omgGgQqLgViHwnd/VvufrHy7SHgyjDH0yp3f9Hd47659HXASXd/yd3fAh4Dbgt5TC1x978FXg97HO3g7q+6+w8qX/9f4EUg1j3rveznlW+zlT9N3d9SFwgAzOwLZnYKuAv4d2GPp81+D/jvYQ9CKACnar4/TcxvNEllZquB9cD3wh1J68wsY2ZHgNeAb7t7U58pkYHAzL5jZj8M+HMbgLvf5+6rgEeAz4Q72ubM95kq59wHXKT8uSKtmc8TcxZwLPZPn0ljZr8APAl8ri5bEEvuPunu11LOClxnZk2l8BK5Q5m7f7TJU/8b8Aywq4PDaYv5PpOZ3QN8DPiIx2DiZwH/j+LqNLCq5vsrgTMhjUUCVPLoTwKPuPu+sMfTTu4+bmbfBW4E5p3cT+QTwVzM7Oqab28FToQ1lnYxsxuBe4Fb3f182OMRoDw5fLWZXWVmlwB3AvtDHpNUVCZWvwa86O5/GvZ42sHMVlQrBs0sB3yUJu9vaawaehJYQ7kq5afAp9y9GO6oWmNmJ4GlwP+pHDoU50ooM/s48J+BFcA4cMTdN4c7qoUzs5uALwIZ4CF3/0LIQ2qJmT0KfIhyd8ufAbvc/WuhDmqRzOyfA38HHKN8LwD4Y3d/NrxRtcbM3g/8FeV/bz3AE+7+75t6b9oCgYiIzJS61JCIiMykQCAiknIKBCIiKadAICKScgoEIiIpp0AgIpJyCgQiIimnQCAiknL/H1BtkPJRCSR0AAAAAElFTkSuQmCC\n",
42 | "text/plain": [
43 | ""
44 | ]
45 | },
46 | "metadata": {
47 | "needs_background": "light"
48 | },
49 | "output_type": "display_data"
50 | }
51 | ],
52 | "source": [
53 | "plt.scatter(X,y)"
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": null,
59 | "metadata": {},
60 | "outputs": [],
61 | "source": []
62 | }
63 | ],
64 | "metadata": {
65 | "kernelspec": {
66 | "display_name": "Python 3",
67 | "language": "python",
68 | "name": "python3"
69 | },
70 | "language_info": {
71 | "codemirror_mode": {
72 | "name": "ipython",
73 | "version": 3
74 | },
75 | "file_extension": ".py",
76 | "mimetype": "text/x-python",
77 | "name": "python",
78 | "nbconvert_exporter": "python",
79 | "pygments_lexer": "ipython3",
80 | "version": "3.7.1"
81 | }
82 | },
83 | "nbformat": 4,
84 | "nbformat_minor": 2
85 | }
86 |
--------------------------------------------------------------------------------
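Note on /Code/make_regression.ipynb — because coef=True returns the true coefficient, the synthetic data is handy for sanity-checking an estimator. A short sketch using the linear_model import already present in the notebook:

    lr = linear_model.LinearRegression()
    lr.fit(X, y)
    print(lr.coef_[0], coef)   # the fitted slope should be close to the true coefficient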
/Code/map python.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Understand Pandas map()"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 15,
13 | "metadata": {},
14 | "outputs": [
15 | {
16 | "data": {
17 | "application/javascript": [
18 | "\n",
19 | " if (window._pyforest_update_imports_cell) { window._pyforest_update_imports_cell('import pandas as pd'); }\n",
20 | " "
21 | ],
22 | "text/plain": [
23 | ""
24 | ]
25 | },
26 | "metadata": {},
27 | "output_type": "display_data"
28 | },
29 | {
30 | "data": {
31 | "text/html": [
32 | "\n",
33 | "\n",
46 | "
\n",
47 | " \n",
48 | " \n",
49 | " | \n",
50 | " name | \n",
51 | " age | \n",
52 | " profession | \n",
53 | " city | \n",
54 | "
\n",
55 | " \n",
56 | " \n",
57 | " \n",
58 | " 0 | \n",
59 | " A | \n",
60 | " 22 | \n",
61 | " data engineer | \n",
62 | " Gurgaon | \n",
63 | "
\n",
64 | " \n",
65 | " 1 | \n",
66 | " B | \n",
67 | " 26 | \n",
68 | " data scientist | \n",
69 | " Bangalore | \n",
70 | "
\n",
71 | " \n",
72 | " 2 | \n",
73 | " C | \n",
74 | " 33 | \n",
75 | " entrepreneur | \n",
76 | " Gurgaon | \n",
77 | "
\n",
78 | " \n",
79 | " 3 | \n",
80 | " D | \n",
81 | " 44 | \n",
82 | " business analyst | \n",
83 | " Pune | \n",
84 | "
\n",
85 | " \n",
86 | " 4 | \n",
87 | " E | \n",
88 | " 50 | \n",
89 | " self-employed | \n",
90 | " New Delhi | \n",
91 | "
\n",
92 | " \n",
93 | "
\n",
94 | "
"
95 | ],
96 | "text/plain": [
97 | " name age profession city\n",
98 | "0 A 22 data engineer Gurgaon\n",
99 | "1 B 26 data scientist Bangalore\n",
100 | "2 C 33 entrepreneur Gurgaon\n",
101 | "3 D 44 business analyst Pune\n",
102 | "4 E 50 self-employed New Delhi"
103 | ]
104 | },
105 | "execution_count": 15,
106 | "metadata": {},
107 | "output_type": "execute_result"
108 | }
109 | ],
110 | "source": [
111 | "#sample data\n",
112 | "data = {'name': ['A', 'B', 'C', 'D', 'E'], \n",
113 | " 'age': [22, 26, 33, 44, 50],\n",
114 | " 'profession' : ['data engineer', 'data scientist', 'entrepreneur', 'business analyst', 'self-employed'], \n",
115 | " 'city': ['Gurgaon', 'Bangalore', 'Gurgaon', 'Pune', 'New Delhi']}\n",
116 | "\n",
117 | "df = pd.DataFrame(data)\n",
118 | "df"
119 | ]
120 | },
121 | {
122 | "cell_type": "code",
123 | "execution_count": 12,
124 | "metadata": {},
125 | "outputs": [],
126 | "source": [
127 | "# dictionary to map city with states\n",
128 | "map_city_to_states = { 'Gurgaon' : 'Haryana', \n",
129 | " 'Bangalore' : 'Karnataka', \n",
130 | " 'Pune' : 'Maharashtra', \n",
131 | " 'New Delhi' : 'Delhi'}"
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "execution_count": 13,
137 | "metadata": {},
138 | "outputs": [
139 | {
140 | "data": {
141 | "text/html": [
142 | "\n",
143 | "\n",
156 | "
\n",
157 | " \n",
158 | " \n",
159 | " | \n",
160 | " name | \n",
161 | " age | \n",
162 | " profession | \n",
163 | " city | \n",
164 | " state | \n",
165 | "
\n",
166 | " \n",
167 | " \n",
168 | " \n",
169 | " 0 | \n",
170 | " A | \n",
171 | " 22 | \n",
172 | " data engineer | \n",
173 | " Gurgaon | \n",
174 | " Haryana | \n",
175 | "
\n",
176 | " \n",
177 | " 1 | \n",
178 | " B | \n",
179 | " 26 | \n",
180 | " data scientist | \n",
181 | " Bangalore | \n",
182 | " Karnataka | \n",
183 | "
\n",
184 | " \n",
185 | " 2 | \n",
186 | " C | \n",
187 | " 33 | \n",
188 | " entrepreneur | \n",
189 | " Gurgaon | \n",
190 | " Haryana | \n",
191 | "
\n",
192 | " \n",
193 | " 3 | \n",
194 | " D | \n",
195 | " 44 | \n",
196 | " business analyst | \n",
197 | " Pune | \n",
198 | " Maharashtra | \n",
199 | "
\n",
200 | " \n",
201 | " 4 | \n",
202 | " E | \n",
203 | " 50 | \n",
204 | " self-employed | \n",
205 | " New Delhi | \n",
206 | " Delhi | \n",
207 | "
\n",
208 | " \n",
209 | "
\n",
210 | "
"
211 | ],
212 | "text/plain": [
213 | " name age profession city state\n",
214 | "0 A 22 data engineer Gurgaon Haryana\n",
215 | "1 B 26 data scientist Bangalore Karnataka\n",
216 | "2 C 33 entrepreneur Gurgaon Haryana\n",
217 | "3 D 44 business analyst Pune Maharashtra\n",
218 | "4 E 50 self-employed New Delhi Delhi"
219 | ]
220 | },
221 | "execution_count": 13,
222 | "metadata": {},
223 | "output_type": "execute_result"
224 | }
225 | ],
226 | "source": [
227 | "# apply pandas map to map the city columns with states\n",
228 | "df['state'] = df['city'].map(map_city_to_states)\n",
229 | "df"
230 | ]
231 | },
232 | {
233 | "cell_type": "code",
234 | "execution_count": null,
235 | "metadata": {},
236 | "outputs": [],
237 | "source": []
238 | }
239 | ],
240 | "metadata": {
241 | "kernelspec": {
242 | "display_name": "Python 3",
243 | "language": "python",
244 | "name": "python3"
245 | },
246 | "language_info": {
247 | "codemirror_mode": {
248 | "name": "ipython",
249 | "version": 3
250 | },
251 | "file_extension": ".py",
252 | "mimetype": "text/x-python",
253 | "name": "python",
254 | "nbconvert_exporter": "python",
255 | "pygments_lexer": "ipython3",
256 | "version": "3.7.1"
257 | }
258 | },
259 | "nbformat": 4,
260 | "nbformat_minor": 2
261 | }
262 |
--------------------------------------------------------------------------------
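Note on /Code/map python.ipynb — map() returns NaN for any city missing from the dictionary; chain fillna() if a default label is preferred (the notebook itself relies on pyforest to import pandas lazily):

    df['state'] = df['city'].map(map_city_to_states).fillna('Unknown')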
/Code/merge_pdf.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from PyPDF2 import PdfFileMerger"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 2,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "pdfs = ['../Data/dummy_pdf_page1.pdf', '../Data/dummy_pdf_page2.pdf']\n",
19 | "output_filename = 'merged_pdf.pdf'"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 3,
25 | "metadata": {},
26 | "outputs": [
27 | {
28 | "name": "stdout",
29 | "output_type": "stream",
30 | "text": [
31 | "Output file is merged_pdf.pdf\n"
32 | ]
33 | }
34 | ],
35 | "source": [
36 | "merger = PdfFileMerger()\n",
37 | "\n",
38 | "for pdf in pdfs:\n",
39 | " merger.append(pdf)\n",
40 | "\n",
41 | "merger.write(output_filename)\n",
42 | "print('Output file is', output_filename)\n",
43 | "merger.close()"
44 | ]
45 | }
46 | ],
47 | "metadata": {
48 | "kernelspec": {
49 | "display_name": "Python 3",
50 | "language": "python",
51 | "name": "python3"
52 | },
53 | "language_info": {
54 | "codemirror_mode": {
55 | "name": "ipython",
56 | "version": 3
57 | },
58 | "file_extension": ".py",
59 | "mimetype": "text/x-python",
60 | "name": "python",
61 | "nbconvert_exporter": "python",
62 | "pygments_lexer": "ipython3",
63 | "version": "3.7.3"
64 | }
65 | },
66 | "nbformat": 4,
67 | "nbformat_minor": 2
68 | }
69 |
--------------------------------------------------------------------------------
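
The `append` call used above also takes a `pages` range, which helps when only part of each document should go into the merged file. A small sketch under that assumption, reusing the `pdfs` list from the notebook above (`pages` is a half-open `(start, stop)` range):

from PyPDF2 import PdfFileMerger

merger = PdfFileMerger()
for pdf in pdfs:
    # keep only the first page of each input file
    merger.append(pdf, pages=(0, 1))
merger.write('merged_first_pages.pdf')
merger.close()
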
/Code/pandarellel.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "# !pip3 install pandarallel"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 1,
15 | "metadata": {},
16 | "outputs": [
17 | {
18 | "name": "stderr",
19 | "output_type": "stream",
20 | "text": [
21 | "/home/lakshay/.local/lib/python3.7/site-packages/tqdm/_tqdm.py:634: FutureWarning: The Panel class is removed from pandas. Accessing it from the top-level namespace will also be removed in the next version\n",
22 | " from pandas import Panel\n"
23 | ]
24 | }
25 | ],
26 | "source": [
27 | "%load_ext autoreload\n",
28 | "%autoreload 2\n",
29 | "import pandas as pd\n",
30 | "import time\n",
31 | "from pandarallel import pandarallel\n",
32 | "import math\n",
33 | "import numpy as np\n",
34 | "import random\n",
35 | "from tqdm._tqdm_notebook import tqdm_notebook\n",
36 | "tqdm_notebook.pandas()"
37 | ]
38 | },
39 | {
40 | "cell_type": "code",
41 | "execution_count": 2,
42 | "metadata": {},
43 | "outputs": [
44 | {
45 | "name": "stdout",
46 | "output_type": "stream",
47 | "text": [
48 | "INFO: Pandarallel will run on 8 workers.\n",
49 | "INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n"
50 | ]
51 | }
52 | ],
53 | "source": [
54 | "pandarallel.initialize(progress_bar=True)"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": 3,
60 | "metadata": {},
61 | "outputs": [],
62 | "source": [
63 | "df = pd.DataFrame({\n",
64 | " 'A' : [random.randint(15,20) for i in range(1,1000000) ],\n",
65 | " 'B' : [random.randint(10,30) for i in range(1,1000000) ]\n",
66 | "})"
67 | ]
68 | },
69 | {
70 | "cell_type": "code",
71 | "execution_count": 4,
72 | "metadata": {},
73 | "outputs": [],
74 | "source": [
75 | "def func(x):\n",
76 | " return math.sin(x.A**2) + math.sin(x.B**2) + math.tan(x.A**2)"
77 | ]
78 | },
79 | {
80 | "cell_type": "markdown",
81 | "metadata": {},
82 | "source": [
83 | "---\n",
84 | "\n",
85 | "#### `WITHOUT PARALLELIZATION`\n",
86 | "\n",
87 | "---"
88 | ]
89 | },
90 | {
91 | "cell_type": "code",
92 | "execution_count": 5,
93 | "metadata": {},
94 | "outputs": [
95 | {
96 | "data": {
97 | "application/vnd.jupyter.widget-view+json": {
98 | "model_id": "5f096c0d2622441c9557dfc7abebcdb4",
99 | "version_major": 2,
100 | "version_minor": 0
101 | },
102 | "text/plain": [
103 | "HBox(children=(IntProgress(value=0, max=999999), HTML(value='')))"
104 | ]
105 | },
106 | "metadata": {},
107 | "output_type": "display_data"
108 | },
109 | {
110 | "name": "stdout",
111 | "output_type": "stream",
112 | "text": [
113 | "\n",
114 | "CPU times: user 33.7 s, sys: 148 ms, total: 33.9 s\n",
115 | "Wall time: 33.7 s\n"
116 | ]
117 | }
118 | ],
119 | "source": [
120 | "%%time\n",
121 | "res = df.progress_apply(func, axis=1)"
122 | ]
123 | },
124 | {
125 | "cell_type": "markdown",
126 | "metadata": {},
127 | "source": [
128 | "---\n",
129 | "\n",
130 | "#### `WITH PARALLELIZATION`\n",
131 | "\n",
132 | "---"
133 | ]
134 | },
135 | {
136 | "cell_type": "code",
137 | "execution_count": 6,
138 | "metadata": {},
139 | "outputs": [
140 | {
141 | "data": {
142 | "application/vnd.jupyter.widget-view+json": {
143 | "model_id": "16dc675b21954c8da7941a4ad861441b",
144 | "version_major": 2,
145 | "version_minor": 0
146 | },
147 | "text/plain": [
148 | "VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=125000), Label(value='0 / 125000')…"
149 | ]
150 | },
151 | "metadata": {},
152 | "output_type": "display_data"
153 | },
154 | {
155 | "name": "stdout",
156 | "output_type": "stream",
157 | "text": [
158 | "CPU times: user 1.07 s, sys: 89.7 ms, total: 1.16 s\n",
159 | "Wall time: 11.7 s\n"
160 | ]
161 | }
162 | ],
163 | "source": [
164 | "%%time\n",
165 | "res_parallel = df.parallel_apply(func, axis=1)"
166 | ]
167 | },
168 | {
169 | "cell_type": "code",
170 | "execution_count": null,
171 | "metadata": {},
172 | "outputs": [],
173 | "source": []
174 | },
175 | {
176 | "cell_type": "code",
177 | "execution_count": null,
178 | "metadata": {},
179 | "outputs": [],
180 | "source": []
181 | }
182 | ],
183 | "metadata": {
184 | "kernelspec": {
185 | "display_name": "Python 3",
186 | "language": "python",
187 | "name": "python3"
188 | },
189 | "language_info": {
190 | "codemirror_mode": {
191 | "name": "ipython",
192 | "version": 3
193 | },
194 | "file_extension": ".py",
195 | "mimetype": "text/x-python",
196 | "name": "python",
197 | "nbconvert_exporter": "python",
198 | "pygments_lexer": "ipython3",
199 | "version": "3.7.3"
200 | }
201 | },
202 | "nbformat": 4,
203 | "nbformat_minor": 2
204 | }
205 |
--------------------------------------------------------------------------------
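
For this particular `func`, the fastest option is to skip `apply` altogether: NumPy's trigonometric functions are vectorised, so the whole column can be computed in one shot. A short sketch on the same `df`, useful as a baseline against the timings above:

import numpy as np

# vectorised equivalent of func: whole-column operations, no per-row Python calls
res_vectorised = np.sin(df['A']**2) + np.sin(df['B']**2) + np.tan(df['A']**2)

pandarallel earns its keep when the row-wise function genuinely cannot be vectorised; when it can, vectorisation usually beats both `progress_apply` and `parallel_apply`.
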
/Code/pandas_crosstab.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Pandas Crosstab"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "# import pandas\n",
17 | "import pandas as pd"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 5,
23 | "metadata": {},
24 | "outputs": [],
25 | "source": [
26 | "#Import dataset\n",
27 | "data = pd.read_csv('../Data/loan_train.csv', index_col = 'Loan_ID')"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 6,
33 | "metadata": {},
34 | "outputs": [
35 | {
36 | "data": {
85 | "text/plain": [
86 | "Loan_Status N Y\n",
87 | "Credit_History \n",
88 | "0.0 0.921348 0.078652\n",
89 | "1.0 0.204211 0.795789\n",
90 | "All 0.317376 0.682624"
91 | ]
92 | },
93 | "execution_count": 6,
94 | "metadata": {},
95 | "output_type": "execute_result"
96 | }
97 | ],
98 | "source": [
99 | "pd.crosstab(data[\"Credit_History\"],data[\"Loan_Status\"],margins=True, normalize = 'index')"
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "execution_count": null,
105 | "metadata": {},
106 | "outputs": [],
107 | "source": []
108 | }
109 | ],
110 | "metadata": {
111 | "kernelspec": {
112 | "display_name": "Python 3",
113 | "language": "python",
114 | "name": "python3"
115 | },
116 | "language_info": {
117 | "codemirror_mode": {
118 | "name": "ipython",
119 | "version": 3
120 | },
121 | "file_extension": ".py",
122 | "mimetype": "text/x-python",
123 | "name": "python",
124 | "nbconvert_exporter": "python",
125 | "pygments_lexer": "ipython3",
126 | "version": "3.7.1"
127 | }
128 | },
129 | "nbformat": 4,
130 | "nbformat_minor": 2
131 | }
132 |
--------------------------------------------------------------------------------
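
`pd.crosstab` can also aggregate a numeric column instead of just counting rows, via the `values` and `aggfunc` arguments. A minimal sketch against the same loan dataset loaded above:

import numpy as np

# mean loan amount for every Credit_History x Loan_Status combination
pd.crosstab(data['Credit_History'], data['Loan_Status'],
            values=data['LoanAmount'], aggfunc=np.mean)
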
/Code/pandas_pivot_table.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Pandas Pivot Table"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 5,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "# import pandas\n",
17 | "import pandas as pd\n",
18 | "import numpy as np"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 6,
24 | "metadata": {},
25 | "outputs": [],
26 | "source": [
27 | "#Import dataset\n",
28 | "loan = pd.read_csv('../Data/loan_train.csv', index_col = 'Loan_ID')"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 7,
34 | "metadata": {},
35 | "outputs": [
36 | {
37 | "data": {
111 | "text/plain": [
112 | " LoanAmount\n",
113 | "Gender Married Self_Employed \n",
114 | "Female No No 110.596774\n",
115 | " Yes 125.800000\n",
116 | " Yes No 135.480000\n",
117 | " Yes 282.250000\n",
118 | "Male No No 128.137255\n",
119 | " Yes 173.625000\n",
120 | " Yes No 151.709220\n",
121 | " Yes 169.355556"
122 | ]
123 | },
124 | "execution_count": 7,
125 | "metadata": {},
126 | "output_type": "execute_result"
127 | }
128 | ],
129 | "source": [
130 | "pivot = loan.pivot_table(values = ['LoanAmount'], index = ['Gender', 'Married', 'Self_Employed'], aggfunc = np.mean)\n",
131 | "pivot"
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "execution_count": null,
137 | "metadata": {},
138 | "outputs": [],
139 | "source": []
140 | }
141 | ],
142 | "metadata": {
143 | "kernelspec": {
144 | "display_name": "Python 3",
145 | "language": "python",
146 | "name": "python3"
147 | },
148 | "language_info": {
149 | "codemirror_mode": {
150 | "name": "ipython",
151 | "version": 3
152 | },
153 | "file_extension": ".py",
154 | "mimetype": "text/x-python",
155 | "name": "python",
156 | "nbconvert_exporter": "python",
157 | "pygments_lexer": "ipython3",
158 | "version": "3.7.1"
159 | }
160 | },
161 | "nbformat": 4,
162 | "nbformat_minor": 2
163 | }
164 |
--------------------------------------------------------------------------------
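
`pivot_table` also accepts a list of aggregation functions, a `columns` argument and a `fill_value` for empty groups. A short sketch on the same `loan` DataFrame:

# mean and count of LoanAmount per Gender/Married group, split by Self_Employed
loan.pivot_table(values='LoanAmount',
                 index=['Gender', 'Married'],
                 columns='Self_Employed',
                 aggfunc=['mean', 'count'],
                 fill_value=0)
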
/Code/pandas_read_html_files.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 |     "# ***Reading all the tables of a webpage into pandas DataFrames***"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import pandas as pd"
17 | ]
18 | },
19 | {
20 | "cell_type": "markdown",
21 | "metadata": {},
22 | "source": [
23 | "### ***Let the url be: \"https://en.wikipedia.org/wiki/States_and_union_territories_of_India\"***"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": 3,
29 | "metadata": {},
30 | "outputs": [],
31 | "source": [
32 | "dflist=pd.read_html('https://en.wikipedia.org/wiki/States_and_union_territories_of_India')"
33 | ]
34 | },
35 | {
36 | "cell_type": "markdown",
37 | "metadata": {},
38 | "source": [
39 | "Here dflist is a list of dataframes. Each dataframe is associated with one of the tables present on the page."
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": 4,
45 | "metadata": {},
46 | "outputs": [
47 | {
48 | "data": {
102 | "text/plain": [
103 | " 0 \\\n",
104 | "0 States and union territories of India \n",
105 | "1 Category \n",
106 | "2 Location \n",
107 | "3 Number \n",
108 | "4 Populations \n",
109 | "\n",
110 | " 1 \n",
111 | "0 NaN \n",
112 | "1 Federated states \n",
113 | "2 Republic of India \n",
114 | "3 28 States8 Union territories \n",
115 | "4 States: Sikkim - 610,577 (lowest); Uttar Prade... "
116 | ]
117 | },
118 | "execution_count": 4,
119 | "metadata": {},
120 | "output_type": "execute_result"
121 | }
122 | ],
123 | "source": [
124 | "dflist[0].head()"
125 | ]
126 | },
127 | {
128 | "cell_type": "code",
129 | "execution_count": 5,
130 | "metadata": {},
131 | "outputs": [
132 | {
133 | "data": {
241 | "text/plain": [
242 | " 0 1 2 3 \\\n",
243 | "0 State ISO 3166-2:IN Vehiclecode Zone \n",
244 | "1 Andhra Pradesh IN-AP AP Southern \n",
245 | "2 Arunachal Pradesh IN-AR AR North-Eastern \n",
246 | "3 Assam IN-AS AS North-Eastern \n",
247 | "4 Bihar IN-BR BR Eastern \n",
248 | "\n",
249 | " 4 5 \\\n",
250 | "0 Capital Largest city \n",
251 | "1 Hyderabad (de jure)Amaravati (de facto) Note 1... Visakhapatnam \n",
252 | "2 Itanagar 20 February 1987 \n",
253 | "3 Dispur Guwahati \n",
254 | "4 Patna 26 January 1950 \n",
255 | "\n",
256 | " 6 7 8 9 \\\n",
257 | "0 Statehood Population[35] Area(km2) Officiallanguages[36] \n",
258 | "1 1 October 1953 49506799 160205 Telugu \n",
259 | "2 1383727 83743 English — \n",
260 | "3 26 January 1950 31205576 78550 Assamese \n",
261 | "4 104099452 94163 Hindi Urdu \n",
262 | "\n",
263 | " 10 \n",
264 | "0 Additional officiallanguages[36] \n",
265 | "1 — \n",
266 | "2 NaN \n",
267 | "3 Bengali, Bodo \n",
268 | "4 NaN "
269 | ]
270 | },
271 | "execution_count": 5,
272 | "metadata": {},
273 | "output_type": "execute_result"
274 | }
275 | ],
276 | "source": [
277 | "dflist[3].head()"
278 | ]
279 | },
280 | {
281 | "cell_type": "code",
282 | "execution_count": null,
283 | "metadata": {},
284 | "outputs": [],
285 | "source": []
286 | }
287 | ],
288 | "metadata": {
289 | "kernelspec": {
290 | "display_name": "Python 3",
291 | "language": "python",
292 | "name": "python3"
293 | },
294 | "language_info": {
295 | "codemirror_mode": {
296 | "name": "ipython",
297 | "version": 3
298 | },
299 | "file_extension": ".py",
300 | "mimetype": "text/x-python",
301 | "name": "python",
302 | "nbconvert_exporter": "python",
303 | "pygments_lexer": "ipython3",
304 | "version": "3.7.1"
305 | }
306 | },
307 | "nbformat": 4,
308 | "nbformat_minor": 2
309 | }
310 |
--------------------------------------------------------------------------------
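
`pd.read_html` returns every table it can find, so picking the right list index is often trial and error. The `match` argument keeps only tables whose text matches a given string, and `header=0` promotes the first row to column names. A sketch under those assumptions:

url = 'https://en.wikipedia.org/wiki/States_and_union_territories_of_India'

# keep only tables mentioning 'Vehicle', with the first row used as the header
states = pd.read_html(url, match='Vehicle', header=0)
len(states), states[0].columns[:4]
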
/Code/pdb_pythonDebugger.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Python debugger"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "# define function 1\n",
17 | "def func_div(x, y):\n",
18 | " return x / y\n",
19 | "\n",
20 |     "# define function 2\n",
21 | "def func_sub(a):\n",
22 | " x = a\n",
23 | " y = a - 1\n",
24 | " return func_div(x, y)"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": 2,
30 | "metadata": {},
31 | "outputs": [
32 | {
33 | "ename": "ZeroDivisionError",
34 | "evalue": "division by zero",
35 | "output_type": "error",
36 | "traceback": [
37 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
38 | "\u001b[1;31mZeroDivisionError\u001b[0m Traceback (most recent call last)",
39 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mfunc_sub\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
40 | "\u001b[1;32m\u001b[0m in \u001b[0;36mfunc_sub\u001b[1;34m(a)\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[0mx\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0ma\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 8\u001b[0m \u001b[0my\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0ma\u001b[0m \u001b[1;33m-\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 9\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc_div\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
41 | "\u001b[1;32m\u001b[0m in \u001b[0;36mfunc_div\u001b[1;34m(x, y)\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;31m# define function 1\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mfunc_div\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mx\u001b[0m \u001b[1;33m/\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;31m# define fucntion 2\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
42 | "\u001b[1;31mZeroDivisionError\u001b[0m: division by zero"
43 | ]
44 | }
45 | ],
46 | "source": [
47 | "func_sub(1)"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": 3,
53 | "metadata": {},
54 | "outputs": [
55 | {
56 | "name": "stdout",
57 | "output_type": "stream",
58 | "text": [
59 | "Automatic pdb calling has been turned ON\n"
60 | ]
61 | },
62 | {
63 | "ename": "ZeroDivisionError",
64 | "evalue": "division by zero",
65 | "output_type": "error",
66 | "traceback": [
67 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
68 | "\u001b[1;31mZeroDivisionError\u001b[0m Traceback (most recent call last)",
69 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[0mget_ipython\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrun_line_magic\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'pdb'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m''\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mfunc_sub\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
70 | "\u001b[1;32m\u001b[0m in \u001b[0;36mfunc_sub\u001b[1;34m(a)\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[0mx\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0ma\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 8\u001b[0m \u001b[0my\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0ma\u001b[0m \u001b[1;33m-\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 9\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc_div\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
71 | "\u001b[1;32m\u001b[0m in \u001b[0;36mfunc_div\u001b[1;34m(x, y)\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;31m# define function 1\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mfunc_div\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mx\u001b[0m \u001b[1;33m/\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;31m# define fucntion 2\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
72 | "\u001b[1;31mZeroDivisionError\u001b[0m: division by zero"
73 | ]
74 | },
75 | {
76 | "name": "stdout",
77 | "output_type": "stream",
78 | "text": [
79 | "> \u001b[1;32m\u001b[0m(3)\u001b[0;36mfunc_div\u001b[1;34m()\u001b[0m\n",
80 | "\u001b[1;32m 1 \u001b[1;33m\u001b[1;31m# define function 1\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
81 | "\u001b[0m\u001b[1;32m 2 \u001b[1;33m\u001b[1;32mdef\u001b[0m \u001b[0mfunc_div\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
82 | "\u001b[0m\u001b[1;32m----> 3 \u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mx\u001b[0m \u001b[1;33m/\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
83 | "\u001b[0m\u001b[1;32m 4 \u001b[1;33m\u001b[1;33m\u001b[0m\u001b[0m\n",
84 | "\u001b[0m\u001b[1;32m 5 \u001b[1;33m\u001b[1;31m# define fucntion 2\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
85 | "\u001b[0m\n",
86 | "ipdb> print(x)\n",
87 | "1\n",
88 | "ipdb> print(y)\n",
89 | "0\n",
90 | "ipdb> up\n",
91 | "> \u001b[1;32m\u001b[0m(9)\u001b[0;36mfunc_sub\u001b[1;34m()\u001b[0m\n",
92 | "\u001b[1;32m 5 \u001b[1;33m\u001b[1;31m# define fucntion 2\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
93 | "\u001b[0m\u001b[1;32m 6 \u001b[1;33m\u001b[1;32mdef\u001b[0m \u001b[0mfunc_sub\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
94 | "\u001b[0m\u001b[1;32m 7 \u001b[1;33m \u001b[0mx\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0ma\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
95 | "\u001b[0m\u001b[1;32m 8 \u001b[1;33m \u001b[0my\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0ma\u001b[0m \u001b[1;33m-\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
96 | "\u001b[0m\u001b[1;32m----> 9 \u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc_div\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
97 | "\u001b[0m\n",
98 | "ipdb> up\n",
99 | "> \u001b[1;32m\u001b[0m(2)\u001b[0;36m\u001b[1;34m()\u001b[0m\n",
100 | "\u001b[1;32m 1 \u001b[1;33m\u001b[0mget_ipython\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrun_line_magic\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'pdb'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m''\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
101 | "\u001b[0m\u001b[1;32m----> 2 \u001b[1;33m\u001b[0mfunc_sub\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
102 | "\u001b[0m\n",
103 | "ipdb> quit\n"
104 | ]
105 | }
106 | ],
107 | "source": [
108 | "%pdb\n",
109 | "func_sub(1)"
110 | ]
111 | },
112 | {
113 | "cell_type": "code",
114 | "execution_count": null,
115 | "metadata": {},
116 | "outputs": [],
117 | "source": []
118 | }
119 | ],
120 | "metadata": {
121 | "kernelspec": {
122 | "display_name": "Python 3",
123 | "language": "python",
124 | "name": "python3"
125 | },
126 | "language_info": {
127 | "codemirror_mode": {
128 | "name": "ipython",
129 | "version": 3
130 | },
131 | "file_extension": ".py",
132 | "mimetype": "text/x-python",
133 | "name": "python",
134 | "nbconvert_exporter": "python",
135 | "pygments_lexer": "ipython3",
136 | "version": "3.7.1"
137 | }
138 | },
139 | "nbformat": 4,
140 | "nbformat_minor": 2
141 | }
142 |
--------------------------------------------------------------------------------
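
If `%pdb` was not switched on before the failing cell ran, the `%debug` magic opens the same post-mortem session after the fact, and `breakpoint()` (Python 3.7+) drops into the debugger at a chosen line. A minimal sketch:

# run this in a cell right after an exception to inspect the failing frame
%debug

# or stop execution at a specific line while developing
def func_div(x, y):
    breakpoint()   # pauses here and opens pdb
    return x / y
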
/Code/pretty print.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "### Pretty Print dictionary"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 11,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "# Build a test dictionary\n",
17 | "my_dict = {'Student_ID': 34,'Student_name' : 'Tom', 'Student_class' : 5,\n",
18 | " 'Student_marks' : {'maths' : 92,\n",
19 | " 'science' : 95,\n",
20 | " 'social_science' : 65,\n",
21 | " 'English' : 88}\n",
22 | " }\n"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": 12,
28 | "metadata": {},
29 | "outputs": [
30 | {
31 | "name": "stdout",
32 | "output_type": "stream",
33 | "text": [
34 | "{'Student_ID': 34, 'Student_name': 'Tom', 'Student_class': 5, 'Student_marks': {'maths': 92, 'science': 95, 'social_science': 65, 'English': 88}}\n"
35 | ]
36 | }
37 | ],
38 | "source": [
39 | "# with print\n",
40 | "print(my_dict)"
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "execution_count": 13,
46 | "metadata": {},
47 | "outputs": [
48 | {
49 | "name": "stdout",
50 | "output_type": "stream",
51 | "text": [
52 | "{'Student_ID': 34,\n",
53 | " 'Student_class': 5,\n",
54 | " 'Student_marks': {'English': 88,\n",
55 | " 'maths': 92,\n",
56 | " 'science': 95,\n",
57 | " 'social_science': 65},\n",
58 | " 'Student_name': 'Tom'}\n"
59 | ]
60 | }
61 | ],
62 | "source": [
63 | "# with pprint\n",
64 | "import pprint\n",
65 | "pprint.pprint(my_dict)"
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "execution_count": null,
71 | "metadata": {},
72 | "outputs": [],
73 | "source": []
74 | }
75 | ],
76 | "metadata": {
77 | "kernelspec": {
78 | "display_name": "Python 3",
79 | "language": "python",
80 | "name": "python3"
81 | },
82 | "language_info": {
83 | "codemirror_mode": {
84 | "name": "ipython",
85 | "version": 3
86 | },
87 | "file_extension": ".py",
88 | "mimetype": "text/x-python",
89 | "name": "python",
90 | "nbconvert_exporter": "python",
91 | "pygments_lexer": "ipython3",
92 | "version": "3.7.1"
93 | }
94 | },
95 | "nbformat": 4,
96 | "nbformat_minor": 2
97 | }
98 |
--------------------------------------------------------------------------------
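
`pprint` can also cap how much of a nested structure is shown, and `pprint.pformat` returns the formatted text instead of printing it, which is handy for logging. A short sketch on the `my_dict` defined above:

import pprint

# limit the nesting depth and the line width
pprint.pprint(my_dict, depth=1, width=40)

# get the pretty string back instead of printing it
formatted = pprint.pformat(my_dict)
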
/Code/print zenofpython.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [
8 | {
9 | "name": "stdout",
10 | "output_type": "stream",
11 | "text": [
12 | "The Zen of Python, by Tim Peters\n",
13 | "\n",
14 | "Beautiful is better than ugly.\n",
15 | "Explicit is better than implicit.\n",
16 | "Simple is better than complex.\n",
17 | "Complex is better than complicated.\n",
18 | "Flat is better than nested.\n",
19 | "Sparse is better than dense.\n",
20 | "Readability counts.\n",
21 | "Special cases aren't special enough to break the rules.\n",
22 | "Although practicality beats purity.\n",
23 | "Errors should never pass silently.\n",
24 | "Unless explicitly silenced.\n",
25 | "In the face of ambiguity, refuse the temptation to guess.\n",
26 | "There should be one-- and preferably only one --obvious way to do it.\n",
27 | "Although that way may not be obvious at first unless you're Dutch.\n",
28 | "Now is better than never.\n",
29 | "Although never is often better than *right* now.\n",
30 | "If the implementation is hard to explain, it's a bad idea.\n",
31 | "If the implementation is easy to explain, it may be a good idea.\n",
32 | "Namespaces are one honking great idea -- let's do more of those!\n"
33 | ]
34 | }
35 | ],
36 | "source": [
37 | "import this"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": null,
43 | "metadata": {},
44 | "outputs": [],
45 | "source": []
46 | }
47 | ],
48 | "metadata": {
49 | "kernelspec": {
50 | "display_name": "Python 3",
51 | "language": "python",
52 | "name": "python3"
53 | },
54 | "language_info": {
55 | "codemirror_mode": {
56 | "name": "ipython",
57 | "version": 3
58 | },
59 | "file_extension": ".py",
60 | "mimetype": "text/x-python",
61 | "name": "python",
62 | "nbconvert_exporter": "python",
63 | "pygments_lexer": "ipython3",
64 | "version": "3.7.1"
65 | }
66 | },
67 | "nbformat": 4,
68 | "nbformat_minor": 2
69 | }
70 |
--------------------------------------------------------------------------------
/Code/read_csv_ParseDate.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 32,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "#import libraries\n",
10 | "import pandas as pd"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 33,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "# read dataset\n",
20 | "data = pd.read_csv(\"../Data/AirQualityUCI.csv\")"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 34,
26 | "metadata": {},
27 | "outputs": [
28 | {
29 | "data": {
30 | "text/plain": [
31 | "Date object\n",
32 | "Time object\n",
33 | "CO(GT) int64\n",
34 | "PT08.S1(CO) int64\n",
35 | "NMHC(GT) int64\n",
36 | "dtype: object"
37 | ]
38 | },
39 | "execution_count": 34,
40 | "metadata": {},
41 | "output_type": "execute_result"
42 | }
43 | ],
44 | "source": [
45 | "data.dtypes"
46 | ]
47 | },
48 | {
49 | "cell_type": "markdown",
50 | "metadata": {},
51 | "source": [
52 | "## Parse Dates in read_csv()"
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": 35,
58 | "metadata": {},
59 | "outputs": [],
60 | "source": [
61 | "data = pd.read_csv(\"../Data/AirQualityUCI.csv\", parse_dates = [\"Date\"])"
62 | ]
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": 36,
67 | "metadata": {},
68 | "outputs": [
69 | {
70 | "data": {
71 | "text/plain": [
72 | "Date datetime64[ns]\n",
73 | "Time object\n",
74 | "CO(GT) int64\n",
75 | "PT08.S1(CO) int64\n",
76 | "NMHC(GT) int64\n",
77 | "dtype: object"
78 | ]
79 | },
80 | "execution_count": 36,
81 | "metadata": {},
82 | "output_type": "execute_result"
83 | }
84 | ],
85 | "source": [
86 | "data.dtypes"
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": null,
92 | "metadata": {},
93 | "outputs": [],
94 | "source": []
95 | }
96 | ],
97 | "metadata": {
98 | "kernelspec": {
99 | "display_name": "Python 3",
100 | "language": "python",
101 | "name": "python3"
102 | },
103 | "language_info": {
104 | "codemirror_mode": {
105 | "name": "ipython",
106 | "version": 3
107 | },
108 | "file_extension": ".py",
109 | "mimetype": "text/x-python",
110 | "name": "python",
111 | "nbconvert_exporter": "python",
112 | "pygments_lexer": "ipython3",
113 | "version": "3.7.1"
114 | }
115 | },
116 | "nbformat": 4,
117 | "nbformat_minor": 2
118 | }
119 |
--------------------------------------------------------------------------------
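
When the date column uses an unusual pattern, parsing after the read with `pd.to_datetime` gives explicit control. A sketch, assuming the `Date` column follows a day-first pattern; adjust `format` to whatever the file actually contains:

data = pd.read_csv("../Data/AirQualityUCI.csv")

# the format string here is an assumption about the file; change it to match
data['Date'] = pd.to_datetime(data['Date'], format='%d/%m/%Y')
data.dtypes
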
/Code/run python script.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Run a python script inside a notebook"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 4,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "%run add.py"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 5,
22 | "metadata": {},
23 | "outputs": [
24 | {
25 | "data": {
26 | "text/plain": [
27 | "13"
28 | ]
29 | },
30 | "execution_count": 5,
31 | "metadata": {},
32 | "output_type": "execute_result"
33 | }
34 | ],
35 | "source": [
36 | "sum = addition(6,7)\n",
37 | "sum"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": null,
43 | "metadata": {},
44 | "outputs": [],
45 | "source": []
46 | }
47 | ],
48 | "metadata": {
49 | "kernelspec": {
50 | "display_name": "Python 3",
51 | "language": "python",
52 | "name": "python3"
53 | },
54 | "language_info": {
55 | "codemirror_mode": {
56 | "name": "ipython",
57 | "version": 3
58 | },
59 | "file_extension": ".py",
60 | "mimetype": "text/x-python",
61 | "name": "python",
62 | "nbconvert_exporter": "python",
63 | "pygments_lexer": "ipython3",
64 | "version": "3.7.1"
65 | }
66 | },
67 | "nbformat": 4,
68 | "nbformat_minor": 2
69 | }
70 |
--------------------------------------------------------------------------------
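
`%run` executes the script in a fresh namespace and then copies its top-level names (like `addition`) into the notebook, which is why the call works in the next cell. Two useful variations, sketched below: `-i` runs the script inside the notebook's current namespace, and a plain import works as well once the file exists:

%run -i add.py

from add import addition
addition(6, 7)
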
/Code/select_dtype.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 4,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": null,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "#import the dataset\n",
19 |     "data = pd.read_csv('../Data/loan_train.csv')\n",
20 | "data.shape"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 35,
26 | "metadata": {},
27 | "outputs": [
28 | {
29 | "data": {
30 | "text/plain": [
31 | "Loan_ID object\n",
32 | "Gender object\n",
33 | "Married object\n",
34 | "Dependents object\n",
35 | "Education object\n",
36 | "Self_Employed object\n",
37 | "ApplicantIncome int64\n",
38 | "CoapplicantIncome float64\n",
39 | "LoanAmount float64\n",
40 | "Loan_Amount_Term float64\n",
41 | "Credit_History float64\n",
42 | "Property_Area object\n",
43 | "Loan_Status object\n",
44 | "dtype: object"
45 | ]
46 | },
47 | "execution_count": 35,
48 | "metadata": {},
49 | "output_type": "execute_result"
50 | }
51 | ],
52 | "source": [
53 | "#check data types of column\n",
54 | "data.dtypes"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": 36,
60 | "metadata": {},
61 | "outputs": [
62 | {
63 | "data": {
153 | "text/plain": [
154 | " Loan_ID Gender Married Dependents Education Self_Employed \\\n",
155 | "0 LP001002 Male No 0 Graduate No \n",
156 | "1 LP001003 Male Yes 1 Graduate No \n",
157 | "2 LP001005 Male Yes 0 Graduate Yes \n",
158 | "3 LP001006 Male Yes 0 Not Graduate No \n",
159 | "4 LP001008 Male No 0 Graduate No \n",
160 | "\n",
161 | " Property_Area Loan_Status \n",
162 | "0 Urban Y \n",
163 | "1 Rural N \n",
164 | "2 Urban Y \n",
165 | "3 Urban Y \n",
166 | "4 Urban Y "
167 | ]
168 | },
169 | "metadata": {},
170 | "output_type": "display_data"
171 | }
172 | ],
173 | "source": [
174 |     "# Dataframe containing only the categorical variables\n",
175 |     "categorical_var = data.select_dtypes(\"object\")\n",
176 | "display(categorical_var.head())"
177 | ]
178 | },
179 | {
180 | "cell_type": "code",
181 | "execution_count": 37,
182 | "metadata": {},
183 | "outputs": [
184 | {
185 | "data": {
257 | "text/plain": [
258 | " ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term \\\n",
259 | "0 5849 0.0 NaN 360.0 \n",
260 | "1 4583 1508.0 128.0 360.0 \n",
261 | "2 3000 0.0 66.0 360.0 \n",
262 | "3 2583 2358.0 120.0 360.0 \n",
263 | "4 6000 0.0 141.0 360.0 \n",
264 | "\n",
265 | " Credit_History \n",
266 | "0 1.0 \n",
267 | "1 1.0 \n",
268 | "2 1.0 \n",
269 | "3 1.0 \n",
270 | "4 1.0 "
271 | ]
272 | },
273 | "metadata": {},
274 | "output_type": "display_data"
275 | }
276 | ],
277 | "source": [
278 |     "# Dataframe containing only the numeric variables\n",
279 | "numeric_var = data.select_dtypes(\"number\")\n",
280 | "display(numeric_var.head())"
281 | ]
282 | },
283 | {
284 | "cell_type": "code",
285 | "execution_count": null,
286 | "metadata": {},
287 | "outputs": [],
288 | "source": []
289 | }
290 | ],
291 | "metadata": {
292 | "kernelspec": {
293 | "display_name": "Python 3",
294 | "language": "python",
295 | "name": "python3"
296 | },
297 | "language_info": {
298 | "codemirror_mode": {
299 | "name": "ipython",
300 | "version": 3
301 | },
302 | "file_extension": ".py",
303 | "mimetype": "text/x-python",
304 | "name": "python",
305 | "nbconvert_exporter": "python",
306 | "pygments_lexer": "ipython3",
307 | "version": "3.7.1"
308 | }
309 | },
310 | "nbformat": 4,
311 | "nbformat_minor": 2
312 | }
313 |
--------------------------------------------------------------------------------
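
`select_dtypes` also takes `include`/`exclude` lists, which makes it easy to split a DataFrame into feature groups in one pass. A short sketch on the same `data`:

# several dtypes at once
numeric_var = data.select_dtypes(include=['int64', 'float64'])

# everything except the object (string) columns
non_categorical = data.select_dtypes(exclude='object')
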
/Code/split_pdf_pages.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from PyPDF2 import PdfFileWriter, PdfFileReader"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 2,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "FILE_NAME = '../Data/dummy_pdf.pdf'"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 3,
24 | "metadata": {},
25 | "outputs": [
26 | {
27 | "name": "stdout",
28 | "output_type": "stream",
29 | "text": [
30 | "Completed page1.pdf\n",
31 | "Completed page2.pdf\n"
32 | ]
33 | }
34 | ],
35 | "source": [
36 | "inputpdf = PdfFileReader(open(FILE_NAME, \"rb\"))\n",
37 | " \n",
38 | "for i in range(inputpdf.numPages):\n",
39 | " output = PdfFileWriter()\n",
40 | " output.addPage(inputpdf.getPage(i))\n",
41 | " temp_filename = \"page%s.pdf\" % str(i+1)\n",
42 | " with open(temp_filename, \"wb\") as outputStream:\n",
43 | " output.write(outputStream)\n",
44 | " print('Completed', temp_filename)"
45 | ]
46 | }
47 | ],
48 | "metadata": {
49 | "kernelspec": {
50 | "display_name": "Python 3",
51 | "language": "python",
52 | "name": "python3"
53 | },
54 | "language_info": {
55 | "codemirror_mode": {
56 | "name": "ipython",
57 | "version": 3
58 | },
59 | "file_extension": ".py",
60 | "mimetype": "text/x-python",
61 | "name": "python",
62 | "nbconvert_exporter": "python",
63 | "pygments_lexer": "ipython3",
64 | "version": "3.7.3"
65 | }
66 | },
67 | "nbformat": 4,
68 | "nbformat_minor": 2
69 | }
70 |
--------------------------------------------------------------------------------
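
The same reader/writer pair can also pull an arbitrary page range into a single output file instead of one file per page. A minimal sketch with the classes used above:

from PyPDF2 import PdfFileWriter, PdfFileReader

reader = PdfFileReader(open('../Data/dummy_pdf.pdf', 'rb'))
writer = PdfFileWriter()

# copy pages 0 and 1 (if present) into one new file
for i in range(min(2, reader.numPages)):
    writer.addPage(reader.getPage(i))

with open('dummy_pdf_first_pages.pdf', 'wb') as out:
    writer.write(out)
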
/Code/str.split().ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "# import necessary libraries\n",
10 | "import pandas as pd"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 2,
16 | "metadata": {},
17 | "outputs": [
18 | {
19 | "data": {
59 | "text/plain": [
60 | " name\n",
61 | "0 Elise Mccann\n",
62 | "1 Aiden Berger\n",
63 | "2 Elle Kelley"
64 | ]
65 | },
66 | "execution_count": 2,
67 | "metadata": {},
68 | "output_type": "execute_result"
69 | }
70 | ],
71 | "source": [
72 | "# create a dataframe\n",
73 | "df = pd.DataFrame({'name':['Elise Mccann', 'Aiden Berger', 'Elle Kelley']})\n",
74 | "df"
75 | ]
76 | },
77 | {
78 | "cell_type": "code",
79 | "execution_count": 3,
80 | "metadata": {},
81 | "outputs": [
82 | {
83 | "data": {
131 | "text/plain": [
132 | " name first_name last_name\n",
133 | "0 Elise Mccann Elise Mccann\n",
134 | "1 Aiden Berger Aiden Berger\n",
135 | "2 Elle Kelley Elle Kelley"
136 | ]
137 | },
138 | "execution_count": 3,
139 | "metadata": {},
140 | "output_type": "execute_result"
141 | }
142 | ],
143 | "source": [
144 | "# extract first name and last name\n",
145 | "df['first_name'] = df['name'].str.split(' ', expand = True)[0]\n",
146 | "df['last_name'] = df['name'].str.split(' ', expand = True)[1]\n",
147 | "\n",
148 | "df"
149 | ]
150 | },
151 | {
152 | "cell_type": "code",
153 | "execution_count": null,
154 | "metadata": {},
155 | "outputs": [],
156 | "source": []
157 | }
158 | ],
159 | "metadata": {
160 | "kernelspec": {
161 | "display_name": "Python 3",
162 | "language": "python",
163 | "name": "python3"
164 | },
165 | "language_info": {
166 | "codemirror_mode": {
167 | "name": "ipython",
168 | "version": 3
169 | },
170 | "file_extension": ".py",
171 | "mimetype": "text/x-python",
172 | "name": "python",
173 | "nbconvert_exporter": "python",
174 | "pygments_lexer": "ipython3",
175 | "version": "3.7.1"
176 | }
177 | },
178 | "nbformat": 4,
179 | "nbformat_minor": 2
180 | }
181 |
--------------------------------------------------------------------------------
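
For names with more than two words, the default split pushes the extra pieces into additional columns and the last name ends up incomplete. Limiting the number of splits with `n=1` keeps everything after the first space together. A sketch on the same `df`:

# split at most once: 'Anna Maria Berger' -> ['Anna', 'Maria Berger']
names = df['name'].str.split(' ', n=1, expand=True)
df['first_name'], df['last_name'] = names[0], names[1]
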
/Code/tqdm.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "#import tqdm\n",
10 | "from tqdm import tqdm_notebook\n",
11 | "k = 1"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": null,
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "# nested loops \n",
21 | "k = k+1\n",
22 | "for outer_loop in tqdm_notebook(range(1,100), desc = 'outer_loop', leave = True):\n",
23 | " for inner_loop in tqdm_notebook(range(1,10000000,1), desc = 'inner_loop', leave = False):\n",
24 | " k+=100"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": null,
30 | "metadata": {},
31 | "outputs": [],
32 | "source": [
33 | "outer_loop"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": null,
39 | "metadata": {},
40 | "outputs": [],
41 | "source": []
42 | }
43 | ],
44 | "metadata": {
45 | "kernelspec": {
46 | "display_name": "Python 3",
47 | "language": "python",
48 | "name": "python3"
49 | }
50 | },
51 | "nbformat": 4,
52 | "nbformat_minor": 2
53 | }
54 |
--------------------------------------------------------------------------------
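
In recent tqdm releases the notebook-friendly bar lives in `tqdm.notebook`, and the wrapper works on any iterable as long as a `total` is supplied when the length cannot be inferred. A small sketch (the generator is just an illustration):

from tqdm.notebook import tqdm

gen = (i * i for i in range(1_000_000))
# generators have no len(), so pass total explicitly to get a proper progress bar
total_sum = sum(tqdm(gen, total=1_000_000, desc='squares'))
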
/Code/write python script.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Write contents of a cell in a file"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 2,
13 | "metadata": {},
14 | "outputs": [
15 | {
16 | "name": "stdout",
17 | "output_type": "stream",
18 | "text": [
19 | "Overwriting add.py\n"
20 | ]
21 | }
22 | ],
23 | "source": [
24 | "%%writefile add.py\n",
25 | "def addition(x,y):\n",
26 | " return x+y"
27 | ]
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": null,
32 | "metadata": {},
33 | "outputs": [],
34 | "source": []
35 | }
36 | ],
37 | "metadata": {
38 | "kernelspec": {
39 | "display_name": "Python 3",
40 | "language": "python",
41 | "name": "python3"
42 | },
43 | "language_info": {
44 | "codemirror_mode": {
45 | "name": "ipython",
46 | "version": 3
47 | },
48 | "file_extension": ".py",
49 | "mimetype": "text/x-python",
50 | "name": "python",
51 | "nbconvert_exporter": "python",
52 | "pygments_lexer": "ipython3",
53 | "version": "3.7.1"
54 | }
55 | },
56 | "nbformat": 4,
57 | "nbformat_minor": 2
58 | }
59 |
--------------------------------------------------------------------------------
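
`%%writefile` replaces the target file on every run; with the `-a` flag it appends instead, which is convenient for building a script up across several cells. A minimal sketch that adds a second function to the `add.py` created above:

%%writefile -a add.py
def subtraction(x, y):
    return x - y
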
/Data/dummy_pdf.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kunalj101/Data-Science-Hacks/74b5339d6f84154b9b603d122cec560bbf26da6a/Data/dummy_pdf.pdf
--------------------------------------------------------------------------------
/Data/dummy_pdf_page1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kunalj101/Data-Science-Hacks/74b5339d6f84154b9b603d122cec560bbf26da6a/Data/dummy_pdf_page1.pdf
--------------------------------------------------------------------------------
/Data/dummy_pdf_page2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kunalj101/Data-Science-Hacks/74b5339d6f84154b9b603d122cec560bbf26da6a/Data/dummy_pdf_page2.pdf
--------------------------------------------------------------------------------
/Data/muti_input_nn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kunalj101/Data-Science-Hacks/74b5339d6f84154b9b603d122cec560bbf26da6a/Data/muti_input_nn.png
--------------------------------------------------------------------------------