├── CONTRIBUTING.md
├── Code
│   ├── %prun.ipynb
│   ├── %timeit.ipynb
│   ├── Convert normal Distribution.ipynb
│   ├── CustomDataGen_Keras.ipynb
│   ├── Date Parser.ipynb
│   ├── Decision Tree Plot.ipynb
│   ├── Dummy Classifier.ipynb
│   ├── Extract E-mails from text.ipynb
│   ├── FeatureSelection_SelectFromModel.ipynb
│   ├── Funnel_charts.ipynb
│   ├── Hack of the day - Time series.ipynb
│   ├── HoD_history.ipynb
│   ├── HoD_stratify.ipynb
│   ├── Image Augmentation - Article Shoot.ipynb
│   ├── Pandas Apply.ipynb
│   ├── Pandas_boolean indexing.ipynb
│   ├── Pandas_conditional_formatting.ipynb
│   ├── Read multiple datasets.ipynb
│   ├── Removing emojis from text.ipynb
│   ├── Resizing images.ipynb
│   ├── Running Linux Commands in Jupyter Notebook.ipynb
│   ├── Save_sklearn_model.ipynb
│   ├── Styling pandas.ipynb
│   ├── args.ipynb
│   ├── av_hack.ipynb
│   ├── binning_data.ipynb
│   ├── column_transformer.ipynb
│   ├── conditional_selection_of_rows.ipynb
│   ├── convert_string_to_characters.ipynb
│   ├── first and last name extraction.ipynb
│   ├── generator vs list.ipynb
│   ├── groupby_in_pandas.ipynb
│   ├── heatmap_over_code.ipynb
│   ├── hide_print.ipynb
│   ├── interactive plot - plotly.ipynb
│   ├── interactive_notebook.ipynb
│   ├── invert_dictionary.ipynb
│   ├── iterative_imputer.ipynb
│   ├── knnImputer.ipynb
│   ├── list_of_lists_to_list.ipynb
│   ├── magic function - %who.ipynb
│   ├── make_regression.ipynb
│   ├── map python.ipynb
│   ├── melt().ipynb
│   ├── merge_pdf.ipynb
│   ├── pandarellel.ipynb
│   ├── pandas bar plot.ipynb
│   ├── pandas profiling.ipynb
│   ├── pandas_crosstab.ipynb
│   ├── pandas_pivot_table.ipynb
│   ├── pandas_read_html_files.ipynb
│   ├── pdb_pythonDebugger.ipynb
│   ├── plot_confusion_matrix.ipynb
│   ├── pretty print.ipynb
│   ├── print zenofpython.ipynb
│   ├── pyforest.ipynb
│   ├── read_csv_ParseDate.ipynb
│   ├── run python script.ipynb
│   ├── select_dtype.ipynb
│   ├── split_pdf_pages.ipynb
│   ├── str.split().ipynb
│   ├── tqdm.ipynb
│   └── write python script.ipynb
├── Data
│   ├── AirQualityUCI.csv
│   ├── data_cleaned.csv
│   ├── dummy_pdf.pdf
│   ├── dummy_pdf_page1.pdf
│   ├── dummy_pdf_page2.pdf
│   ├── ind-ban-comment.csv
│   ├── loan_train.csv
│   └── muti_input_nn.png
├── LICENSE
└── README.md
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contribution Guidelines
2 |
3 | This project is created and maintained by Analytics Vidhya. The core idea behind these data science hacks is to help the data science community write better and more efficient code. If you would like to contribute your own hack, make sure to follow these guidelines.
4 |
5 | ### Hack idea
6 | 1. Your hack must have at least one of the following characteristics:
7 |
8 |     1. It must save time.
9 |     1. It must require fewer lines of code.
10 |     1. It must provide more functionality.
11 |     1. It must use a better tool or library than an existing one.
12 |
13 | 1. The hack must be related to data science, machine learning, data engineering, or any related field.
14 |
15 |
16 | ### Code and submission steps
17 | 1. Create a pull request describing what your data science hack does.
18 | 1. Add all the necessary files in the same format as used in this project.
19 | 1. The code must include proper and relevant comments.
20 | 1. Each PR will first be reviewed and then approved accordingly.
21 |
--------------------------------------------------------------------------------
/Code/%prun.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "slideshow": {
7 | "slide_type": "slide"
8 | }
9 | },
10 | "source": [
11 | "# Magic Command - %prun"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 1,
17 | "metadata": {
18 | "slideshow": {
19 | "slide_type": "fragment"
20 | }
21 | },
22 | "outputs": [],
23 | "source": [
24 | "# import pandas\n",
25 | "import pandas as pd"
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": 2,
31 | "metadata": {
32 | "slideshow": {
33 | "slide_type": "fragment"
34 | }
35 | },
36 | "outputs": [],
37 | "source": [
38 | "#Import dataset\n",
39 | "loan = pd.read_csv('../Data/loan_train.csv', index_col = 'Loan_ID')"
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": 3,
45 | "metadata": {
46 | "slideshow": {
47 | "slide_type": "fragment"
48 | }
49 | },
50 | "outputs": [],
51 | "source": [
52 | "# Define function\n",
53 | "def missing(x):\n",
54 | " return sum(x.isnull())"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": 16,
60 | "metadata": {
61 | "slideshow": {
62 | "slide_type": "slide"
63 | }
64 | },
65 | "outputs": [
66 | {
67 | "name": "stdout",
68 | "output_type": "stream",
69 | "text": [
70 | "Missing values per column\n",
71 | " "
72 | ]
73 | }
74 | ],
75 | "source": [
76 | "# Apply per column\n",
77 | "\n",
78 | "print('Missing values per column')\n",
79 | "%prun loan.apply(missing, axis = 0).head()"
80 | ]
81 | },
82 | {
83 | "cell_type": "code",
84 | "execution_count": 15,
85 | "metadata": {
86 | "slideshow": {
87 | "slide_type": "slide"
88 | }
89 | },
90 | "outputs": [
91 | {
92 | "name": "stdout",
93 | "output_type": "stream",
94 | "text": [
95 | "Missing values per row\n",
96 | " "
97 | ]
98 | }
99 | ],
100 | "source": [
101 | "# Apply per row\n",
102 | "print('Missing values per row')\n",
103 | "%prun loan.apply(missing, axis = 1).head()"
104 | ]
105 | },
106 | {
107 | "cell_type": "code",
108 | "execution_count": null,
109 | "metadata": {
110 | "slideshow": {
111 | "slide_type": "skip"
112 | }
113 | },
114 | "outputs": [],
115 | "source": []
116 | }
117 | ],
118 | "metadata": {
119 | "kernelspec": {
120 | "display_name": "Python 3",
121 | "language": "python",
122 | "name": "python3"
123 | },
124 | "language_info": {
125 | "codemirror_mode": {
126 | "name": "ipython",
127 | "version": 3
128 | },
129 | "file_extension": ".py",
130 | "mimetype": "text/x-python",
131 | "name": "python",
132 | "nbconvert_exporter": "python",
133 | "pygments_lexer": "ipython3",
134 | "version": "3.7.1"
135 | }
136 | },
137 | "nbformat": 4,
138 | "nbformat_minor": 2
139 | }
140 |
--------------------------------------------------------------------------------
/Code/Date Parser.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import datetime\n",
10 | "import dateutil.parser"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 2,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "input_date = '10th May 1996'\n",
20 | "parsed_date = dateutil.parser.parse(input_date)"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 3,
26 | "metadata": {},
27 | "outputs": [
28 | {
29 | "name": "stdout",
30 | "output_type": "stream",
31 | "text": [
32 | "10-05-1996\n"
33 | ]
34 | }
35 | ],
36 | "source": [
37 | "output_date = datetime.datetime.strftime(parsed_date, '%d-%m-%Y')\n",
38 | "\n",
39 | "print(output_date)"
40 | ]
41 | }
42 | ],
43 | "metadata": {
44 | "kernelspec": {
45 | "display_name": "Python 3",
46 | "language": "python",
47 | "name": "python3"
48 | },
49 | "language_info": {
50 | "codemirror_mode": {
51 | "name": "ipython",
52 | "version": 3
53 | },
54 | "file_extension": ".py",
55 | "mimetype": "text/x-python",
56 | "name": "python",
57 | "nbconvert_exporter": "python",
58 | "pygments_lexer": "ipython3",
59 | "version": "3.7.3"
60 | }
61 | },
62 | "nbformat": 4,
63 | "nbformat_minor": 2
64 | }
65 |
--------------------------------------------------------------------------------
/Code/Dummy Classifier.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Sklearn's Dummy Classifier"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 28,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import pandas as pd\n",
17 | "from sklearn.dummy import DummyClassifier"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 29,
23 | "metadata": {},
24 | "outputs": [
25 | {
26 | "data": {
27 | "text/html": [
28 |       "(HTML table: first five rows of loan_train.csv; same content as the text/plain output below)"
145 | ],
146 | "text/plain": [
147 | " Loan_ID Gender Married Dependents Education Self_Employed \\\n",
148 | "0 LP001002 Male No 0 Graduate No \n",
149 | "1 LP001003 Male Yes 1 Graduate No \n",
150 | "2 LP001005 Male Yes 0 Graduate Yes \n",
151 | "3 LP001006 Male Yes 0 Not Graduate No \n",
152 | "4 LP001008 Male No 0 Graduate No \n",
153 | "\n",
154 | " ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term \\\n",
155 | "0 5849 0.0 NaN 360.0 \n",
156 | "1 4583 1508.0 128.0 360.0 \n",
157 | "2 3000 0.0 66.0 360.0 \n",
158 | "3 2583 2358.0 120.0 360.0 \n",
159 | "4 6000 0.0 141.0 360.0 \n",
160 | "\n",
161 | " Credit_History Property_Area Loan_Status \n",
162 | "0 1.0 Urban Y \n",
163 | "1 1.0 Rural N \n",
164 | "2 1.0 Urban Y \n",
165 | "3 1.0 Urban Y \n",
166 | "4 1.0 Urban Y "
167 | ]
168 | },
169 | "execution_count": 29,
170 | "metadata": {},
171 | "output_type": "execute_result"
172 | }
173 | ],
174 | "source": [
175 | "#import data\n",
176 | "data = pd.read_csv('../Data/loan_train.csv')\n",
177 | "data.head()"
178 | ]
179 | },
180 | {
181 | "cell_type": "code",
182 | "execution_count": 30,
183 | "metadata": {},
184 | "outputs": [],
185 | "source": [
186 | "# Dependent and independent variables\n",
187 | "\n",
188 | "X = data.drop(\"Loan_Status\", axis = 1)\n",
189 | "y = data[\"Loan_Status\"]"
190 | ]
191 | },
192 | {
193 | "cell_type": "code",
194 | "execution_count": 31,
195 | "metadata": {},
196 | "outputs": [
197 | {
198 | "data": {
199 | "text/plain": [
200 | "DummyClassifier(constant=None, random_state=None, strategy='most_frequent')"
201 | ]
202 | },
203 | "execution_count": 31,
204 | "metadata": {},
205 | "output_type": "execute_result"
206 | }
207 | ],
208 | "source": [
209 | "# Dummy classifier with strategy as \"most_frequent\"\n",
210 | "dummy_clf = DummyClassifier(strategy=\"most_frequent\")\n",
211 | "dummy_clf.fit(X, y)"
212 | ]
213 | },
214 | {
215 | "cell_type": "code",
216 | "execution_count": 32,
217 | "metadata": {},
218 | "outputs": [
219 | {
220 | "data": {
221 | "text/plain": [
222 | "array(['Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
223 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
224 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
225 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
226 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
227 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
228 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
229 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
230 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
231 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
232 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
233 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
234 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
235 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
236 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
237 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
238 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
239 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
240 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
241 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
242 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
243 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
244 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
245 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
246 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
247 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
248 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
249 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
250 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
251 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
252 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
253 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
254 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
255 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
256 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
257 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
258 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
259 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
260 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
261 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
262 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
263 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
264 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
265 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
266 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
267 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
268 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n",
269 |        "       'Y', 'Y', 'Y'], dtype='<U1')"
21 |       "(HTML table: first five rows of AirQualityUCI.csv; same content as the text/plain output below)"
89 | ],
90 | "text/plain": [
91 | " Date Time CO(GT) PT08.S1(CO) NMHC(GT)\n",
92 | "0 10/03/2004 18.00.00 2 6 1360\n",
93 | "1 10/03/2004 19.00.00 2 1292 112\n",
94 | "2 10/03/2004 20.00.00 2 2 1402\n",
95 | "3 10/03/2004 21.00.00 2 2 1376\n",
96 | "4 10/03/2004 22.00.00 1 6 1272"
97 | ]
98 | },
99 | "execution_count": 3,
100 | "metadata": {},
101 | "output_type": "execute_result"
102 | }
103 | ],
104 | "source": [
105 | "data = pd.read_csv('AirQualityUCI.csv')\n",
106 | "data.head()"
107 | ]
108 | },
109 | {
110 | "cell_type": "code",
111 | "execution_count": 4,
112 | "metadata": {},
113 | "outputs": [
114 | {
115 | "data": {
116 | "text/plain": [
117 | "Date object\n",
118 | "Time object\n",
119 | "CO(GT) int64\n",
120 | "PT08.S1(CO) int64\n",
121 | "NMHC(GT) int64\n",
122 | "dtype: object"
123 | ]
124 | },
125 | "execution_count": 4,
126 | "metadata": {},
127 | "output_type": "execute_result"
128 | }
129 | ],
130 | "source": [
131 | "data.dtypes"
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "execution_count": 5,
137 | "metadata": {},
138 | "outputs": [],
139 | "source": [
140 | "data['date_time'] = data['Date'] + ' ' + data['Time']\n",
141 | "data['date_time'] = pd.to_datetime(data['date_time'],format='%d/%m/%Y %H.%M.%S')"
142 | ]
143 | },
144 | {
145 | "cell_type": "code",
146 | "execution_count": 6,
147 | "metadata": {},
148 | "outputs": [
149 | {
150 | "data": {
151 | "text/html": [
152 |       "(HTML table: first five rows of new_df with the extracted date-time features; same content as the text/plain output below)"
245 | ],
246 | "text/plain": [
247 | " date_time year month dayofweek quarter hour CO(GT) \\\n",
248 | "0 2004-03-10 18:00:00 2004 3 2 1 18 2 \n",
249 | "1 2004-03-10 19:00:00 2004 3 2 1 19 2 \n",
250 | "2 2004-03-10 20:00:00 2004 3 2 1 20 2 \n",
251 | "3 2004-03-10 21:00:00 2004 3 2 1 21 2 \n",
252 | "4 2004-03-10 22:00:00 2004 3 2 1 22 1 \n",
253 | "\n",
254 | " PT08.S1(CO) NMHC(GT) \n",
255 | "0 6 1360 \n",
256 | "1 1292 112 \n",
257 | "2 2 1402 \n",
258 | "3 2 1376 \n",
259 | "4 6 1272 "
260 | ]
261 | },
262 | "execution_count": 6,
263 | "metadata": {},
264 | "output_type": "execute_result"
265 | }
266 | ],
267 | "source": [
268 | "new_df = pd.DataFrame({\"date_time\": data['date_time'],\n",
269 | " \"year\": data['date_time'].dt.year,\n",
270 | " \"month\": data['date_time'].dt.month,\n",
271 | " \"dayofweek\": data['date_time'].dt.dayofweek,\n",
272 | " \"quarter\": data['date_time'].dt.quarter,\n",
273 | " \"hour\": data['date_time'].dt.hour,\n",
274 | " \"CO(GT)\": data['CO(GT)'],\n",
275 | " \"PT08.S1(CO)\": data['PT08.S1(CO)'],\n",
276 | " \"NMHC(GT)\": data['NMHC(GT)']\n",
277 | " })\n",
278 | "new_df.head()"
279 | ]
280 | },
281 | {
282 | "cell_type": "code",
283 | "execution_count": null,
284 | "metadata": {},
285 | "outputs": [],
286 | "source": []
287 | }
288 | ],
289 | "metadata": {
290 | "kernelspec": {
291 | "display_name": "Python 3",
292 | "language": "python",
293 | "name": "python3"
294 | },
295 | "language_info": {
296 | "codemirror_mode": {
297 | "name": "ipython",
298 | "version": 3
299 | },
300 | "file_extension": ".py",
301 | "mimetype": "text/x-python",
302 | "name": "python",
303 | "nbconvert_exporter": "python",
304 | "pygments_lexer": "ipython3",
305 | "version": "3.7.1"
306 | }
307 | },
308 | "nbformat": 4,
309 | "nbformat_minor": 2
310 | }
311 |
--------------------------------------------------------------------------------
/Code/HoD_history.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 2,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "data = pd.read_csv('loan_train.csv')"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 3,
24 | "metadata": {},
25 | "outputs": [
26 | {
27 | "data": {
28 | "text/html": [
29 |       "(HTML table: first five rows of loan_train.csv; same content as the text/plain output below)"
146 | ],
147 | "text/plain": [
148 | " Loan_ID Gender Married Dependents Education Self_Employed \\\n",
149 | "0 LP001002 Male No 0 Graduate No \n",
150 | "1 LP001003 Male Yes 1 Graduate No \n",
151 | "2 LP001005 Male Yes 0 Graduate Yes \n",
152 | "3 LP001006 Male Yes 0 Not Graduate No \n",
153 | "4 LP001008 Male No 0 Graduate No \n",
154 | "\n",
155 | " ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term \\\n",
156 | "0 5849 0.0 NaN 360.0 \n",
157 | "1 4583 1508.0 128.0 360.0 \n",
158 | "2 3000 0.0 66.0 360.0 \n",
159 | "3 2583 2358.0 120.0 360.0 \n",
160 | "4 6000 0.0 141.0 360.0 \n",
161 | "\n",
162 | " Credit_History Property_Area Loan_Status \n",
163 | "0 1.0 Urban Y \n",
164 | "1 1.0 Rural N \n",
165 | "2 1.0 Urban Y \n",
166 | "3 1.0 Urban Y \n",
167 | "4 1.0 Urban Y "
168 | ]
169 | },
170 | "execution_count": 3,
171 | "metadata": {},
172 | "output_type": "execute_result"
173 | }
174 | ],
175 | "source": [
176 | "data.head()"
177 | ]
178 | },
179 | {
180 | "cell_type": "markdown",
181 | "metadata": {},
182 | "source": [
183 | "#### Use %history - get history of commands used previously"
184 | ]
185 | },
186 | {
187 | "cell_type": "code",
188 | "execution_count": 12,
189 | "metadata": {},
190 | "outputs": [
191 | {
192 | "name": "stdout",
193 | "output_type": "stream",
194 | "text": [
195 | "import pandas as pd\n",
196 | "data = pd.read_csv('loan_train.csv')\n",
197 | "data.head()\n",
198 | "%history\n",
199 | "% history -n\n",
200 | "%history -n\n",
201 | "%history -n -o\n",
202 | "%history -n -t\n",
203 | "# prints\n",
204 | "%history -o\n",
205 | "# list of all commands\n",
206 | "%history\n",
207 | "# Prints line numbers for each command \n",
208 | "%history -n\n",
209 | "# list of all commands\n",
210 | "%history\n"
211 | ]
212 | }
213 | ],
214 | "source": [
215 | "# list of all commands\n",
216 | "%history"
217 | ]
218 | },
219 | {
220 | "cell_type": "raw",
221 | "metadata": {},
222 | "source": [
223 | "# Prints line numbers for each command \n",
224 | "%history -n"
225 | ]
226 | },
227 | {
228 | "cell_type": "code",
229 | "execution_count": null,
230 | "metadata": {},
231 | "outputs": [],
232 | "source": [
233 | "# prints the command as well as output \n",
234 | "%history -o"
235 | ]
236 | },
237 | {
238 | "cell_type": "code",
239 | "execution_count": null,
240 | "metadata": {},
241 | "outputs": [],
242 | "source": [
243 |     "# prints translated history - converts it into valid Python commands before execution\n",
244 | "# eg - %history --> get_ipython().run_line_magic('history', '')\n",
245 | "%history -n -t"
246 | ]
247 | },
248 | {
249 | "cell_type": "code",
250 | "execution_count": null,
251 | "metadata": {},
252 | "outputs": [],
253 | "source": []
254 | }
255 | ],
256 | "metadata": {
257 | "kernelspec": {
258 | "display_name": "Python 3",
259 | "language": "python",
260 | "name": "python3"
261 | },
262 | "language_info": {
263 | "codemirror_mode": {
264 | "name": "ipython",
265 | "version": 3
266 | },
267 | "file_extension": ".py",
268 | "mimetype": "text/x-python",
269 | "name": "python",
270 | "nbconvert_exporter": "python",
271 | "pygments_lexer": "ipython3",
272 | "version": "3.7.1"
273 | }
274 | },
275 | "nbformat": 4,
276 | "nbformat_minor": 2
277 | }
278 |
--------------------------------------------------------------------------------
/Code/HoD_stratify.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "#import libraries\n",
10 | "import pandas as pd\n",
11 | "import numpy as np\n"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 2,
17 | "metadata": {},
18 | "outputs": [
19 | {
20 | "data": {
21 | "text/plain": [
22 | "(891, 25)"
23 | ]
24 | },
25 | "execution_count": 2,
26 | "metadata": {},
27 | "output_type": "execute_result"
28 | }
29 | ],
30 | "source": [
31 | "#import dataset\n",
32 | "data = pd.read_csv('data_cleaned.csv')\n",
33 | "data.shape"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": 3,
39 | "metadata": {},
40 | "outputs": [],
41 | "source": [
42 |     "# Separate dependent and independent variables\n",
43 | "x = data.drop(['Survived'], axis = 1)\n",
44 | "y = data['Survived']"
45 | ]
46 | },
47 | {
48 | "cell_type": "code",
49 | "execution_count": 4,
50 | "metadata": {},
51 | "outputs": [],
52 | "source": [
53 | "# import train_test_split\n",
54 | "from sklearn.model_selection import train_test_split"
55 | ]
56 | },
57 | {
58 | "cell_type": "markdown",
59 | "metadata": {},
60 | "source": [
61 | "## Without stratify"
62 | ]
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": 5,
67 | "metadata": {},
68 | "outputs": [],
69 | "source": [
70 | "train_x, test_x, train_y, test_y = train_test_split(x, y, random_state = 45)"
71 | ]
72 | },
73 | {
74 | "cell_type": "code",
75 | "execution_count": 6,
76 | "metadata": {},
77 | "outputs": [
78 | {
79 | "name": "stdout",
80 | "output_type": "stream",
81 | "text": [
82 | "0 0.60479\n",
83 | "1 0.39521\n",
84 | "Name: Survived, dtype: float64\n",
85 | "0 0.650224\n",
86 | "1 0.349776\n",
87 | "Name: Survived, dtype: float64\n"
88 | ]
89 | }
90 | ],
91 | "source": [
92 | "print(train_y.value_counts(normalize=True))\n",
93 | "print(test_y.value_counts(normalize=True))"
94 | ]
95 | },
96 | {
97 | "cell_type": "markdown",
98 | "metadata": {},
99 | "source": [
100 | "## With Stratify"
101 | ]
102 | },
103 | {
104 | "cell_type": "code",
105 | "execution_count": 7,
106 | "metadata": {},
107 | "outputs": [],
108 | "source": [
109 |     "# With Stratify\n",
110 | "train_X, test_X, train_Y, test_Y = train_test_split(x, y, random_state = 56, stratify = y)"
111 | ]
112 | },
113 | {
114 | "cell_type": "code",
115 | "execution_count": 8,
116 | "metadata": {},
117 | "outputs": [
118 | {
119 | "name": "stdout",
120 | "output_type": "stream",
121 | "text": [
122 | "0 0.616766\n",
123 | "1 0.383234\n",
124 | "Name: Survived, dtype: float64\n",
125 | "0 0.61435\n",
126 | "1 0.38565\n",
127 | "Name: Survived, dtype: float64\n"
128 | ]
129 | }
130 | ],
131 | "source": [
132 | "print(train_Y.value_counts(normalize = True))\n",
133 | "print(test_Y.value_counts(normalize = True))"
134 | ]
135 | },
136 | {
137 | "cell_type": "code",
138 | "execution_count": null,
139 | "metadata": {},
140 | "outputs": [],
141 | "source": []
142 | }
143 | ],
144 | "metadata": {
145 | "kernelspec": {
146 | "display_name": "Python 3",
147 | "language": "python",
148 | "name": "python3"
149 | },
150 | "language_info": {
151 | "codemirror_mode": {
152 | "name": "ipython",
153 | "version": 3
154 | },
155 | "file_extension": ".py",
156 | "mimetype": "text/x-python",
157 | "name": "python",
158 | "nbconvert_exporter": "python",
159 | "pygments_lexer": "ipython3",
160 | "version": "3.7.1"
161 | }
162 | },
163 | "nbformat": 4,
164 | "nbformat_minor": 2
165 | }
166 |
--------------------------------------------------------------------------------
/Code/Pandas Apply.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Pandas Apply"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "# import pandas\n",
17 | "import pandas as pd"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 3,
23 | "metadata": {},
24 | "outputs": [],
25 | "source": [
26 | "#Import dataset\n",
27 | "loan = pd.read_csv('../Data/loan_train.csv', index_col = 'Loan_ID')"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 4,
33 | "metadata": {},
34 | "outputs": [],
35 | "source": [
36 | "# Define function\n",
37 | "def missing(x):\n",
38 | " return sum(x.isnull())"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": 5,
44 | "metadata": {},
45 | "outputs": [
46 | {
47 | "name": "stdout",
48 | "output_type": "stream",
49 | "text": [
50 | "Missing values per column\n"
51 | ]
52 | },
53 | {
54 | "data": {
55 | "text/plain": [
56 | "Gender 13\n",
57 | "Married 3\n",
58 | "Dependents 15\n",
59 | "Education 0\n",
60 | "Self_Employed 32\n",
61 | "dtype: int64"
62 | ]
63 | },
64 | "execution_count": 5,
65 | "metadata": {},
66 | "output_type": "execute_result"
67 | }
68 | ],
69 | "source": [
70 | "# Apply per column\n",
71 | "print('Missing values per column')\n",
72 | "loan.apply(missing, axis = 0).head()"
73 | ]
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": 6,
78 | "metadata": {},
79 | "outputs": [
80 | {
81 | "name": "stdout",
82 | "output_type": "stream",
83 | "text": [
84 | "Missing values per row\n"
85 | ]
86 | },
87 | {
88 | "data": {
89 | "text/plain": [
90 | "Loan_ID\n",
91 | "LP001002 1\n",
92 | "LP001003 0\n",
93 | "LP001005 0\n",
94 | "LP001006 0\n",
95 | "LP001008 0\n",
96 | "dtype: int64"
97 | ]
98 | },
99 | "execution_count": 6,
100 | "metadata": {},
101 | "output_type": "execute_result"
102 | }
103 | ],
104 | "source": [
105 | "# Apply per row\n",
106 | "print('Missing values per row')\n",
107 | "loan.apply(missing, axis = 1).head()"
108 | ]
109 | },
110 | {
111 | "cell_type": "code",
112 | "execution_count": null,
113 | "metadata": {},
114 | "outputs": [],
115 | "source": []
116 | }
117 | ],
118 | "metadata": {
119 | "kernelspec": {
120 | "display_name": "Python 3",
121 | "language": "python",
122 | "name": "python3"
123 | },
124 | "language_info": {
125 | "codemirror_mode": {
126 | "name": "ipython",
127 | "version": 3
128 | },
129 | "file_extension": ".py",
130 | "mimetype": "text/x-python",
131 | "name": "python",
132 | "nbconvert_exporter": "python",
133 | "pygments_lexer": "ipython3",
134 | "version": "3.7.1"
135 | }
136 | },
137 | "nbformat": 4,
138 | "nbformat_minor": 2
139 | }
140 |
--------------------------------------------------------------------------------
/Code/Pandas_boolean indexing.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Pandas Boolean Indexing"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "# import pandas\n",
17 | "import pandas as pd"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 6,
23 | "metadata": {},
24 | "outputs": [
25 | {
26 | "data": {
27 | "text/html": [
28 |       "(HTML table: first five rows of loan_train.csv indexed by Loan_ID; same content as the text/plain output below)"
154 | ],
155 | "text/plain": [
156 | " Gender Married Dependents Education Self_Employed \\\n",
157 | "Loan_ID \n",
158 | "LP001002 Male No 0 Graduate No \n",
159 | "LP001003 Male Yes 1 Graduate No \n",
160 | "LP001005 Male Yes 0 Graduate Yes \n",
161 | "LP001006 Male Yes 0 Not Graduate No \n",
162 | "LP001008 Male No 0 Graduate No \n",
163 | "\n",
164 | " ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term \\\n",
165 | "Loan_ID \n",
166 | "LP001002 5849 0.0 NaN 360.0 \n",
167 | "LP001003 4583 1508.0 128.0 360.0 \n",
168 | "LP001005 3000 0.0 66.0 360.0 \n",
169 | "LP001006 2583 2358.0 120.0 360.0 \n",
170 | "LP001008 6000 0.0 141.0 360.0 \n",
171 | "\n",
172 | " Credit_History Property_Area Loan_Status \n",
173 | "Loan_ID \n",
174 | "LP001002 1.0 Urban Y \n",
175 | "LP001003 1.0 Rural N \n",
176 | "LP001005 1.0 Urban Y \n",
177 | "LP001006 1.0 Urban Y \n",
178 | "LP001008 1.0 Urban Y "
179 | ]
180 | },
181 | "execution_count": 6,
182 | "metadata": {},
183 | "output_type": "execute_result"
184 | }
185 | ],
186 | "source": [
187 | "#Import dataset\n",
188 | "loan = pd.read_csv('../Data/loan_train.csv', index_col = 'Loan_ID')\n",
189 | "loan.head()"
190 | ]
191 | },
192 | {
193 | "cell_type": "code",
194 | "execution_count": 7,
195 | "metadata": {},
196 | "outputs": [
197 | {
198 | "data": {
199 | "text/html": [
200 |       "(HTML table: filtered rows showing Gender, Education and Loan_Status; same content as the text/plain output below)"
263 | ],
264 | "text/plain": [
265 | " Gender Education Loan_Status\n",
266 | "Loan_ID \n",
267 | "LP001155 Female Not Graduate Y\n",
268 | "LP001669 Female Not Graduate Y\n",
269 | "LP001692 Female Not Graduate Y\n",
270 | "LP001908 Female Not Graduate Y\n",
271 | "LP002300 Female Not Graduate Y"
272 | ]
273 | },
274 | "execution_count": 7,
275 | "metadata": {},
276 | "output_type": "execute_result"
277 | }
278 | ],
279 | "source": [
280 |     "# We want a list of all female applicants who are not graduates and got a loan.\n",
281 | "\n",
282 | "loan.loc[(loan['Gender'] == 'Female') & (loan['Education'] == 'Not Graduate') & (loan['Loan_Status'] == 'Y'),\n",
283 | " [\"Gender\",\"Education\",\"Loan_Status\"]].head()\n"
284 | ]
285 | },
286 | {
287 | "cell_type": "code",
288 | "execution_count": null,
289 | "metadata": {},
290 | "outputs": [],
291 | "source": []
292 | }
293 | ],
294 | "metadata": {
295 | "kernelspec": {
296 | "display_name": "Python 3",
297 | "language": "python",
298 | "name": "python3"
299 | },
300 | "language_info": {
301 | "codemirror_mode": {
302 | "name": "ipython",
303 | "version": 3
304 | },
305 | "file_extension": ".py",
306 | "mimetype": "text/x-python",
307 | "name": "python",
308 | "nbconvert_exporter": "python",
309 | "pygments_lexer": "ipython3",
310 | "version": "3.7.1"
311 | }
312 | },
313 | "nbformat": 4,
314 | "nbformat_minor": 2
315 | }
316 |
--------------------------------------------------------------------------------
/Code/Removing emojis from text.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Removing Emojis from Text"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 4,
13 | "metadata": {},
14 | "outputs": [
15 | {
16 | "name": "stdout",
17 | "output_type": "stream",
18 | "text": [
19 | "Raw tweet: Hi 😂! Have a nice weekend 💕👭\n",
20 | "Preprocessed tweet: Hi ! Have a nice weekend \n"
21 | ]
22 | }
23 | ],
24 | "source": [
25 | "text= \"Hi 😂! Have a nice weekend 💕👭\"\n",
26 | "preprocessed_text=text.encode('ascii', 'ignore').decode('ascii')\n",
27 | "\n",
28 | "print(\"Raw tweet:\",text) #with emoji\n",
29 | "print(\"Preprocessed tweet:\",preprocessed_text) # no emoji"
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": null,
35 | "metadata": {},
36 | "outputs": [],
37 | "source": []
38 | }
39 | ],
40 | "metadata": {
41 | "kernelspec": {
42 | "display_name": "Python 3",
43 | "language": "python",
44 | "name": "python3"
45 | },
46 | "language_info": {
47 | "codemirror_mode": {
48 | "name": "ipython",
49 | "version": 3
50 | },
51 | "file_extension": ".py",
52 | "mimetype": "text/x-python",
53 | "name": "python",
54 | "nbconvert_exporter": "python",
55 | "pygments_lexer": "ipython3",
56 | "version": "3.7.1"
57 | }
58 | },
59 | "nbformat": 4,
60 | "nbformat_minor": 2
61 | }
62 |
--------------------------------------------------------------------------------
/Code/Running Linux Commands in Jupyter Notebook.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "---\n",
8 | "\n",
9 | "## Running Linux Commands in Jupyter Notebook\n",
10 | "\n",
11 | "---"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 3,
17 | "metadata": {},
18 | "outputs": [
19 | {
20 | "name": "stdout",
21 | "output_type": "stream",
22 | "text": [
23 | " 606 14292 113629\r\n"
24 | ]
25 | }
26 | ],
27 | "source": [
28 | "# get the number of lines, word count and character count\n",
29 | "!cat 'ind-ban-comment.csv' | wc "
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 4,
35 | "metadata": {},
36 | "outputs": [
37 | {
38 | "name": "stdout",
39 | "output_type": "stream",
40 | "text": [
41 | "Batsman,Batsman_Name,Bowler,Bowler_Name,Commentary,Detail,Dismissed,Id,Isball,Isboundary,Iswicket,Over,Runs,Timestamp\r\n",
42 | "28994,Mohammed Shami,63881,Mustafizur Rahman,\"OUT! Bowled! 5-fer to finish a tremendous last over. His 4th 5-wicket haul in ODIs. Around off, Shami moves across to paddle but misses the ball hits his pads and goes onto hit the stumps. 2 wickets and just 3 runs from the final over. Top notch from Mustafizur. INDIA FINISH WITH 314/9 FROM THEIR 50 OVERS.\",W,28994,346,True,,1,49.6,0,2019-07-02 13:18:47\r\n",
43 | "5132,Bhuvneshwar Kumar,63881,Mustafizur Rahman,\"WIDE AND RUN OUT! Slower delivery outside off, it is on the wrong side of the tramline. Shami comes for a run. Bhuvi was slow though. Rahim throws it to Mustafizur, who hits the stumps at the bowler's end. The umpire takes it upstairs but Bhuvneshwar had started to walk back even before the replays rolled in.\",W+wd,5132,344,True,,1,49.6,1,2019-07-02 13:17:28\r\n",
44 | "28994,Mohammed Shami,63881,Mustafizur Rahman,\"Back of a length ball on off, Shami pulls it to deep mid-wicket and gets a single.\",,,343,True,,,49.5,1,2019-07-02 13:16:03\r\n",
45 | "5132,Bhuvneshwar Kumar,63881,Mustafizur Rahman,\"Just 1 run off the 4 balls. This is World Class stuff. Good length ball on off, Kumar punches it to the side of the pitch and gets a single.\",,,342,True,,,49.4,1,2019-07-02 13:15:17\r\n",
46 | "3676,MS Dhoni,63881,Mustafizur Rahman,\"OUT! No Dhoni magic in the last over. Slower bouncer away from the body, Dhoni looks to flat-bat it straight over the bowler's head. It comes off the upper half of his bat and goes to the left of mid on. Shakib moves across to his left hand side and takes an easy catch.\",W,3676,340,True,,1,49.3,0,2019-07-02 13:13:39\r\n",
47 | "3676,MS Dhoni,63881,Mustafizur Rahman,\"Another dot. Bangladesh will take this. Short and slow, Dhoni pulls it to Shakib at short mid-wicket.\",,,339,True,,,49.2,0,2019-07-02 13:12:47\r\n",
48 | "cat: write error: Broken pipe\r\n"
49 | ]
50 | }
51 | ],
52 | "source": [
53 | "# get the top 7 rows\n",
54 | "!cat 'ind-ban-comment.csv' | head -7"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": null,
60 | "metadata": {},
61 | "outputs": [],
62 | "source": []
63 | }
64 | ],
65 | "metadata": {
66 | "kernelspec": {
67 | "display_name": "Python 3",
68 | "language": "python",
69 | "name": "python3"
70 | },
71 | "language_info": {
72 | "codemirror_mode": {
73 | "name": "ipython",
74 | "version": 3
75 | },
76 | "file_extension": ".py",
77 | "mimetype": "text/x-python",
78 | "name": "python",
79 | "nbconvert_exporter": "python",
80 | "pygments_lexer": "ipython3",
81 | "version": "3.7.3"
82 | }
83 | },
84 | "nbformat": 4,
85 | "nbformat_minor": 2
86 | }
87 |
--------------------------------------------------------------------------------
/Code/Save_sklearn_model.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Save Machine Learning Model using Pickle"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 16,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "#import libraries\n",
17 | "import pandas as pd\n",
18 | "import numpy as np\n",
19 | "import sklearn "
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 17,
25 | "metadata": {},
26 | "outputs": [
27 | {
28 | "data": {
29 | "text/html": [
30 |       "(HTML table: first five rows of data_cleaned.csv; same content as the text/plain output below)"
196 | ],
197 | "text/plain": [
198 | " Survived Age Fare Pclass_1 Pclass_2 Pclass_3 Sex_female \\\n",
199 | "0 0 22.0 7.2500 0 0 1 0 \n",
200 | "1 1 38.0 71.2833 1 0 0 1 \n",
201 | "2 1 26.0 7.9250 0 0 1 1 \n",
202 | "3 1 35.0 53.1000 1 0 0 1 \n",
203 | "4 0 35.0 8.0500 0 0 1 0 \n",
204 | "\n",
205 | " Sex_male SibSp_0 SibSp_1 ... Parch_0 Parch_1 Parch_2 Parch_3 \\\n",
206 | "0 1 0 1 ... 1 0 0 0 \n",
207 | "1 0 0 1 ... 1 0 0 0 \n",
208 | "2 0 1 0 ... 1 0 0 0 \n",
209 | "3 0 0 1 ... 1 0 0 0 \n",
210 | "4 1 1 0 ... 1 0 0 0 \n",
211 | "\n",
212 | " Parch_4 Parch_5 Parch_6 Embarked_C Embarked_Q Embarked_S \n",
213 | "0 0 0 0 0 0 1 \n",
214 | "1 0 0 0 1 0 0 \n",
215 | "2 0 0 0 0 0 1 \n",
216 | "3 0 0 0 0 0 1 \n",
217 | "4 0 0 0 0 0 1 \n",
218 | "\n",
219 | "[5 rows x 25 columns]"
220 | ]
221 | },
222 | "execution_count": 17,
223 | "metadata": {},
224 | "output_type": "execute_result"
225 | }
226 | ],
227 | "source": [
228 | "#import dataset\n",
229 | "data = pd.read_csv('../Data/data_cleaned.csv')\n",
230 | "data.head()"
231 | ]
232 | },
233 | {
234 | "cell_type": "markdown",
235 | "metadata": {},
236 | "source": [
237 | "## Build ML model"
238 | ]
239 | },
240 | {
241 | "cell_type": "code",
242 | "execution_count": 18,
243 | "metadata": {},
244 | "outputs": [
245 | {
246 | "data": {
247 | "text/plain": [
248 | "KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',\n",
249 | " metric_params=None, n_jobs=None, n_neighbors=3, p=2,\n",
250 | " weights='uniform')"
251 | ]
252 | },
253 | "execution_count": 18,
254 | "metadata": {},
255 | "output_type": "execute_result"
256 | }
257 | ],
258 | "source": [
259 | "# Define Dependent and independent variable\n",
260 | "X = data.drop('Survived', axis = 1)\n",
261 | "y = data['Survived']\n",
262 | "\n",
263 | "# Split train and test data\n",
264 | "from sklearn.model_selection import train_test_split\n",
265 | "train_X, test_X, train_y, test_y = train_test_split(X, y, test_size = 0.3, random_state = 142)\n",
266 | "\n",
267 | "# import KNeighborsClassifier model \n",
268 | "from sklearn.neighbors import KNeighborsClassifier \n",
269 | "knn = KNeighborsClassifier(n_neighbors = 3) \n",
270 | " \n",
271 | "# train model \n",
272 | "knn.fit(train_X, train_y) "
273 | ]
274 | },
275 | {
276 | "cell_type": "markdown",
277 | "metadata": {},
278 | "source": [
279 | "## Save the model"
280 | ]
281 | },
282 | {
283 | "cell_type": "code",
284 | "execution_count": 19,
285 | "metadata": {},
286 | "outputs": [
287 | {
288 | "data": {
289 | "text/plain": [
290 | "array([0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0,\n",
291 | " 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0,\n",
292 | " 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1,\n",
293 | " 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1,\n",
294 | " 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n",
295 | " 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0,\n",
296 | " 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,\n",
297 | " 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,\n",
298 | " 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1,\n",
299 | " 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0,\n",
300 | " 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0,\n",
301 | " 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1,\n",
302 | " 0, 1, 1, 1], dtype=int64)"
303 | ]
304 | },
305 | "execution_count": 19,
306 | "metadata": {},
307 | "output_type": "execute_result"
308 | }
309 | ],
310 | "source": [
311 | "import pickle\n",
312 | "\n",
313 | "# Save the KNN model using pickle\n",
314 | "saved_model = pickle.dumps(knn)\n",
315 | "\n",
316 | "# Load the saved KNN model\n",
317 | "load_model = pickle.loads(saved_model) \n",
318 | "\n",
319 | "# Make predictions using the loaded pickled model \n",
320 | "load_model.predict(test_X) "
321 | ]
322 | },
323 | {
324 | "cell_type": "code",
325 | "execution_count": null,
326 | "metadata": {},
327 | "outputs": [],
328 | "source": []
329 | }
330 | ],
331 | "metadata": {
332 | "kernelspec": {
333 | "display_name": "Python 3",
334 | "language": "python",
335 | "name": "python3"
336 | },
337 | "language_info": {
338 | "codemirror_mode": {
339 | "name": "ipython",
340 | "version": 3
341 | },
342 | "file_extension": ".py",
343 | "mimetype": "text/x-python",
344 | "name": "python",
345 | "nbconvert_exporter": "python",
346 | "pygments_lexer": "ipython3",
347 | "version": "3.7.1"
348 | }
349 | },
350 | "nbformat": 4,
351 | "nbformat_minor": 2
352 | }
353 |
--------------------------------------------------------------------------------
/Code/args.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Let us understand *args in Python"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 7,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "# Function to calculate averages\n",
17 |     "# We will pass a variable number of arguments using *args\n",
18 | "\n",
19 |     "def calculate_average(*num):\n",
20 | " add = sum(num)\n",
21 | " count = len(num)\n",
22 | "\n",
23 | " \n",
24 | " average = (add / count) \n",
25 | "\n",
26 | " \n",
27 | " print(\"The average of values is\", average)"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 13,
33 | "metadata": {},
34 | "outputs": [
35 | {
36 | "name": "stdout",
37 | "output_type": "stream",
38 | "text": [
39 | "The average of values is 7.5\n"
40 | ]
41 | }
42 | ],
43 | "source": [
44 | "# Calculate average of 2 values\n",
45 | "\n",
46 | "calculate_average(7,8)"
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": 14,
52 | "metadata": {},
53 | "outputs": [
54 | {
55 | "name": "stdout",
56 | "output_type": "stream",
57 | "text": [
58 | "The average of values is 8.25\n"
59 | ]
60 | }
61 | ],
62 | "source": [
63 |     "# Calculate average of 4 values\n",
64 | "\n",
65 | "calculate_average(3,8,9,13)"
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "execution_count": 15,
71 | "metadata": {},
72 | "outputs": [
73 | {
74 | "name": "stdout",
75 | "output_type": "stream",
76 | "text": [
77 | "The average of values is 58.07692307692308\n"
78 | ]
79 | }
80 | ],
81 | "source": [
82 |     "# Calculate average of 13 values\n",
83 | "\n",
84 | "calculate_average(3,8,9,10,4,5,6,11,22,32,100,200,345)"
85 | ]
86 | },
87 | {
88 | "cell_type": "code",
89 | "execution_count": 16,
90 | "metadata": {},
91 | "outputs": [
92 | {
93 | "name": "stdout",
94 | "output_type": "stream",
95 | "text": [
96 | "The average of values is 8.25\n"
97 | ]
98 | }
99 | ],
100 | "source": [
101 | "calculate_average(3,8,9,13)"
102 | ]
103 | }
104 | ],
105 | "metadata": {
106 | "kernelspec": {
107 | "display_name": "Python 3",
108 | "language": "python",
109 | "name": "python3"
110 | },
111 | "language_info": {
112 | "codemirror_mode": {
113 | "name": "ipython",
114 | "version": 3
115 | },
116 | "file_extension": ".py",
117 | "mimetype": "text/x-python",
118 | "name": "python",
119 | "nbconvert_exporter": "python",
120 | "pygments_lexer": "ipython3",
121 | "version": "3.7.1"
122 | }
123 | },
124 | "nbformat": 4,
125 | "nbformat_minor": 2
126 | }
127 |
--------------------------------------------------------------------------------
/Code/av_hack.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {
7 | "colab": {
8 | "base_uri": "https://localhost:8080/",
9 | "height": 122
10 | },
11 | "colab_type": "code",
12 | "id": "XG57bzU6cJ2U",
13 | "outputId": "26641faf-bc75-4bc5-9616-080905d329e0"
14 | },
15 | "outputs": [],
16 | "source": [
17 | "!pip install tokenizers"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 6,
23 | "metadata": {
24 | "colab": {
25 | "base_uri": "https://localhost:8080/",
26 | "height": 336
27 | },
28 | "colab_type": "code",
29 | "id": "iQ17w2Q5azCt",
30 | "outputId": "2c5c5f7f-4b84-41d4-f87b-745f003e1b78"
31 | },
32 | "outputs": [
33 | {
34 | "name": "stdout",
35 | "output_type": "stream",
36 | "text": [
37 | "Unique id: [39, 68, 75, 297, 0, 220, 39, 78, 86, 257, 264, 220, 88, 274, 30]\n",
38 | "Tokens: ['H', 'e', 'l', 'lo', '!', 'Ġ', 'H', 'o', 'w', 'Ġa', 're', 'Ġ', 'y', 'ou', '?']\n",
39 | "Merges: [(0, 1), (1, 2), (2, 3), (3, 5), (5, 6), (6, 7), (7, 8), (8, 9), (9, 10), (10, 12), (12, 14), (14, 15), (15, 16), (16, 18), (18, 19)]\n"
40 | ]
41 | }
42 | ],
43 | "source": [
44 | "#importing different tokenizers\n",
45 | "from tokenizers import (ByteLevelBPETokenizer,\n",
46 | " BPETokenizer,\n",
47 | " SentencePieceBPETokenizer,\n",
48 | " BertWordPieceTokenizer)\n",
49 | "#instantiate tokenizer\n",
50 | "tokenizer = ByteLevelBPETokenizer()\n",
51 | "\n",
52 | "#train with our own data\n",
53 | "tokenizer.train([\"sample.txt\"], vocab_size=20000)\n",
54 | "\n",
55 | "#Lets tokenize the sentence\n",
56 | "output = tokenizer.encode(\"Hello! How are you?\")\n",
57 | "\n",
58 | "print(\"Unique id:\",output.ids)\n",
59 | "print(\"Tokens:\",output.tokens)\n",
60 | "print(\"Merges:\",output.offsets)"
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": 7,
66 | "metadata": {
67 | "colab": {
68 | "base_uri": "https://localhost:8080/",
69 | "height": 163
70 | },
71 | "colab_type": "code",
72 | "id": "9qKgNFrabgCm",
73 | "outputId": "e6ed2a40-e822-4ff2-d0f6-af4569c1deb5"
74 | },
75 | "outputs": [
76 | {
77 | "data": {
78 | "text/plain": [
79 | "'H'"
80 | ]
81 | },
82 | "execution_count": 7,
83 | "metadata": {},
84 | "output_type": "execute_result"
85 | }
86 | ],
87 | "source": [
88 | "#converting integer back to character\n",
89 | "output.original_str[output.offsets[0]]"
90 | ]
91 | },
92 | {
93 | "cell_type": "code",
94 | "execution_count": 0,
95 | "metadata": {
96 | "colab": {},
97 | "colab_type": "code",
98 | "id": "xqE2EH9Otv0w"
99 | },
100 | "outputs": [],
101 | "source": []
102 | }
103 | ],
104 | "metadata": {
105 | "colab": {
106 | "name": "av hack.ipynb",
107 | "provenance": []
108 | },
109 | "kernelspec": {
110 | "display_name": "Python 3",
111 | "language": "python",
112 | "name": "python3"
113 | },
114 | "language_info": {
115 | "codemirror_mode": {
116 | "name": "ipython",
117 | "version": 3
118 | },
119 | "file_extension": ".py",
120 | "mimetype": "text/x-python",
121 | "name": "python",
122 | "nbconvert_exporter": "python",
123 | "pygments_lexer": "ipython3",
124 | "version": "3.7.1"
125 | }
126 | },
127 | "nbformat": 4,
128 | "nbformat_minor": 1
129 | }
130 |
--------------------------------------------------------------------------------
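Note on /Code/av_hack.ipynb — the notebook trains a ByteLevelBPETokenizer on a local sample.txt, which is assumed to exist next to the notebook. A hedged sketch of the round trip from ids back to text; decode() is available on the tokenizers library's tokenizer objects, though exact import names have shifted across library versions:

    # Assumes `tokenizer` and `output` from the training cell above
    decoded = tokenizer.decode(output.ids)
    print(decoded)   # should closely reproduce "Hello! How are you?"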
/Code/column_transformer.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Sklearn ColumnTransformer"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 52,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import pandas as pd\n",
17 | "from sklearn.compose import ColumnTransformer"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 53,
23 | "metadata": {},
24 | "outputs": [
25 | {
26 | "data": {
27 | "text/html": [
28 | "\n",
29 | "\n",
42 | "
\n",
43 | " \n",
44 | " \n",
45 | " | \n",
46 | " Loan_ID | \n",
47 | " Gender | \n",
48 | " Married | \n",
49 | " Dependents | \n",
50 | " Education | \n",
51 | " Self_Employed | \n",
52 | " ApplicantIncome | \n",
53 | " CoapplicantIncome | \n",
54 | " LoanAmount | \n",
55 | " Loan_Amount_Term | \n",
56 | " Credit_History | \n",
57 | " Property_Area | \n",
58 | " Loan_Status | \n",
59 | "
\n",
60 | " \n",
61 | " \n",
62 | " \n",
63 | " 0 | \n",
64 | " LP001002 | \n",
65 | " Male | \n",
66 | " No | \n",
67 | " 0 | \n",
68 | " Graduate | \n",
69 | " No | \n",
70 | " 5849 | \n",
71 | " 0.0 | \n",
72 | " NaN | \n",
73 | " 360.0 | \n",
74 | " 1.0 | \n",
75 | " Urban | \n",
76 | " Y | \n",
77 | "
\n",
78 | " \n",
79 | " 1 | \n",
80 | " LP001003 | \n",
81 | " Male | \n",
82 | " Yes | \n",
83 | " 1 | \n",
84 | " Graduate | \n",
85 | " No | \n",
86 | " 4583 | \n",
87 | " 1508.0 | \n",
88 | " 128.0 | \n",
89 | " 360.0 | \n",
90 | " 1.0 | \n",
91 | " Rural | \n",
92 | " N | \n",
93 | "
\n",
94 | " \n",
95 | " 2 | \n",
96 | " LP001005 | \n",
97 | " Male | \n",
98 | " Yes | \n",
99 | " 0 | \n",
100 | " Graduate | \n",
101 | " Yes | \n",
102 | " 3000 | \n",
103 | " 0.0 | \n",
104 | " 66.0 | \n",
105 | " 360.0 | \n",
106 | " 1.0 | \n",
107 | " Urban | \n",
108 | " Y | \n",
109 | "
\n",
110 | " \n",
111 | " 3 | \n",
112 | " LP001006 | \n",
113 | " Male | \n",
114 | " Yes | \n",
115 | " 0 | \n",
116 | " Not Graduate | \n",
117 | " No | \n",
118 | " 2583 | \n",
119 | " 2358.0 | \n",
120 | " 120.0 | \n",
121 | " 360.0 | \n",
122 | " 1.0 | \n",
123 | " Urban | \n",
124 | " Y | \n",
125 | "
\n",
126 | " \n",
127 | " 4 | \n",
128 | " LP001008 | \n",
129 | " Male | \n",
130 | " No | \n",
131 | " 0 | \n",
132 | " Graduate | \n",
133 | " No | \n",
134 | " 6000 | \n",
135 | " 0.0 | \n",
136 | " 141.0 | \n",
137 | " 360.0 | \n",
138 | " 1.0 | \n",
139 | " Urban | \n",
140 | " Y | \n",
141 | "
\n",
142 | " \n",
143 | "
\n",
144 | "
"
145 | ],
146 | "text/plain": [
147 | " Loan_ID Gender Married Dependents Education Self_Employed \\\n",
148 | "0 LP001002 Male No 0 Graduate No \n",
149 | "1 LP001003 Male Yes 1 Graduate No \n",
150 | "2 LP001005 Male Yes 0 Graduate Yes \n",
151 | "3 LP001006 Male Yes 0 Not Graduate No \n",
152 | "4 LP001008 Male No 0 Graduate No \n",
153 | "\n",
154 | " ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term \\\n",
155 | "0 5849 0.0 NaN 360.0 \n",
156 | "1 4583 1508.0 128.0 360.0 \n",
157 | "2 3000 0.0 66.0 360.0 \n",
158 | "3 2583 2358.0 120.0 360.0 \n",
159 | "4 6000 0.0 141.0 360.0 \n",
160 | "\n",
161 | " Credit_History Property_Area Loan_Status \n",
162 | "0 1.0 Urban Y \n",
163 | "1 1.0 Rural N \n",
164 | "2 1.0 Urban Y \n",
165 | "3 1.0 Urban Y \n",
166 | "4 1.0 Urban Y "
167 | ]
168 | },
169 | "execution_count": 53,
170 | "metadata": {},
171 | "output_type": "execute_result"
172 | }
173 | ],
174 | "source": [
175 | "# import dataset\n",
176 | "data = pd.read_csv('../Data/loan_train.csv')\n",
177 | "data.head()"
178 | ]
179 | },
180 | {
181 | "cell_type": "code",
182 | "execution_count": 54,
183 | "metadata": {},
184 | "outputs": [],
185 | "source": [
186 | "# rows having null values removed for simplicity purpose\n",
187 | "data.dropna(inplace = True)"
188 | ]
189 | },
190 | {
191 | "cell_type": "code",
192 | "execution_count": 55,
193 | "metadata": {},
194 | "outputs": [],
195 | "source": [
196 | "# Dependent and independent variables\n",
197 | "X = data.drop('Loan_Status', axis = 1)\n",
198 | "y = data['Loan_Status']"
199 | ]
200 | },
201 | {
202 | "cell_type": "code",
203 | "execution_count": 56,
204 | "metadata": {},
205 | "outputs": [],
206 | "source": [
207 | "# Apply StandardScalar transformation to numeric features\n",
208 | "# Apply OneHotEncoder transformation to categorical features\n",
209 | "\n",
210 | "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
211 | "\n",
212 | "transformers = [(\"num\", StandardScaler(), ['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount']),\n",
213 | " (\"cat\", OneHotEncoder(handle_unknown='ignore'), ['Gender', 'Married', 'Self_Employed', 'Property_Area'])]\n",
214 | "\n",
215 | "transformer = ColumnTransformer(transformers = transformers)"
216 | ]
217 | },
218 | {
219 | "cell_type": "code",
220 | "execution_count": 57,
221 | "metadata": {},
222 | "outputs": [
223 | {
224 | "name": "stderr",
225 | "output_type": "stream",
226 | "text": [
227 | "C:\\Users\\ram\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\data.py:625: DataConversionWarning: Data with input dtype int64, float64 were all converted to float64 by StandardScaler.\n",
228 | " return self.partial_fit(X, y)\n",
229 | "C:\\Users\\ram\\Anaconda3\\lib\\site-packages\\sklearn\\base.py:462: DataConversionWarning: Data with input dtype int64, float64 were all converted to float64 by StandardScaler.\n",
230 | " return self.fit(X, **fit_params).transform(X)\n"
231 | ]
232 | },
233 | {
234 | "data": {
235 | "text/plain": [
236 | "array([-0.13796959, -0.02795204, -0.20808917, 0. , 1. ,\n",
237 | " 0. , 1. , 1. , 0. , 1. ,\n",
238 | " 0. , 0. ])"
239 | ]
240 | },
241 | "execution_count": 57,
242 | "metadata": {},
243 | "output_type": "execute_result"
244 | }
245 | ],
246 | "source": [
247 | "X = transformer.fit_transform(X)\n",
248 | "X[0]"
249 | ]
250 | },
251 | {
252 | "cell_type": "code",
253 | "execution_count": null,
254 | "metadata": {},
255 | "outputs": [],
256 | "source": []
257 | },
258 | {
259 | "cell_type": "code",
260 | "execution_count": null,
261 | "metadata": {},
262 | "outputs": [],
263 | "source": []
264 | }
265 | ],
266 | "metadata": {
267 | "kernelspec": {
268 | "display_name": "Python 3",
269 | "language": "python",
270 | "name": "python3"
271 | },
272 | "language_info": {
273 | "codemirror_mode": {
274 | "name": "ipython",
275 | "version": 3
276 | },
277 | "file_extension": ".py",
278 | "mimetype": "text/x-python",
279 | "name": "python",
280 | "nbconvert_exporter": "python",
281 | "pygments_lexer": "ipython3",
282 | "version": "3.7.1"
283 | }
284 | },
285 | "nbformat": 4,
286 | "nbformat_minor": 2
287 | }
288 |
--------------------------------------------------------------------------------
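Note on /Code/column_transformer.ipynb — by default ColumnTransformer drops every column that is not named in the transformers list (Loan_ID, Dependents, etc. above). A minimal sketch of keeping the remaining columns with remainder='passthrough':

    from sklearn.compose import ColumnTransformer
    from sklearn.preprocessing import StandardScaler

    # remainder='passthrough' appends the untouched columns after the transformed ones
    transformer = ColumnTransformer(
        transformers=[('num', StandardScaler(), ['ApplicantIncome', 'LoanAmount'])],
        remainder='passthrough')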
/Code/convert_string_to_characters.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "colab_type": "text",
7 | "id": "QsmXYpdG-peE"
8 | },
9 | "source": [
10 | "## Easiest way to convert a string to characters\n"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 1,
16 | "metadata": {
17 | "colab": {
18 | "base_uri": "https://localhost:8080/",
19 | "height": 34
20 | },
21 | "colab_type": "code",
22 | "id": "xqE2EH9Otv0w",
23 | "outputId": "ec5fb143-cf7a-4cfc-9f6a-9a9b01211639"
24 | },
25 | "outputs": [
26 | {
27 | "name": "stdout",
28 | "output_type": "stream",
29 | "text": [
30 | "['S', 'p', 'o', 'r', 't', 's', ' ', 'A', 'n', 'a', 'l', 'y', 't', 'i', 'c', 's', ' ', 'c', 'a', 'n', ' ', 'b', 'e', ' ', 'a', ' ', 'g', 'a', 'm', 'e', ' ', 'c', 'h', 'a', 'n', 'g', 'e', 'r']\n"
31 | ]
32 | }
33 | ],
34 | "source": [
35 | "text='Sports Analytics can be a game changer'\n",
36 | "characters=list(text)\n",
37 | "print(characters)"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": null,
43 | "metadata": {},
44 | "outputs": [],
45 | "source": []
46 | }
47 | ],
48 | "metadata": {
49 | "colab": {
50 | "name": "av hack.ipynb",
51 | "provenance": []
52 | },
53 | "kernelspec": {
54 | "display_name": "Python 3",
55 | "language": "python",
56 | "name": "python3"
57 | },
58 | "language_info": {
59 | "codemirror_mode": {
60 | "name": "ipython",
61 | "version": 3
62 | },
63 | "file_extension": ".py",
64 | "mimetype": "text/x-python",
65 | "name": "python",
66 | "nbconvert_exporter": "python",
67 | "pygments_lexer": "ipython3",
68 | "version": "3.7.1"
69 | }
70 | },
71 | "nbformat": 4,
72 | "nbformat_minor": 1
73 | }
74 |
--------------------------------------------------------------------------------
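Note on /Code/convert_string_to_characters.ipynb — the inverse operation is a one-liner as well:

    # join the character list back into the original string
    text_again = ''.join(characters)
    print(text_again)   # 'Sports Analytics can be a game changer'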
/Code/first and last name extraction.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 6,
15 | "metadata": {},
16 | "outputs": [
17 | {
18 | "data": {
19 | "text/html": [
20 | "\n",
21 | "\n",
34 | "
\n",
35 | " \n",
36 | " \n",
37 | " | \n",
38 | " name | \n",
39 | "
\n",
40 | " \n",
41 | " \n",
42 | " \n",
43 | " 0 | \n",
44 | " Elise Mccann | \n",
45 | "
\n",
46 | " \n",
47 | " 1 | \n",
48 | " Aiden Berger | \n",
49 | "
\n",
50 | " \n",
51 | " 2 | \n",
52 | " Elle Kelley | \n",
53 | "
\n",
54 | " \n",
55 | "
\n",
56 | "
"
57 | ],
58 | "text/plain": [
59 | " name\n",
60 | "0 Elise Mccann\n",
61 | "1 Aiden Berger\n",
62 | "2 Elle Kelley"
63 | ]
64 | },
65 | "execution_count": 6,
66 | "metadata": {},
67 | "output_type": "execute_result"
68 | }
69 | ],
70 | "source": [
71 | "# create a dataframe\n",
72 | "df = pd.DataFrame({'name':['Elise Mccann', 'Aiden Berger', 'Elle Kelley']})\n",
73 | "df"
74 | ]
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": 7,
79 | "metadata": {},
80 | "outputs": [
81 | {
82 | "data": {
83 | "text/html": [
84 | "\n",
85 | "\n",
98 | "
\n",
99 | " \n",
100 | " \n",
101 | " | \n",
102 | " name | \n",
103 | " first_name | \n",
104 | " last_name | \n",
105 | "
\n",
106 | " \n",
107 | " \n",
108 | " \n",
109 | " 0 | \n",
110 | " Elise Mccann | \n",
111 | " Elise | \n",
112 | " Mccann | \n",
113 | "
\n",
114 | " \n",
115 | " 1 | \n",
116 | " Aiden Berger | \n",
117 | " Aiden | \n",
118 | " Berger | \n",
119 | "
\n",
120 | " \n",
121 | " 2 | \n",
122 | " Elle Kelley | \n",
123 | " Elle | \n",
124 | " Kelley | \n",
125 | "
\n",
126 | " \n",
127 | "
\n",
128 | "
"
129 | ],
130 | "text/plain": [
131 | " name first_name last_name\n",
132 | "0 Elise Mccann Elise Mccann\n",
133 | "1 Aiden Berger Aiden Berger\n",
134 | "2 Elle Kelley Elle Kelley"
135 | ]
136 | },
137 | "execution_count": 7,
138 | "metadata": {},
139 | "output_type": "execute_result"
140 | }
141 | ],
142 | "source": [
143 | "# extract first name and last name\n",
144 | "df['first_name'] = df['name'].str.split(' ', expand = True)[0]\n",
145 | "df['last_name'] = df['name'].str.split(' ', expand = True)[1]\n",
146 | "\n",
147 | "df"
148 | ]
149 | },
150 | {
151 | "cell_type": "code",
152 | "execution_count": null,
153 | "metadata": {},
154 | "outputs": [],
155 | "source": []
156 | }
157 | ],
158 | "metadata": {
159 | "kernelspec": {
160 | "display_name": "Python 3",
161 | "language": "python",
162 | "name": "python3"
163 | },
164 | "language_info": {
165 | "codemirror_mode": {
166 | "name": "ipython",
167 | "version": 3
168 | },
169 | "file_extension": ".py",
170 | "mimetype": "text/x-python",
171 | "name": "python",
172 | "nbconvert_exporter": "python",
173 | "pygments_lexer": "ipython3",
174 | "version": "3.7.1"
175 | }
176 | },
177 | "nbformat": 4,
178 | "nbformat_minor": 2
179 | }
180 |
--------------------------------------------------------------------------------
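Note on /Code/first and last name extraction.ipynb — splitting on every space breaks for names with more than two words. A small sketch using the n parameter of str.split so that everything after the first space lands in last_name:

    # split at most once: column 0 is the first name, column 1 is the rest
    parts = df['name'].str.split(' ', n=1, expand=True)
    df['first_name'], df['last_name'] = parts[0], parts[1]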
/Code/generator vs list.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Python List Comprehension v/s Python Generators"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 49,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "from sys import getsizeof"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 50,
22 | "metadata": {},
23 | "outputs": [],
24 | "source": [
25 | "# list comprehension\n",
26 | "list_comp = [i**3 for i in range(10000)]\n",
27 | "# generator expression\n",
28 | "gen_comp = (i**3 for i in range(10000))"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 51,
34 | "metadata": {},
35 | "outputs": [
36 | {
37 | "name": "stdout",
38 | "output_type": "stream",
39 | "text": [
40 | "Size of list is : 87624\n"
41 | ]
42 | }
43 | ],
44 | "source": [
45 | "# size of list comprehension\n",
46 | "a = getsizeof(list_comp)\n",
47 | "print(\"Size of list is : \", a)"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": 52,
53 | "metadata": {},
54 | "outputs": [
55 | {
56 | "name": "stdout",
57 | "output_type": "stream",
58 | "text": [
59 | "Size of generator is : 120\n"
60 | ]
61 | }
62 | ],
63 | "source": [
64 | "#size of generator expression\n",
65 | "b = getsizeof(gen_comp)\n",
66 | "print(\"Size of generator is : \", b)"
67 | ]
68 | },
69 | {
70 | "cell_type": "code",
71 | "execution_count": 53,
72 | "metadata": {},
73 | "outputs": [
74 | {
75 | "name": "stdout",
76 | "output_type": "stream",
77 | "text": [
78 | "9.14460999999983\n"
79 | ]
80 | }
81 | ],
82 | "source": [
83 | "# time taken by list comprehension\n",
84 | "import timeit\n",
85 | "print(timeit.timeit('''list_com = [i for i in range(100) if i % 2 == 0]'''))"
86 | ]
87 | },
88 | {
89 | "cell_type": "code",
90 | "execution_count": 54,
91 | "metadata": {},
92 | "outputs": [
93 | {
94 | "name": "stdout",
95 | "output_type": "stream",
96 | "text": [
97 | "0.7809511999998904\n"
98 | ]
99 | }
100 | ],
101 | "source": [
102 | "# time taken by generator expression \n",
103 | "print(timeit.timeit('''list_com = (i for i in range(100) if i % 2 == 0)'''))"
104 | ]
105 | },
106 | {
107 | "cell_type": "code",
108 | "execution_count": null,
109 | "metadata": {},
110 | "outputs": [],
111 | "source": []
112 | }
113 | ],
114 | "metadata": {
115 | "kernelspec": {
116 | "display_name": "Python 3",
117 | "language": "python",
118 | "name": "python3"
119 | },
120 | "language_info": {
121 | "codemirror_mode": {
122 | "name": "ipython",
123 | "version": 3
124 | },
125 | "file_extension": ".py",
126 | "mimetype": "text/x-python",
127 | "name": "python",
128 | "nbconvert_exporter": "python",
129 | "pygments_lexer": "ipython3",
130 | "version": "3.7.1"
131 | }
132 | },
133 | "nbformat": 4,
134 | "nbformat_minor": 2
135 | }
136 |
--------------------------------------------------------------------------------
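Note on /Code/generator vs list.ipynb — the trade-off for the smaller memory footprint is that a generator can only be consumed once:

    gen_comp = (i**3 for i in range(5))
    print(sum(gen_comp))   # 100 - cubes of 0..4
    print(sum(gen_comp))   # 0   - the generator is already exhausted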
/Code/groupby_in_pandas.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Understand Groupby in Pandas"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 14,
13 | "metadata": {},
14 | "outputs": [
15 | {
16 | "data": {
17 | "text/html": [
18 | "\n",
19 | "\n",
32 | "
\n",
33 | " \n",
34 | " \n",
35 | " | \n",
36 | " Loan_ID | \n",
37 | " Gender | \n",
38 | " Married | \n",
39 | " Dependents | \n",
40 | " Education | \n",
41 | " Self_Employed | \n",
42 | " ApplicantIncome | \n",
43 | " CoapplicantIncome | \n",
44 | " LoanAmount | \n",
45 | " Loan_Amount_Term | \n",
46 | " Credit_History | \n",
47 | " Property_Area | \n",
48 | " Loan_Status | \n",
49 | "
\n",
50 | " \n",
51 | " \n",
52 | " \n",
53 | " 0 | \n",
54 | " LP001002 | \n",
55 | " Male | \n",
56 | " No | \n",
57 | " 0 | \n",
58 | " Graduate | \n",
59 | " No | \n",
60 | " 5849 | \n",
61 | " 0.0 | \n",
62 | " NaN | \n",
63 | " 360.0 | \n",
64 | " 1.0 | \n",
65 | " Urban | \n",
66 | " Y | \n",
67 | "
\n",
68 | " \n",
69 | " 1 | \n",
70 | " LP001003 | \n",
71 | " Male | \n",
72 | " Yes | \n",
73 | " 1 | \n",
74 | " Graduate | \n",
75 | " No | \n",
76 | " 4583 | \n",
77 | " 1508.0 | \n",
78 | " 128.0 | \n",
79 | " 360.0 | \n",
80 | " 1.0 | \n",
81 | " Rural | \n",
82 | " N | \n",
83 | "
\n",
84 | " \n",
85 | " 2 | \n",
86 | " LP001005 | \n",
87 | " Male | \n",
88 | " Yes | \n",
89 | " 0 | \n",
90 | " Graduate | \n",
91 | " Yes | \n",
92 | " 3000 | \n",
93 | " 0.0 | \n",
94 | " 66.0 | \n",
95 | " 360.0 | \n",
96 | " 1.0 | \n",
97 | " Urban | \n",
98 | " Y | \n",
99 | "
\n",
100 | " \n",
101 | " 3 | \n",
102 | " LP001006 | \n",
103 | " Male | \n",
104 | " Yes | \n",
105 | " 0 | \n",
106 | " Not Graduate | \n",
107 | " No | \n",
108 | " 2583 | \n",
109 | " 2358.0 | \n",
110 | " 120.0 | \n",
111 | " 360.0 | \n",
112 | " 1.0 | \n",
113 | " Urban | \n",
114 | " Y | \n",
115 | "
\n",
116 | " \n",
117 | " 4 | \n",
118 | " LP001008 | \n",
119 | " Male | \n",
120 | " No | \n",
121 | " 0 | \n",
122 | " Graduate | \n",
123 | " No | \n",
124 | " 6000 | \n",
125 | " 0.0 | \n",
126 | " 141.0 | \n",
127 | " 360.0 | \n",
128 | " 1.0 | \n",
129 | " Urban | \n",
130 | " Y | \n",
131 | "
\n",
132 | " \n",
133 | "
\n",
134 | "
"
135 | ],
136 | "text/plain": [
137 | " Loan_ID Gender Married Dependents Education Self_Employed \\\n",
138 | "0 LP001002 Male No 0 Graduate No \n",
139 | "1 LP001003 Male Yes 1 Graduate No \n",
140 | "2 LP001005 Male Yes 0 Graduate Yes \n",
141 | "3 LP001006 Male Yes 0 Not Graduate No \n",
142 | "4 LP001008 Male No 0 Graduate No \n",
143 | "\n",
144 | " ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term \\\n",
145 | "0 5849 0.0 NaN 360.0 \n",
146 | "1 4583 1508.0 128.0 360.0 \n",
147 | "2 3000 0.0 66.0 360.0 \n",
148 | "3 2583 2358.0 120.0 360.0 \n",
149 | "4 6000 0.0 141.0 360.0 \n",
150 | "\n",
151 | " Credit_History Property_Area Loan_Status \n",
152 | "0 1.0 Urban Y \n",
153 | "1 1.0 Rural N \n",
154 | "2 1.0 Urban Y \n",
155 | "3 1.0 Urban Y \n",
156 | "4 1.0 Urban Y "
157 | ]
158 | },
159 | "execution_count": 14,
160 | "metadata": {},
161 | "output_type": "execute_result"
162 | }
163 | ],
164 | "source": [
165 | "#import dataset\n",
166 | "import pandas as pd\n",
167 | "\n",
168 | "df = pd.read_csv('../Data/loan_train.csv')\n",
169 | "df.head()"
170 | ]
171 | },
172 | {
173 | "cell_type": "code",
174 | "execution_count": 15,
175 | "metadata": {},
176 | "outputs": [
177 | {
178 | "data": {
179 | "text/html": [
180 | "\n",
181 | "\n",
194 | "
\n",
195 | " \n",
196 | " \n",
197 | " | \n",
198 | " ApplicantIncome | \n",
199 | "
\n",
200 | " \n",
201 | " Gender | \n",
202 | " | \n",
203 | "
\n",
204 | " \n",
205 | " \n",
206 | " \n",
207 | " Female | \n",
208 | " 4643.473214 | \n",
209 | "
\n",
210 | " \n",
211 | " Male | \n",
212 | " 5446.460123 | \n",
213 | "
\n",
214 | " \n",
215 | "
\n",
216 | "
"
217 | ],
218 | "text/plain": [
219 | " ApplicantIncome\n",
220 | "Gender \n",
221 | "Female 4643.473214\n",
222 | "Male 5446.460123"
223 | ]
224 | },
225 | "execution_count": 15,
226 | "metadata": {},
227 | "output_type": "execute_result"
228 | }
229 | ],
230 | "source": [
231 | "# Average Income of males and females\n",
232 | "df.groupby(['Gender'])[['ApplicantIncome']].mean()\n"
233 | ]
234 | },
235 | {
236 | "cell_type": "code",
237 | "execution_count": 16,
238 | "metadata": {},
239 | "outputs": [
240 | {
241 | "data": {
242 | "text/html": [
243 | "\n",
244 | "\n",
257 | "
\n",
258 | " \n",
259 | " \n",
260 | " | \n",
261 | " LoanAmount | \n",
262 | "
\n",
263 | " \n",
264 | " Property_Area | \n",
265 | " | \n",
266 | "
\n",
267 | " \n",
268 | " \n",
269 | " \n",
270 | " Rural | \n",
271 | " 152.260116 | \n",
272 | "
\n",
273 | " \n",
274 | " Semiurban | \n",
275 | " 145.504386 | \n",
276 | "
\n",
277 | " \n",
278 | " Urban | \n",
279 | " 142.198953 | \n",
280 | "
\n",
281 | " \n",
282 | "
\n",
283 | "
"
284 | ],
285 | "text/plain": [
286 | " LoanAmount\n",
287 | "Property_Area \n",
288 | "Rural 152.260116\n",
289 | "Semiurban 145.504386\n",
290 | "Urban 142.198953"
291 | ]
292 | },
293 | "execution_count": 16,
294 | "metadata": {},
295 | "output_type": "execute_result"
296 | }
297 | ],
298 | "source": [
299 | "# Average loan amount for different property areas like urban, rural\n",
300 | "df.groupby(['Property_Area'])[['LoanAmount']].mean()"
301 | ]
302 | },
303 | {
304 | "cell_type": "code",
305 | "execution_count": 17,
306 | "metadata": {},
307 | "outputs": [
308 | {
309 | "data": {
310 | "text/html": [
311 | "\n",
312 | "\n",
325 | "
\n",
326 | " \n",
327 | " \n",
328 | " | \n",
329 | " Loan_Status | \n",
330 | "
\n",
331 | " \n",
332 | " Education | \n",
333 | " | \n",
334 | "
\n",
335 | " \n",
336 | " \n",
337 | " \n",
338 | " Graduate | \n",
339 | " 480 | \n",
340 | "
\n",
341 | " \n",
342 | " Not Graduate | \n",
343 | " 134 | \n",
344 | "
\n",
345 | " \n",
346 | "
\n",
347 | "
"
348 | ],
349 | "text/plain": [
350 | " Loan_Status\n",
351 | "Education \n",
352 | "Graduate 480\n",
353 | "Not Graduate 134"
354 | ]
355 | },
356 | "execution_count": 17,
357 | "metadata": {},
358 | "output_type": "execute_result"
359 | }
360 | ],
361 | "source": [
362 | "# Compare loan status of different education backgrounds\n",
363 | "df.groupby(['Education'])[['Loan_Status']].count()"
364 | ]
365 | },
366 | {
367 | "cell_type": "code",
368 | "execution_count": null,
369 | "metadata": {},
370 | "outputs": [],
371 | "source": []
372 | }
373 | ],
374 | "metadata": {
375 | "kernelspec": {
376 | "display_name": "Python 3",
377 | "language": "python",
378 | "name": "python3"
379 | },
380 | "language_info": {
381 | "codemirror_mode": {
382 | "name": "ipython",
383 | "version": 3
384 | },
385 | "file_extension": ".py",
386 | "mimetype": "text/x-python",
387 | "name": "python",
388 | "nbconvert_exporter": "python",
389 | "pygments_lexer": "ipython3",
390 | "version": "3.7.1"
391 | }
392 | },
393 | "nbformat": 4,
394 | "nbformat_minor": 2
395 | }
396 |
--------------------------------------------------------------------------------
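Note on /Code/groupby_in_pandas.ipynb — groupby is not limited to a single statistic; agg() computes several at once:

    # mean, median and count of applicant income per gender in one call
    df.groupby('Gender')['ApplicantIncome'].agg(['mean', 'median', 'count'])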
/Code/hide_print.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import os\n",
10 | "import sys"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 2,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "class HiddenPrints:\n",
20 | " def __enter__(self):\n",
21 | " self._original_stdout = sys.stdout\n",
22 | " sys.stdout = open(os.devnull, 'w')\n",
23 | "\n",
24 | " def __exit__(self, exc_type, exc_val, exc_tb):\n",
25 | " sys.stdout.close()\n",
26 | " sys.stdout = self._original_stdout"
27 | ]
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": 4,
32 | "metadata": {},
33 | "outputs": [
34 | {
35 | "name": "stdout",
36 | "output_type": "stream",
37 | "text": [
38 | "This will be printed\n"
39 | ]
40 | }
41 | ],
42 | "source": [
43 | "with HiddenPrints():\n",
44 | " print(\"This print is hidden\")\n",
45 | "\n",
46 | "print(\"This will be printed\")"
47 | ]
48 | }
49 | ],
50 | "metadata": {
51 | "kernelspec": {
52 | "display_name": "Python 3",
53 | "language": "python",
54 | "name": "python3"
55 | },
56 | "language_info": {
57 | "codemirror_mode": {
58 | "name": "ipython",
59 | "version": 3
60 | },
61 | "file_extension": ".py",
62 | "mimetype": "text/x-python",
63 | "name": "python",
64 | "nbconvert_exporter": "python",
65 | "pygments_lexer": "ipython3",
66 | "version": "3.7.3"
67 | }
68 | },
69 | "nbformat": 4,
70 | "nbformat_minor": 2
71 | }
72 |
--------------------------------------------------------------------------------
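Note on /Code/hide_print.ipynb — the standard library offers the same effect without a custom context manager, via contextlib.redirect_stdout:

    import io
    from contextlib import redirect_stdout

    with redirect_stdout(io.StringIO()):
        print('This print is hidden')
    print('This will be printed')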
/Code/interactive_notebook.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from IPython.core.interactiveshell import InteractiveShell\n",
10 | "InteractiveShell.ast_node_interactivity = \"all\""
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 11,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "import pandas as pd\n",
20 | "data = pd.read_csv('loan_train.csv')"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 13,
26 | "metadata": {},
27 | "outputs": [
28 | {
29 | "data": {
30 | "text/plain": [
31 | "(614, 13)"
32 | ]
33 | },
34 | "execution_count": 13,
35 | "metadata": {},
36 | "output_type": "execute_result"
37 | },
38 | {
39 | "data": {
40 | "text/html": [
41 | "\n",
42 | "\n",
55 | "
\n",
56 | " \n",
57 | " \n",
58 | " | \n",
59 | " Loan_ID | \n",
60 | " Gender | \n",
61 | " Married | \n",
62 | " Dependents | \n",
63 | " Education | \n",
64 | " Self_Employed | \n",
65 | " ApplicantIncome | \n",
66 | " CoapplicantIncome | \n",
67 | " LoanAmount | \n",
68 | " Loan_Amount_Term | \n",
69 | " Credit_History | \n",
70 | " Property_Area | \n",
71 | " Loan_Status | \n",
72 | "
\n",
73 | " \n",
74 | " \n",
75 | " \n",
76 | " 0 | \n",
77 | " LP001002 | \n",
78 | " Male | \n",
79 | " No | \n",
80 | " 0 | \n",
81 | " Graduate | \n",
82 | " No | \n",
83 | " 5849 | \n",
84 | " 0.0 | \n",
85 | " NaN | \n",
86 | " 360.0 | \n",
87 | " 1.0 | \n",
88 | " Urban | \n",
89 | " Y | \n",
90 | "
\n",
91 | " \n",
92 | " 1 | \n",
93 | " LP001003 | \n",
94 | " Male | \n",
95 | " Yes | \n",
96 | " 1 | \n",
97 | " Graduate | \n",
98 | " No | \n",
99 | " 4583 | \n",
100 | " 1508.0 | \n",
101 | " 128.0 | \n",
102 | " 360.0 | \n",
103 | " 1.0 | \n",
104 | " Rural | \n",
105 | " N | \n",
106 | "
\n",
107 | " \n",
108 | " 2 | \n",
109 | " LP001005 | \n",
110 | " Male | \n",
111 | " Yes | \n",
112 | " 0 | \n",
113 | " Graduate | \n",
114 | " Yes | \n",
115 | " 3000 | \n",
116 | " 0.0 | \n",
117 | " 66.0 | \n",
118 | " 360.0 | \n",
119 | " 1.0 | \n",
120 | " Urban | \n",
121 | " Y | \n",
122 | "
\n",
123 | " \n",
124 | " 3 | \n",
125 | " LP001006 | \n",
126 | " Male | \n",
127 | " Yes | \n",
128 | " 0 | \n",
129 | " Not Graduate | \n",
130 | " No | \n",
131 | " 2583 | \n",
132 | " 2358.0 | \n",
133 | " 120.0 | \n",
134 | " 360.0 | \n",
135 | " 1.0 | \n",
136 | " Urban | \n",
137 | " Y | \n",
138 | "
\n",
139 | " \n",
140 | " 4 | \n",
141 | " LP001008 | \n",
142 | " Male | \n",
143 | " No | \n",
144 | " 0 | \n",
145 | " Graduate | \n",
146 | " No | \n",
147 | " 6000 | \n",
148 | " 0.0 | \n",
149 | " 141.0 | \n",
150 | " 360.0 | \n",
151 | " 1.0 | \n",
152 | " Urban | \n",
153 | " Y | \n",
154 | "
\n",
155 | " \n",
156 | "
\n",
157 | "
"
158 | ],
159 | "text/plain": [
160 | " Loan_ID Gender Married Dependents Education Self_Employed \\\n",
161 | "0 LP001002 Male No 0 Graduate No \n",
162 | "1 LP001003 Male Yes 1 Graduate No \n",
163 | "2 LP001005 Male Yes 0 Graduate Yes \n",
164 | "3 LP001006 Male Yes 0 Not Graduate No \n",
165 | "4 LP001008 Male No 0 Graduate No \n",
166 | "\n",
167 | " ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term \\\n",
168 | "0 5849 0.0 NaN 360.0 \n",
169 | "1 4583 1508.0 128.0 360.0 \n",
170 | "2 3000 0.0 66.0 360.0 \n",
171 | "3 2583 2358.0 120.0 360.0 \n",
172 | "4 6000 0.0 141.0 360.0 \n",
173 | "\n",
174 | " Credit_History Property_Area Loan_Status \n",
175 | "0 1.0 Urban Y \n",
176 | "1 1.0 Rural N \n",
177 | "2 1.0 Urban Y \n",
178 | "3 1.0 Urban Y \n",
179 | "4 1.0 Urban Y "
180 | ]
181 | },
182 | "execution_count": 13,
183 | "metadata": {},
184 | "output_type": "execute_result"
185 | },
186 | {
187 | "data": {
188 | "text/plain": [
189 | "Loan_ID object\n",
190 | "Gender object\n",
191 | "Married object\n",
192 | "Dependents object\n",
193 | "Education object\n",
194 | "Self_Employed object\n",
195 | "ApplicantIncome int64\n",
196 | "CoapplicantIncome float64\n",
197 | "LoanAmount float64\n",
198 | "Loan_Amount_Term float64\n",
199 | "Credit_History float64\n",
200 | "Property_Area object\n",
201 | "Loan_Status object\n",
202 | "dtype: object"
203 | ]
204 | },
205 | "execution_count": 13,
206 | "metadata": {},
207 | "output_type": "execute_result"
208 | }
209 | ],
210 | "source": [
211 | "data.shape\n",
212 | "data.head()\n",
213 | "data.dtypes"
214 | ]
215 | },
216 | {
217 | "cell_type": "code",
218 | "execution_count": null,
219 | "metadata": {},
220 | "outputs": [],
221 | "source": []
222 | }
223 | ],
224 | "metadata": {
225 | "kernelspec": {
226 | "display_name": "Python 3",
227 | "language": "python",
228 | "name": "python3"
229 | },
230 | "language_info": {
231 | "codemirror_mode": {
232 | "name": "ipython",
233 | "version": 3
234 | },
235 | "file_extension": ".py",
236 | "mimetype": "text/x-python",
237 | "name": "python",
238 | "nbconvert_exporter": "python",
239 | "pygments_lexer": "ipython3",
240 | "version": "3.7.1"
241 | }
242 | },
243 | "nbformat": 4,
244 | "nbformat_minor": 2
245 | }
246 |
--------------------------------------------------------------------------------
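Note on /Code/interactive_notebook.ipynb — setting ast_node_interactivity to 'all' makes every standalone expression in a cell display its result. The IPython default can be restored at any time:

    from IPython.core.interactiveshell import InteractiveShell
    InteractiveShell.ast_node_interactivity = 'last_expr'   # the default: show only the last expression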
/Code/invert_dictionary.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Inverting a Dictionary"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 15,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "# Test Dictionary\n",
17 | "my_dict = {'Name':20,\n",
18 | " 'Age' : 3,\n",
19 | " 'attendance' : 75,\n",
20 | " 'roll no': 'H405'}"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 16,
26 | "metadata": {},
27 | "outputs": [
28 | {
29 | "name": "stdout",
30 | "output_type": "stream",
31 | "text": [
32 | "{'Name': 20, 'Age': 3, 'attendance': 75, 'roll no': 'H405'}\n"
33 | ]
34 | }
35 | ],
36 | "source": [
37 | "# Print Dictionary\n",
38 | "print(my_dict)"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": 17,
44 | "metadata": {},
45 | "outputs": [
46 | {
47 | "data": {
48 | "text/plain": [
49 | "{20: 'Name', 3: 'Age', 75: 'attendance', 'H405': 'roll no'}"
50 | ]
51 | },
52 | "execution_count": 17,
53 | "metadata": {},
54 | "output_type": "execute_result"
55 | }
56 | ],
57 | "source": [
58 | "# invert dictionary\n",
59 | "new_dict = {v:k for k,v in my_dict.items()}\n",
60 | "new_dict"
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": null,
66 | "metadata": {},
67 | "outputs": [],
68 | "source": []
69 | }
70 | ],
71 | "metadata": {
72 | "kernelspec": {
73 | "display_name": "Python 3",
74 | "language": "python",
75 | "name": "python3"
76 | },
77 | "language_info": {
78 | "codemirror_mode": {
79 | "name": "ipython",
80 | "version": 3
81 | },
82 | "file_extension": ".py",
83 | "mimetype": "text/x-python",
84 | "name": "python",
85 | "nbconvert_exporter": "python",
86 | "pygments_lexer": "ipython3",
87 | "version": "3.7.1"
88 | }
89 | },
90 | "nbformat": 4,
91 | "nbformat_minor": 2
92 | }
93 |
--------------------------------------------------------------------------------
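Note on /Code/invert_dictionary.ipynb — the comprehension only works cleanly when every value is hashable and unique; duplicate values silently collapse to the key seen last:

    d = {'a': 1, 'b': 1}
    print({v: k for k, v in d.items()})   # {1: 'b'} - the 'a' entry is lost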
/Code/iterative_imputer.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "iterative_imputer.ipynb",
7 | "provenance": []
8 | },
9 | "kernelspec": {
10 | "name": "python3",
11 | "display_name": "Python 3"
12 | }
13 | },
14 | "cells": [
15 | {
16 | "cell_type": "markdown",
17 | "metadata": {
18 | "id": "x-FLsxTrgR3R",
19 | "colab_type": "text"
20 | },
21 | "source": [
22 | "## Impute missing values using IterativeImputer"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "metadata": {
28 | "id": "JqsZhhMUZnxX",
29 | "colab_type": "code",
30 | "colab": {}
31 | },
32 | "source": [
33 | "import numpy as np\n",
34 | "\n",
35 | "# Since this estimator is experimental, we need to import it explicitly\n",
36 | "from sklearn.experimental import enable_iterative_imputer \n",
37 | "from sklearn.impute import IterativeImputer"
38 | ],
39 | "execution_count": 0,
40 | "outputs": []
41 | },
42 | {
43 | "cell_type": "code",
44 | "metadata": {
45 | "id": "EwRy35OTZ-ea",
46 | "colab_type": "code",
47 | "colab": {}
48 | },
49 | "source": [
50 | "# Make a test dataset\n",
51 | "data = [[1, 2, 3, 4, np.nan],\n",
52 | " [5, 6, 7, 8, 11],\n",
53 | " [9, 1, 12, 2, 3],\n",
54 | " [4, np.nan, 5, 6, 7],\n",
55 | " [4, 8, 1, 2, 3]]"
56 | ],
57 | "execution_count": 0,
58 | "outputs": []
59 | },
60 | {
61 | "cell_type": "code",
62 | "metadata": {
63 | "id": "dpkWWrFpeHQ0",
64 | "colab_type": "code",
65 | "colab": {
66 | "base_uri": "https://localhost:8080/",
67 | "height": 102
68 | },
69 | "outputId": "3b09b267-461e-4a61-d9d9-2c91f87adbe5"
70 | },
71 | "source": [
72 | "# Impute missing values using iterative imputer\n",
73 | "iter_imp = IterativeImputer(random_state= 34)\n",
74 | "iter_imp.fit_transform(data)"
75 | ],
76 | "execution_count": 19,
77 | "outputs": [
78 | {
79 | "output_type": "execute_result",
80 | "data": {
81 | "text/plain": [
82 | "array([[ 1. , 2. , 3. , 4. , 2.78144391],\n",
83 | " [ 5. , 6. , 7. , 8. , 11. ],\n",
84 | " [ 9. , 1. , 12. , 2. , 3. ],\n",
85 | " [ 4. , 4.25776595, 5. , 6. , 7. ],\n",
86 | " [ 4. , 8. , 1. , 2. , 3. ]])"
87 | ]
88 | },
89 | "metadata": {
90 | "tags": []
91 | },
92 | "execution_count": 19
93 | }
94 | ]
95 | },
96 | {
97 | "cell_type": "code",
98 | "metadata": {
99 | "id": "W2KTJygufkhi",
100 | "colab_type": "code",
101 | "colab": {}
102 | },
103 | "source": [
104 | ""
105 | ],
106 | "execution_count": 0,
107 | "outputs": []
108 | }
109 | ]
110 | }
--------------------------------------------------------------------------------
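Note on /Code/iterative_imputer.ipynb — IterativeImputer models each column with BayesianRidge by default; the estimator parameter swaps in any other regressor. A hedged sketch:

    from sklearn.experimental import enable_iterative_imputer  # still required, the estimator is experimental
    from sklearn.impute import IterativeImputer
    from sklearn.ensemble import RandomForestRegressor

    # tree-based imputation instead of the default linear model
    iter_imp = IterativeImputer(estimator=RandomForestRegressor(n_estimators=10, random_state=34),
                                random_state=34)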
/Code/list_of_lists_to_list.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import itertools"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 2,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "nested_list = [[1], [2, 3], [4, 5, 6]]"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 3,
24 | "metadata": {},
25 | "outputs": [
26 | {
27 | "name": "stdout",
28 | "output_type": "stream",
29 | "text": [
30 | "[1, 2, 3, 4, 5, 6]\n"
31 | ]
32 | }
33 | ],
34 | "source": [
35 | "flat_list = list(itertools.chain.from_iterable(nested_list))\n",
36 | "\n",
37 | "print(flat_list)"
38 | ]
39 | }
40 | ],
41 | "metadata": {
42 | "kernelspec": {
43 | "display_name": "Python 3",
44 | "language": "python",
45 | "name": "python3"
46 | },
47 | "language_info": {
48 | "codemirror_mode": {
49 | "name": "ipython",
50 | "version": 3
51 | },
52 | "file_extension": ".py",
53 | "mimetype": "text/x-python",
54 | "name": "python",
55 | "nbconvert_exporter": "python",
56 | "pygments_lexer": "ipython3",
57 | "version": "3.7.3"
58 | }
59 | },
60 | "nbformat": 4,
61 | "nbformat_minor": 2
62 | }
63 |
--------------------------------------------------------------------------------
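Note on /Code/list_of_lists_to_list.ipynb — the same flattening works without an import, as a nested comprehension:

    flat_list = [item for sublist in nested_list for item in sublist]
    print(flat_list)   # [1, 2, 3, 4, 5, 6]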
/Code/magic function - %who.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 3,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "var_1 = 4\n",
10 | "var_2 = \"hat\""
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 4,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "my_list = [22,33,44,55,66,98]"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 5,
25 | "metadata": {},
26 | "outputs": [],
27 | "source": [
28 | "my_dict = {'India': 'Delhi', 'Indonesia': 'Jakarta', 'China': 'Beijing'}"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 6,
34 | "metadata": {},
35 | "outputs": [
36 | {
37 | "name": "stdout",
38 | "output_type": "stream",
39 | "text": [
40 | "Variable Type Data/Info\n",
41 | "----------------------------\n",
42 | "my_dict dict n=3\n",
43 | "my_list list n=6\n",
44 | "var_1 int 4\n",
45 | "var_2 str hat\n"
46 | ]
47 | }
48 | ],
49 | "source": [
50 | "%whos"
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": null,
56 | "metadata": {},
57 | "outputs": [],
58 | "source": []
59 | }
60 | ],
61 | "metadata": {
62 | "kernelspec": {
63 | "display_name": "Python 3",
64 | "language": "python",
65 | "name": "python3"
66 | },
67 | "language_info": {
68 | "codemirror_mode": {
69 | "name": "ipython",
70 | "version": 3
71 | },
72 | "file_extension": ".py",
73 | "mimetype": "text/x-python",
74 | "name": "python",
75 | "nbconvert_exporter": "python",
76 | "pygments_lexer": "ipython3",
77 | "version": "3.7.1"
78 | }
79 | },
80 | "nbformat": 4,
81 | "nbformat_minor": 2
82 | }
83 |
--------------------------------------------------------------------------------
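Note on /Code/magic function - %who.ipynb — a few related IPython magics are worth knowing alongside %whos:

    %who          # prints the bare variable names defined above
    %who str      # filters by type, here only var_2
    names = %who_ls   # returns the names as a Python list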
/Code/make_regression.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 121,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from sklearn import linear_model, datasets\n",
10 | "import matplotlib.pyplot as plt"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 126,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "X, y, coef = datasets.make_regression(n_samples=1000, n_features=1,\n",
20 | " n_informative=1, noise=10,\n",
21 | " coef=True, random_state=0)"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 127,
27 | "metadata": {},
28 | "outputs": [
29 | {
30 | "data": {
31 | "text/plain": [
32 | ""
33 | ]
34 | },
35 | "execution_count": 127,
36 | "metadata": {},
37 | "output_type": "execute_result"
38 | },
39 | {
40 | "data": {
41 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAD8CAYAAAB6paOMAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAHHlJREFUeJzt3X+Q3PV93/Hn+1aLvCKpVxQlQYvOIh4ix7IMsm+QOup0bGwiAgbOcjBoICFNZjSe2o2dphqk4KlEi6vr3CTYqd2MlZhpPFB+OMiHCLRnu5InKTOyOfkkZBldrIKNtGKMUjilRltxunv3j9099va+e7d3++P76/WY0ejuu9+972dB833f9/15f94fc3dERCS9esIegIiIhEuBQEQk5RQIRERSToFARCTlFAhERFJOgUBEJOUUCEREUk6BQEQk5RQIRERSbknYA2jG5Zdf7qtXrw57GCIisXL48OF/cPcV850Xi0CwevVqRkZGwh6GiEismNlPmzlPqSERkZRTIBARSTkFAhGRlFMgEBFJOQUCEZGUi0XVkIhI2gyNFhkcHuPMeImV+RzbN6+hf32hI9dSIBARiZih0SI79x2jNDEJQHG8xM59xwA6EgyUGhIRiZjB4bHpIFBVmphkcHisI9fTE4GISATUpoIa7SR/ZrzUkWsrEIiIhKw+FdTIynyuI9dXakhEJGRBqaB6uWyG7ZvXdOT6eiIQEQnZXCkfA1UNiYgkTX1paH5ZljfOT8w6r5DP8dyO6zs+HgUCEZEuCioNzfYY2YwxMfn2NHEnU0H1NEcgItJFQfMBE1POpZcsoZDPYZSfBPZsWdexVFA9PRGIiHRRo/mAc6UJjuz6jS6PpkyBQESkBQttBbEyn6MYEAw6VRrajJZTQ2a2yswOmtmLZnbczD5bOX6ZmX3bzH5c+Xt55biZ2Z+Z2Ukze8HMPtDqGEREwlDN9xcri8CK4yX+8PEjrN7xDJsGDjA0Wpz1nu2b15DLZmYc6+Z8QBBzb7SGrckfYHYFcIW7/8DMfhE4DPQDvwu87u4DZrYDWO7u95rZTcC/Bm4CNgBfcvcNc12jr6/PtVWliIQp6Df/weGxwN/uq3LZDJ/4YIGDJ87OeB/QlYZyZnbY3fvmPa/VQBBw4aeAL1f+fMjdX60Ei++6+xoz+2rl60cr549Vz2v0MxUIRCRMQSt/c9nMvIvAoLwOoPYum8tmujYR3GwgaGvVkJmtBtYD3wN+uXpzr/z9S5XTCsCpmredrhwTEYmkRk3gMmbzvrf+V+1ONo9brLYFAjP7BeBJ4HPu/o9znRpwbNZjiZltM7MRMxs5e/Zsu4YpIrJgjSp9Jt1n5ftb+XlhaUsgMLMs5SDwiLvvqxz+WSUlVJ1HeK1y/DSwqubtVwJn6n+mu+919z5371uxYkU7hikisiiNKnoK+Ryf+ODCExphVggFaUfVkAFfA1509z+teWk/cE/l63uAp2qO/06lemgjcG6u+QERkbAFVfpkM8brb17g4UOvNHxftofIVQgFaccTwSbgt4HrzexI5c9NwABwg5n9GLih8j3As8BLwEngL4B/1YYxiIjMa2i0yKaBA1w1R3ln0HkAe7asm175u3xZFhxKE1NzXu/SpdkZ7+v2iuFmtb1qqBNUNSQirWpU+VN/Yw46z4C7NvbyQP86ADYNHJizbLT2fS8P3Ny2z7BQzVYNaWWxiKTCXNs/1gaC3fuPzzrPgYcPvcLfHH2Vc6WJhjuI1YvaXEAjCgQikgqNKnVqjw+NFhkvzW4HXTXXa/WiOBfQiLqPikgqNPrtvPZ4q/X91WUFUZ0LaERPBCKSCts3rwmcI9i+ec10+4hm8v5B8rksu29dG5sbfz0FAhFJvOqNvroaeNKdQk3fn2Y2jp/LpUuXxDYIgAKBiCRcfRVQdTXwh9+zgvufPh64ReRCRW2l8EIpEIhIot3/9OwqoNLE5JwLwYIsXdLDhYvB6wbiUh3UiAKBiCRGbavod+ayTExO8eZbi0/51GoUBAxiUx3UiAKBiCRCfQpoIaWerXCI9fwAKBCISEzVbxTz5oWLLU34VpnBEjMmpt5eNpbLZli6pCcwuBRinhYCBQIRiaH63/4XW/YZ5MFPXgvM3kEMZlcXxWnR2FwUCEQkdoLaRbTD3Rt7p9M8jdI93dhistsUCEQkdtpdrllo8qbev76QiBt/PbWYEJHYyS/Ltu1nVdM7SbzBN0uBQERiZWi0yM//38W2/bwo7iHcbQoEIhIZzWwcMzg8NqOipx2K46U5N6pJOs0RiEgkBFUC7dx3bPr16iRtp7bSqr1e2tJECgQiEgmNNo7Zvf84Fy5OdaRKqF7QRjVpoNSQiERCo7UA46WJRQWB6kIvC3gt6FhV3BvILYYCgYiEbmi0OOfNeaGWZXt4bsf1/GTgZh6849oZm8d/8Y5reXng5oYrguPeQG4xlBoSka6p3QCmdl+ANy9cbGvuf2LKGRotTtf9B6V65tqoJm30RCAiXVGdDK6mgCa9fOsvjpfa3iBuYtLnLQntX19gz5Z1M54W4rS9ZDvpiUBEuqJTbSEaaSbXn9SVwgulJwIR6Yp2NoZrRhpz/YulJwIRaauh0eKsLSCXL8ti0LE1APXSmutfLAUCEWmbodEi2//6KBOTM2/57dgXGMopjOB9wmb6QO87lfJZAKWGRKRtBofHZgWBdmomCAAceumNjo0hiRQIRKRtuj0P0Ei1Ikmao0AgIm3R7kVhrchYVEYSD5ojEJEFqd8ruNrLf3B4rGuTwfPZumFV2EOIFQUCEWnaXB1Cu9Gjp77yKJfN8IHed3LopTeYdCdjxtYNq3igf13Hx5IkbQkEZvYQ8DHgNXd/X+XYZcDjwGrgJ8An3f0NMzPgS8BNwHngd939B+0Yh4h01u79xwM7hH7u8SMdv3Yum+ETHyxw8MTZxO0ZHLZ2PRH8V+DLwNdrju0A/qe7D5jZjsr39wK/CVxd+bMB+PPK3yISYUOjxba3gpjLF++4NpEbxUdRWwKBu/+tma2uO3wb8KHK138FfJdyILgN+Lq7O3DIzPJmdoW7v9qOsYhIZ9z/9PGuXauQz6n9Qxd1smrol6s398rfv1Q5XgBO1Zx3unJMRCKsXYvC5pPNmFYFd1kYk8VBdV2zig3MbBuwDaC3t7fTYxJJrUZVQPWvd8ullyzRk0CXdTIQ/Kya8jGzK4DXKsdPA7W1XVcCZ+rf7O57gb0AfX19UalKE0mUuaqA+tcX+PzQMR4+9EpXx3Sui/MQUtbJ1NB+4J7K1/cAT9Uc/x0r2wic0/yASDga7RM8ODwWShAAdQ0NQ7vKRx+lPDF8uZmdBnYBA8ATZvb7wCvA7ZXTn6VcOnqScvnov2zHGERk4RrV/hfHS10JAtkeY2Lq7Qd+dQ0NR7uqhrY2eOkjAec68Ol2XFdEWrMyn+tof6C7N/Zy8MTZwGsUKvMRKhENn1YWiyRU0CQwMGvP4E7uE9D3rsvoe9dlDfcGVoloNCgQiCRQ0CTwH33jKJM1aZhq
h85OVmIMDo/x3I7rp7/Wb/7RpEAgkkBBk8C1QaBbqnMQ+s0/2tSGWiSButEArhmqAIoHBQKRhBkaLRKFdvyqAIoPpYZEEqS6Z3AIWaAZli/LsuuWtUoHxYQCgUiCdHrP4Pnksj3s2fJ+BYCYUSAQSZAw5wbu3tirDWFiSoFAJKZq1wm8I9vDhYtToW4V+ej3TikQxJQCgUgM1a8TKE1MhTyit9clSPyoakgkhoLWCYQtE4VSJVkUBQKRGAp7LiDI1g2rAo9L9Ck1JBIj1XmBsJIwhXxueh7g0e+dYtKdjBlbN6zS/ECMKRCIREztJHB+WRZ3GC9N0GOEuj7AYHqB2AP963TjTxAFApEIqZ8Ert0nOOwgcNfGXq0PSCgFApEIieIkcEHdQhNPgUAkQqLSLA7KTwEP3nGtAkAKqGpIJEKi0q1TqaB00ROBSAiCdg/rX19g++Y1bP/G0Rn7+HabUkHpo0Ag0mVBu4ft3HcMKG/gct83jzHxVvfnCXLZDHu2rFMASCHzGCwL7+vr85GRkbCHIdIWmwYOdHTD+IUy0PaRCWVmh929b77z9EQg0mVRmhAu5HPTewpLemmyWKTLojQhrB3EBPREINJxQ6NF7n/6+IzFYVHgoFSQAAoEIh1V3ToyzF3DGilE5MlEwqfUkEgH3f/08UgEgfoG0dpYXmopEIh0yNBoMRLpoFw2w10beynkcxjlJwGViUotpYZEFql+UdiH37OCvzn6KuOl8G7+1cVgQYvVRBpRIBBZhKHR4owVwMXxEg8feiXkUTF909eNXxZCqSGRRdi574VQ20AEWZbtUQCQRVEgEFmgodFiJDaLr5XtMf7jlveHPQyJqdACgZndaGZjZnbSzHaENQ6RhRocHgt7CDMU8jkGb79GTwOyaKHMEZhZBvgKcANwGnjezPa7+4/CGI/IQkShT1A+l2X3rWt185e2CGuy+DrgpLu/BGBmjwG3AQoEEgnViqDieImM2fQm7ZMhNWnUjV86KaxAUABO1Xx/GtgQ0lhEZqivCKre/MMIAmoNLd0QViCoX+gI5dYnb59gtg3YBtDb29uNMYkAsHv/8UhUBGmDGOmWsALBaWBVzfdXAmdqT3D3vcBeKO9H0L2hSdqFuSCsykDtoaVrwqoaeh642syuMrNLgDuB/SGNRQQop4Q2DRwIexhAdFpVSzqE8kTg7hfN7DPAMJABHnL342GMRdJraLTI7v3HQ30C6DFYuiQzvW0lqCGcdF9o6wjc/Vl3/zV3f7e7fyGscUg6VSeEw04DTTns2bJODeEkVOo1JKk0ODwWmQlh9QaSsCkQSCoErQuIAqWAJAoUCCTxhkaL7Nx3bDoPH5UgcOklGT0JSCSo6Zwk3uDw2IzJ2CjIZowvfHxd2MMQAfREIClwJgK9gWppoZhEjQKBJEZ9OejyZVl23bKWlflcJBrFgRaKSTQpNSSJEFQO+sb5Cbb/9VFW/9NcYE+TMGihmESRAoEkQqNy0IlJ57n//TphTA9ne2aGHy0Uk6hSIJDYGxotRib1U7Xp3ZcxePs1WigmsaA5Aomd2jUBPVZenRsVGTO2bljFA/3liiDd+CUOFAgkVurXBEQlCNy9sXf65i8SNwoEEitRWxOgncMkCTRHILESpTUB2R5TEJBE0BOBRFJQb6BCPkd+WZY3zoe/cQzAxJQzODymQCCxp0AgkdNoz+CwKoPmalIXpScUkcVSakgiJyp7BkN5JfCffLJcBhpEC8QkCRQIJHLC3iym1l0be+lfX2D75jXkspkZr2mBmCSFUkMSKUOjxbCHMG3Tuy+btR5gcHiMM+MlVqpxnCSIAoFExtBokX/zxJGwhzFrUViVdhKTpFIgkMj4430vhLpArJDPqTOopJLmCCQyzk9MhXZt5fslzfREIKmnjWIk7RQIpKuqC8VqJ1yhPAnbaXdv7OXgibOa7BWpo0AgXfP5oWM8cuiV6b0BiuMl/vDxI13ZK6CQz6kpnEgDmiOQrhgaLc4IAlXdmhs+/9bFSJWmikSJAoF0xeDwWCi7hFW9cX6CnfuOKRiIBFAgkLYZGi2yaeAAV+14hk0DB6Zvup8fOtb1PkH5XHbWsdLEZFfmIkTiRnME0hb1G8YUx0vs3HeMrxz8MT9+7c2uj+dcgzYVahInMpueCKQt7n/6+KwNY0oTk6EEgeXLsg2bwalJnMhsCgTSsqHRYih7BORzWTI9NuNYNmPsumWtmsSJLIBSQ9KyMPLu1T2Cg9Yl1K4NUJM4kfmZN9hwo6k3m90O7AZ+HbjO3UdqXtsJ/D4wCfyBuw9Xjt8IfAnIAH/p7gPzXaevr89HRkbmO026oP7G++H3rODhQ690fRw/Gbi569cUiRszO+zuffOd1+oTwQ+BLcBX6y7+XuBOYC2wEviOmf1a5eWvADcAp4HnzWy/u/+oxXFIFwRNCIcRBBptEiMii9NSIHD3FwHMrP6l24DH3P0C8LKZnQSuq7x20t1fqrzvscq5CgQxMDg8NmtCuNuyGVOeX6TNOjVHUAAO1Xx/unIM4FTd8Q0dGoMswlw597BLL5cvy7LrlrXK84u02byBwMy+A/xKwEv3uftTjd4WcMwJrlIKnKQws23ANoDe3t75hilt0GgtAJQ3ZVmZz4WygbzmA0Q6a95A4O4fXcTPPQ2sqvn+SuBM5etGx+uvuxfYC+XJ4kWMQRYoKPVTmpjkj544yucePxIY3Ttt+bLZK4RFpL06tY5gP3CnmS01s6uAq4HvA88DV5vZVWZ2CeUJ5f0dGoMsUKPUz2Slsqzb0bi6JkBEOqulQGBmHzez08A/A54xs2EAdz8OPEF5Evh/AJ9290l3vwh8BhgGXgSeqJwrERCFVbfVp45CPsfgb12j+QCRLmhpHUG3aB1Bd9TPEYRFeweLtEe31hFIglR/+x4cHgtlUrgq7OokkbRRIBCg/DSwe/9xxht07eymKKSoRNJEgUAYGi2y/RtHmZgKP02oxnAi3adAIAwOj0UiCBTUGE4kFAoE0rWcvNG4BFUTxCLhUSBImPnaMgedZwbtLh7L57J87JorOHji7IyxALMqk5QOEgmXAkGCNGoRMfLT12fckD/8nhU8ebg4fV47g0Cz6R3tEyASHVpHkCCbBg6EWvap9I5ItGgdQQLNl/YJs/5e6R2R+NKexTFRTfsUx0s4b6d9hkaL0+d0sv6+fm/gHis3hDPKTwJ7tqxTekckpvREEBONOoMODo9N34C3b17TkRYR+VyW3beuVV5fJKEUCGKiUdqn9ninWkSYlX+2bvwiyaTUUEw0SvvUH+9fX+C5HdeTz7Wvj//4+fDbTohI5+iJICaC0j49wKvnSqze8cz0sUKlPPTNty627drq/SOSbAoEMVGb9jkzXuId2R5KE1OzluoWx0s8cuiVtm0io2ogkeRTIIiR2jz9r+58puF57VwZomogkeTTHEEMDY0W6UaPuIyZgoBICigQxNDg8FhXrjMZg1XnItI6pYZionZVcbtuzxkzptzpMQu86Rc0SSySCnoiiIH6VcXNWL4sSy6bafh6NmP8ySev4eWBm9m6YRV
W97omiUXSQ08EETBfD6GgVcVzMWDXLWun31scL83YC2D5siy7bllL//oCQ6NFnjxcnBFgDPjEB7WATCQtFAhC1qh1NLxdMrqQZnIG3LWxd/q9zbSDrg8yDhw8cbbpa4pIvCk1FLK5eghVNbugq5DP8eAd1/JA/7qmr99M6woRSTYFgpA16glUeyPevnnNnPl+eHsvgIWmc5ptXSEiyaVAEKKh0eKsSdqqHrPpFtP96wvs2TL3b/mLndgNCjKaKBZJFwWCEA0OjzWsApp0n7HfQP/6QsNyznwuu+iJ3WqQKeRz2ltAJKU0WdxF9dVB87WKbma/gVw2w+5b17Y0LrWYFkk3BYIuCaoOqi3pbOTMeGlGAMkvy7J0SQ/nShPaIEZE2kKBoEsalWnOJ78sOyOAvHF+glw2w4N3XKsAICJtoTmCLllMOaYB7sxbXioi0goFgi5ZTDnmXRt7OVcK3h1Mdf4i0i4tBQIzGzSzE2b2gpl908zyNa/tNLOTZjZmZptrjt9YOXbSzHa0cv04aWYtQK18LssD/etU5y8iHdfqE8G3gfe5+/uBvwd2ApjZe4E7gbXAjcB/MbOMmWWArwC/CbwX2Fo5NxWWLmnuP3dtJZDq/EWk01qaLHb3b9V8ewj4rcrXtwGPufsF4GUzOwlcV3ntpLu/BGBmj1XO/VEr44i6+oqhuWTMZtTx129RqUohEWm3dlYN/R7weOXrAuXAUHW6cgzgVN3xDW0cQyQ12z00l80ELuZSnb+IdNK8gcDMvgP8SsBL97n7U5Vz7gMuAo9U3xZwvhOcigqsojSzbcA2gN7e3vmGGWnNTOzWPwmIiHTLvIHA3T861+tmdg/wMeAj7tPbXJ0GVtWcdiVwpvJ1o+P1190L7AXo6+uL9J6J8+0n0Mwq4il3BQERCUWrVUM3AvcCt7r7+ZqX9gN3mtlSM7sKuBr4PvA8cLWZXWVml1CeUN7fyhjCVr97WHU/gWqPIGiuYkhVQCISllbnCL4MLAW+bWYAh9z9U+5+3MyeoDwJfBH4tLtPApjZZ4BhIAM85O7HWxxDV9X/9v/mhYsNF3wFTfgGtZZQFZCIhMk8YNPyqOnr6/ORkZGOXmO+9E71nGarfwB+MnDzoq8lItIqMzvs7n3znadeQzS3XSQsbO/gjDXaaUBVQCISLWoxQXPbRcLC2jpMxuBJS0QE9EQANL7BF8dLvHvns0y6U8jnyGV7OD8x1dTPbLSJjIhI1OiJgLkrdqq/2RfHS00HAU3+ikicKBCw8IZwc8nnsloYJiKxotQQs8s7W3Hp0iUKAiISK3oiqOhfX+C5Hde3nNvXPgEiEjcKBHVaTRNphbCIxI0CQZ3+9QX2bFk3/WRQXQ9QyOe4e2MvhXwOozwXkM3MXCugSWIRiSPNEQRodsGXVgiLSBIoELRAK4RFJAmUGhIRSTkFAhGRlFMgEBFJOQUCEZGUUyAQEUk5BQIRkZRLdPmo6vxFROaX2EDQ7K5jIiJpl9jUULO7jomIpF1iA0GjLqDqDioiMlNiA0GjLqDqDioiMlNiA0FQO2l1BxURmS2xk8W1u46pakhEpLHEBgJQd1ARkWYkNjUkIiLNUSAQEUk5BQIRkZRTIBARSTkFAhGRlDN3D3sM8zKzs8BP2/TjLgf+oU0/Kwr0eaItaZ8HkveZkvx53uXuK+Z7QywCQTuZ2Yi794U9jnbR54m2pH0eSN5n0udRakhEJPUUCEREUi6NgWBv2ANoM32eaEva54HkfabUf57UzRGIiMhMaXwiEBGRGqkLBGb2H8zsBTM7YmbfMrOVYY+pVWY2aGYnKp/rm2aWD3tMrTCz283suJlNmVlsqznM7EYzGzOzk2a2I+zxtMrMHjKz18zsh2GPpVVmtsrMDprZi5V/a58Ne0ytMrN3mNn3zexo5TPd3/R705YaMrN/4u7/WPn6D4D3uvunQh5WS8zsN4AD7n7RzP4TgLvfG/KwFs3Mfh2YAr4K/Ft3Hwl5SAtmZhng74EbgNPA88BWd/9RqANrgZn9C+DnwNfd/X1hj6cVZnYFcIW7/8DMfhE4DPTH/P+PAZe6+8/NLAv8L+Cz7n5ovvem7omgGgQqLgViHwnd/VvufrHy7SHgyjDH0yp3f9Hd47659HXASXd/yd3fAh4Dbgt5TC1x978FXg97HO3g7q+6+w8qX/9f4EUg1j3rveznlW+zlT9N3d9SFwgAzOwLZnYKuAv4d2GPp81+D/jvYQ9CKACnar4/TcxvNEllZquB9cD3wh1J68wsY2ZHgNeAb7t7U58pkYHAzL5jZj8M+HMbgLvf5+6rgEeAz4Q72ubM95kq59wHXKT8uSKtmc8TcxZwLPZPn0ljZr8APAl8ri5bEEvuPunu11LOClxnZk2l8BK5Q5m7f7TJU/8b8Aywq4PDaYv5PpOZ3QN8DPiIx2DiZwH/j+LqNLCq5vsrgTMhjUUCVPLoTwKPuPu+sMfTTu4+bmbfBW4E5p3cT+QTwVzM7Oqab28FToQ1lnYxsxuBe4Fb3f182OMRoDw5fLWZXWVmlwB3AvtDHpNUVCZWvwa86O5/GvZ42sHMVlQrBs0sB3yUJu9vaawaehJYQ7kq5afAp9y9GO6oWmNmJ4GlwP+pHDoU50ooM/s48J+BFcA4cMTdN4c7qoUzs5uALwIZ4CF3/0LIQ2qJmT0KfIhyd8ufAbvc/WuhDmqRzOyfA38HHKN8LwD4Y3d/NrxRtcbM3g/8FeV/bz3AE+7+75t6b9oCgYiIzJS61JCIiMykQCAiknIKBCIiKadAICKScgoEIiIpp0AgIpJyCgQiIimnQCAiknL/H1BtkPJRCSR0AAAAAElFTkSuQmCC\n",
42 | "text/plain": [
43 | ""
44 | ]
45 | },
46 | "metadata": {
47 | "needs_background": "light"
48 | },
49 | "output_type": "display_data"
50 | }
51 | ],
52 | "source": [
53 | "plt.scatter(X,y)"
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": null,
59 | "metadata": {},
60 | "outputs": [],
61 | "source": []
62 | }
63 | ],
64 | "metadata": {
65 | "kernelspec": {
66 | "display_name": "Python 3",
67 | "language": "python",
68 | "name": "python3"
69 | },
70 | "language_info": {
71 | "codemirror_mode": {
72 | "name": "ipython",
73 | "version": 3
74 | },
75 | "file_extension": ".py",
76 | "mimetype": "text/x-python",
77 | "name": "python",
78 | "nbconvert_exporter": "python",
79 | "pygments_lexer": "ipython3",
80 | "version": "3.7.1"
81 | }
82 | },
83 | "nbformat": 4,
84 | "nbformat_minor": 2
85 | }
86 |
--------------------------------------------------------------------------------
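Note on /Code/make_regression.ipynb — because coef=True returns the true coefficient, the synthetic data is handy for sanity-checking an estimator. A short sketch using the linear_model import already present in the notebook:

    lr = linear_model.LinearRegression()
    lr.fit(X, y)
    print(lr.coef_[0], coef)   # the fitted slope should be close to the true coefficient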
/Code/map python.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Understand Pandas map()"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 15,
13 | "metadata": {},
14 | "outputs": [
15 | {
16 | "data": {
17 | "application/javascript": [
18 | "\n",
19 | " if (window._pyforest_update_imports_cell) { window._pyforest_update_imports_cell('import pandas as pd'); }\n",
20 | " "
21 | ],
22 | "text/plain": [
23 | ""
24 | ]
25 | },
26 | "metadata": {},
27 | "output_type": "display_data"
28 | },
29 | {
30 | "data": {
31 | "text/html": [
32 | "\n",
33 | "\n",
46 | "
\n",
47 | " \n",
48 | " \n",
49 | " | \n",
50 | " name | \n",
51 | " age | \n",
52 | " profession | \n",
53 | " city | \n",
54 | "
\n",
55 | " \n",
56 | " \n",
57 | " \n",
58 | " 0 | \n",
59 | " A | \n",
60 | " 22 | \n",
61 | " data engineer | \n",
62 | " Gurgaon | \n",
63 | "
\n",
64 | " \n",
65 | " 1 | \n",
66 | " B | \n",
67 | " 26 | \n",
68 | " data scientist | \n",
69 | " Bangalore | \n",
70 | "
\n",
71 | " \n",
72 | " 2 | \n",
73 | " C | \n",
74 | " 33 | \n",
75 | " entrepreneur | \n",
76 | " Gurgaon | \n",
77 | "
\n",
78 | " \n",
79 | " 3 | \n",
80 | " D | \n",
81 | " 44 | \n",
82 | " business analyst | \n",
83 | " Pune | \n",
84 | "
\n",
85 | " \n",
86 | " 4 | \n",
87 | " E | \n",
88 | " 50 | \n",
89 | " self-employed | \n",
90 | " New Delhi | \n",
91 | "
\n",
92 | " \n",
93 | "
\n",
94 | "
"
95 | ],
96 | "text/plain": [
97 | " name age profession city\n",
98 | "0 A 22 data engineer Gurgaon\n",
99 | "1 B 26 data scientist Bangalore\n",
100 | "2 C 33 entrepreneur Gurgaon\n",
101 | "3 D 44 business analyst Pune\n",
102 | "4 E 50 self-employed New Delhi"
103 | ]
104 | },
105 | "execution_count": 15,
106 | "metadata": {},
107 | "output_type": "execute_result"
108 | }
109 | ],
110 | "source": [
111 | "#sample data\n",
112 | "data = {'name': ['A', 'B', 'C', 'D', 'E'], \n",
113 | " 'age': [22, 26, 33, 44, 50],\n",
114 | " 'profession' : ['data engineer', 'data scientist', 'entrepreneur', 'business analyst', 'self-employed'], \n",
115 | " 'city': ['Gurgaon', 'Bangalore', 'Gurgaon', 'Pune', 'New Delhi']}\n",
116 | "\n",
117 | "df = pd.DataFrame(data)\n",
118 | "df"
119 | ]
120 | },
121 | {
122 | "cell_type": "code",
123 | "execution_count": 12,
124 | "metadata": {},
125 | "outputs": [],
126 | "source": [
127 | "# dictionary to map city with states\n",
128 | "map_city_to_states = { 'Gurgaon' : 'Haryana', \n",
129 | " 'Bangalore' : 'Karnataka', \n",
130 | " 'Pune' : 'Maharashtra', \n",
131 | " 'New Delhi' : 'Delhi'}"
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "execution_count": 13,
137 | "metadata": {},
138 | "outputs": [
139 | {
140 | "data": {
141 | "text/html": [
142 | "\n",
143 | "\n",
156 | "
\n",
157 | " \n",
158 | " \n",
159 | " | \n",
160 | " name | \n",
161 | " age | \n",
162 | " profession | \n",
163 | " city | \n",
164 | " state | \n",
165 | "
\n",
166 | " \n",
167 | " \n",
168 | " \n",
169 | " 0 | \n",
170 | " A | \n",
171 | " 22 | \n",
172 | " data engineer | \n",
173 | " Gurgaon | \n",
174 | " Haryana | \n",
175 | "
\n",
176 | " \n",
177 | " 1 | \n",
178 | " B | \n",
179 | " 26 | \n",
180 | " data scientist | \n",
181 | " Bangalore | \n",
182 | " Karnataka | \n",
183 | "
\n",
184 | " \n",
185 | " 2 | \n",
186 | " C | \n",
187 | " 33 | \n",
188 | " entrepreneur | \n",
189 | " Gurgaon | \n",
190 | " Haryana | \n",
191 | "
\n",
192 | " \n",
193 | " 3 | \n",
194 | " D | \n",
195 | " 44 | \n",
196 | " business analyst | \n",
197 | " Pune | \n",
198 | " Maharashtra | \n",
199 | "
\n",
200 | " \n",
201 | " 4 | \n",
202 | " E | \n",
203 | " 50 | \n",
204 | " self-employed | \n",
205 | " New Delhi | \n",
206 | " Delhi | \n",
207 | "
\n",
208 | " \n",
209 | "
\n",
210 | "
"
211 | ],
212 | "text/plain": [
213 | " name age profession city state\n",
214 | "0 A 22 data engineer Gurgaon Haryana\n",
215 | "1 B 26 data scientist Bangalore Karnataka\n",
216 | "2 C 33 entrepreneur Gurgaon Haryana\n",
217 | "3 D 44 business analyst Pune Maharashtra\n",
218 | "4 E 50 self-employed New Delhi Delhi"
219 | ]
220 | },
221 | "execution_count": 13,
222 | "metadata": {},
223 | "output_type": "execute_result"
224 | }
225 | ],
226 | "source": [
227 | "# apply pandas map to map the city columns with states\n",
228 | "df['state'] = df['city'].map(map_city_to_states)\n",
229 | "df"
230 | ]
231 | },
232 | {
233 | "cell_type": "code",
234 | "execution_count": null,
235 | "metadata": {},
236 | "outputs": [],
237 | "source": []
238 | }
239 | ],
240 | "metadata": {
241 | "kernelspec": {
242 | "display_name": "Python 3",
243 | "language": "python",
244 | "name": "python3"
245 | },
246 | "language_info": {
247 | "codemirror_mode": {
248 | "name": "ipython",
249 | "version": 3
250 | },
251 | "file_extension": ".py",
252 | "mimetype": "text/x-python",
253 | "name": "python",
254 | "nbconvert_exporter": "python",
255 | "pygments_lexer": "ipython3",
256 | "version": "3.7.1"
257 | }
258 | },
259 | "nbformat": 4,
260 | "nbformat_minor": 2
261 | }
262 |
--------------------------------------------------------------------------------
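Note on /Code/map python.ipynb — map() returns NaN for any city missing from the dictionary; chain fillna() if a default label is preferred (the notebook itself relies on pyforest to import pandas lazily):

    df['state'] = df['city'].map(map_city_to_states).fillna('Unknown')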
/Code/merge_pdf.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from PyPDF2 import PdfFileMerger"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 2,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "pdfs = ['../Data/dummy_pdf_page1.pdf', '../Data/dummy_pdf_page2.pdf']\n",
19 | "output_filename = 'merged_pdf.pdf'"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 3,
25 | "metadata": {},
26 | "outputs": [
27 | {
28 | "name": "stdout",
29 | "output_type": "stream",
30 | "text": [
31 | "Output file is merged_pdf.pdf\n"
32 | ]
33 | }
34 | ],
35 | "source": [
36 | "merger = PdfFileMerger()\n",
37 | "\n",
38 | "for pdf in pdfs:\n",
39 | " merger.append(pdf)\n",
40 | "\n",
41 | "merger.write(output_filename)\n",
42 | "print('Output file is', output_filename)\n",
43 | "merger.close()"
44 | ]
45 | }
46 | ],
47 | "metadata": {
48 | "kernelspec": {
49 | "display_name": "Python 3",
50 | "language": "python",
51 | "name": "python3"
52 | },
53 | "language_info": {
54 | "codemirror_mode": {
55 | "name": "ipython",
56 | "version": 3
57 | },
58 | "file_extension": ".py",
59 | "mimetype": "text/x-python",
60 | "name": "python",
61 | "nbconvert_exporter": "python",
62 | "pygments_lexer": "ipython3",
63 | "version": "3.7.3"
64 | }
65 | },
66 | "nbformat": 4,
67 | "nbformat_minor": 2
68 | }
69 |
--------------------------------------------------------------------------------
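
The `append` call used above also takes a `pages` range, which helps when only part of each document should go into the merged file. A small sketch under that assumption, reusing the `pdfs` list from the notebook above (`pages` is a half-open `(start, stop)` range):

from PyPDF2 import PdfFileMerger

merger = PdfFileMerger()
for pdf in pdfs:
    # keep only the first page of each input file
    merger.append(pdf, pages=(0, 1))
merger.write('merged_first_pages.pdf')
merger.close()
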
/Code/pandarellel.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "# !pip3 install pandarallel"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 1,
15 | "metadata": {},
16 | "outputs": [
17 | {
18 | "name": "stderr",
19 | "output_type": "stream",
20 | "text": [
21 | "/home/lakshay/.local/lib/python3.7/site-packages/tqdm/_tqdm.py:634: FutureWarning: The Panel class is removed from pandas. Accessing it from the top-level namespace will also be removed in the next version\n",
22 | " from pandas import Panel\n"
23 | ]
24 | }
25 | ],
26 | "source": [
27 | "%load_ext autoreload\n",
28 | "%autoreload 2\n",
29 | "import pandas as pd\n",
30 | "import time\n",
31 | "from pandarallel import pandarallel\n",
32 | "import math\n",
33 | "import numpy as np\n",
34 | "import random\n",
35 | "from tqdm._tqdm_notebook import tqdm_notebook\n",
36 | "tqdm_notebook.pandas()"
37 | ]
38 | },
39 | {
40 | "cell_type": "code",
41 | "execution_count": 2,
42 | "metadata": {},
43 | "outputs": [
44 | {
45 | "name": "stdout",
46 | "output_type": "stream",
47 | "text": [
48 | "INFO: Pandarallel will run on 8 workers.\n",
49 | "INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n"
50 | ]
51 | }
52 | ],
53 | "source": [
54 | "pandarallel.initialize(progress_bar=True)"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": 3,
60 | "metadata": {},
61 | "outputs": [],
62 | "source": [
63 | "df = pd.DataFrame({\n",
64 | " 'A' : [random.randint(15,20) for i in range(1,1000000) ],\n",
65 | " 'B' : [random.randint(10,30) for i in range(1,1000000) ]\n",
66 | "})"
67 | ]
68 | },
69 | {
70 | "cell_type": "code",
71 | "execution_count": 4,
72 | "metadata": {},
73 | "outputs": [],
74 | "source": [
75 | "def func(x):\n",
76 | " return math.sin(x.A**2) + math.sin(x.B**2) + math.tan(x.A**2)"
77 | ]
78 | },
79 | {
80 | "cell_type": "markdown",
81 | "metadata": {},
82 | "source": [
83 | "---\n",
84 | "\n",
85 | "#### `WITHOUT PARALLELIZATION`\n",
86 | "\n",
87 | "---"
88 | ]
89 | },
90 | {
91 | "cell_type": "code",
92 | "execution_count": 5,
93 | "metadata": {},
94 | "outputs": [
95 | {
96 | "data": {
97 | "application/vnd.jupyter.widget-view+json": {
98 | "model_id": "5f096c0d2622441c9557dfc7abebcdb4",
99 | "version_major": 2,
100 | "version_minor": 0
101 | },
102 | "text/plain": [
103 | "HBox(children=(IntProgress(value=0, max=999999), HTML(value='')))"
104 | ]
105 | },
106 | "metadata": {},
107 | "output_type": "display_data"
108 | },
109 | {
110 | "name": "stdout",
111 | "output_type": "stream",
112 | "text": [
113 | "\n",
114 | "CPU times: user 33.7 s, sys: 148 ms, total: 33.9 s\n",
115 | "Wall time: 33.7 s\n"
116 | ]
117 | }
118 | ],
119 | "source": [
120 | "%%time\n",
121 | "res = df.progress_apply(func, axis=1)"
122 | ]
123 | },
124 | {
125 | "cell_type": "markdown",
126 | "metadata": {},
127 | "source": [
128 | "---\n",
129 | "\n",
130 | "#### `WITH PARALLELIZATION`\n",
131 | "\n",
132 | "---"
133 | ]
134 | },
135 | {
136 | "cell_type": "code",
137 | "execution_count": 6,
138 | "metadata": {},
139 | "outputs": [
140 | {
141 | "data": {
142 | "application/vnd.jupyter.widget-view+json": {
143 | "model_id": "16dc675b21954c8da7941a4ad861441b",
144 | "version_major": 2,
145 | "version_minor": 0
146 | },
147 | "text/plain": [
148 | "VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=125000), Label(value='0 / 125000')…"
149 | ]
150 | },
151 | "metadata": {},
152 | "output_type": "display_data"
153 | },
154 | {
155 | "name": "stdout",
156 | "output_type": "stream",
157 | "text": [
158 | "CPU times: user 1.07 s, sys: 89.7 ms, total: 1.16 s\n",
159 | "Wall time: 11.7 s\n"
160 | ]
161 | }
162 | ],
163 | "source": [
164 | "%%time\n",
165 | "res_parallel = df.parallel_apply(func, axis=1)"
166 | ]
167 | },
168 | {
169 | "cell_type": "code",
170 | "execution_count": null,
171 | "metadata": {},
172 | "outputs": [],
173 | "source": []
174 | },
175 | {
176 | "cell_type": "code",
177 | "execution_count": null,
178 | "metadata": {},
179 | "outputs": [],
180 | "source": []
181 | }
182 | ],
183 | "metadata": {
184 | "kernelspec": {
185 | "display_name": "Python 3",
186 | "language": "python",
187 | "name": "python3"
188 | },
189 | "language_info": {
190 | "codemirror_mode": {
191 | "name": "ipython",
192 | "version": 3
193 | },
194 | "file_extension": ".py",
195 | "mimetype": "text/x-python",
196 | "name": "python",
197 | "nbconvert_exporter": "python",
198 | "pygments_lexer": "ipython3",
199 | "version": "3.7.3"
200 | }
201 | },
202 | "nbformat": 4,
203 | "nbformat_minor": 2
204 | }
205 |
--------------------------------------------------------------------------------
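
For this particular `func`, the fastest option is to skip `apply` altogether: NumPy's trigonometric functions are vectorised, so the whole column can be computed in one shot. A short sketch on the same `df`, useful as a baseline against the timings above:

import numpy as np

# vectorised equivalent of func: whole-column operations, no per-row Python calls
res_vectorised = np.sin(df['A']**2) + np.sin(df['B']**2) + np.tan(df['A']**2)

pandarallel earns its keep when the row-wise function genuinely cannot be vectorised; when it can, vectorisation usually beats both `progress_apply` and `parallel_apply`.
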
/Code/pandas_crosstab.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Pandas Crosstab"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "# import pandas\n",
17 | "import pandas as pd"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 5,
23 | "metadata": {},
24 | "outputs": [],
25 | "source": [
26 | "#Import dataset\n",
27 | "data = pd.read_csv('../Data/loan_train.csv', index_col = 'Loan_ID')"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 6,
33 | "metadata": {},
34 | "outputs": [
35 | {
36 | "data": {
85 | "text/plain": [
86 | "Loan_Status N Y\n",
87 | "Credit_History \n",
88 | "0.0 0.921348 0.078652\n",
89 | "1.0 0.204211 0.795789\n",
90 | "All 0.317376 0.682624"
91 | ]
92 | },
93 | "execution_count": 6,
94 | "metadata": {},
95 | "output_type": "execute_result"
96 | }
97 | ],
98 | "source": [
99 | "pd.crosstab(data[\"Credit_History\"],data[\"Loan_Status\"],margins=True, normalize = 'index')"
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "execution_count": null,
105 | "metadata": {},
106 | "outputs": [],
107 | "source": []
108 | }
109 | ],
110 | "metadata": {
111 | "kernelspec": {
112 | "display_name": "Python 3",
113 | "language": "python",
114 | "name": "python3"
115 | },
116 | "language_info": {
117 | "codemirror_mode": {
118 | "name": "ipython",
119 | "version": 3
120 | },
121 | "file_extension": ".py",
122 | "mimetype": "text/x-python",
123 | "name": "python",
124 | "nbconvert_exporter": "python",
125 | "pygments_lexer": "ipython3",
126 | "version": "3.7.1"
127 | }
128 | },
129 | "nbformat": 4,
130 | "nbformat_minor": 2
131 | }
132 |
--------------------------------------------------------------------------------
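
`pd.crosstab` can also aggregate a numeric column instead of just counting rows, via the `values` and `aggfunc` arguments. A minimal sketch against the same loan dataset loaded above:

import numpy as np

# mean loan amount for every Credit_History x Loan_Status combination
pd.crosstab(data['Credit_History'], data['Loan_Status'],
            values=data['LoanAmount'], aggfunc=np.mean)
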
/Code/pandas_pivot_table.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Pandas Pivot Table"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 5,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "# import pandas\n",
17 | "import pandas as pd\n",
18 | "import numpy as np"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 6,
24 | "metadata": {},
25 | "outputs": [],
26 | "source": [
27 | "#Import dataset\n",
28 | "loan = pd.read_csv('../Data/loan_train.csv', index_col = 'Loan_ID')"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 7,
34 | "metadata": {},
35 | "outputs": [
36 | {
37 | "data": {
111 | "text/plain": [
112 | " LoanAmount\n",
113 | "Gender Married Self_Employed \n",
114 | "Female No No 110.596774\n",
115 | " Yes 125.800000\n",
116 | " Yes No 135.480000\n",
117 | " Yes 282.250000\n",
118 | "Male No No 128.137255\n",
119 | " Yes 173.625000\n",
120 | " Yes No 151.709220\n",
121 | " Yes 169.355556"
122 | ]
123 | },
124 | "execution_count": 7,
125 | "metadata": {},
126 | "output_type": "execute_result"
127 | }
128 | ],
129 | "source": [
130 | "pivot = loan.pivot_table(values = ['LoanAmount'], index = ['Gender', 'Married', 'Self_Employed'], aggfunc = np.mean)\n",
131 | "pivot"
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "execution_count": null,
137 | "metadata": {},
138 | "outputs": [],
139 | "source": []
140 | }
141 | ],
142 | "metadata": {
143 | "kernelspec": {
144 | "display_name": "Python 3",
145 | "language": "python",
146 | "name": "python3"
147 | },
148 | "language_info": {
149 | "codemirror_mode": {
150 | "name": "ipython",
151 | "version": 3
152 | },
153 | "file_extension": ".py",
154 | "mimetype": "text/x-python",
155 | "name": "python",
156 | "nbconvert_exporter": "python",
157 | "pygments_lexer": "ipython3",
158 | "version": "3.7.1"
159 | }
160 | },
161 | "nbformat": 4,
162 | "nbformat_minor": 2
163 | }
164 |
--------------------------------------------------------------------------------
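
`pivot_table` also accepts a list of aggregation functions, a `columns` argument and a `fill_value` for empty groups. A short sketch on the same `loan` DataFrame:

# mean and count of LoanAmount per Gender/Married group, split by Self_Employed
loan.pivot_table(values='LoanAmount',
                 index=['Gender', 'Married'],
                 columns='Self_Employed',
                 aggfunc=['mean', 'count'],
                 fill_value=0)
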
/Code/pandas_read_html_files.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 |     "# ***Reading all the tables of a webpage into pandas DataFrames***"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import pandas as pd"
17 | ]
18 | },
19 | {
20 | "cell_type": "markdown",
21 | "metadata": {},
22 | "source": [
23 | "### ***Let the url be: \"https://en.wikipedia.org/wiki/States_and_union_territories_of_India\"***"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": 3,
29 | "metadata": {},
30 | "outputs": [],
31 | "source": [
32 | "dflist=pd.read_html('https://en.wikipedia.org/wiki/States_and_union_territories_of_India')"
33 | ]
34 | },
35 | {
36 | "cell_type": "markdown",
37 | "metadata": {},
38 | "source": [
39 | "Here dflist is a list of dataframes. Each dataframe is associated with one of the tables present on the page."
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": 4,
45 | "metadata": {},
46 | "outputs": [
47 | {
48 | "data": {
102 | "text/plain": [
103 | " 0 \\\n",
104 | "0 States and union territories of India \n",
105 | "1 Category \n",
106 | "2 Location \n",
107 | "3 Number \n",
108 | "4 Populations \n",
109 | "\n",
110 | " 1 \n",
111 | "0 NaN \n",
112 | "1 Federated states \n",
113 | "2 Republic of India \n",
114 | "3 28 States8 Union territories \n",
115 | "4 States: Sikkim - 610,577 (lowest); Uttar Prade... "
116 | ]
117 | },
118 | "execution_count": 4,
119 | "metadata": {},
120 | "output_type": "execute_result"
121 | }
122 | ],
123 | "source": [
124 | "dflist[0].head()"
125 | ]
126 | },
127 | {
128 | "cell_type": "code",
129 | "execution_count": 5,
130 | "metadata": {},
131 | "outputs": [
132 | {
133 | "data": {
241 | "text/plain": [
242 | " 0 1 2 3 \\\n",
243 | "0 State ISO 3166-2:IN Vehiclecode Zone \n",
244 | "1 Andhra Pradesh IN-AP AP Southern \n",
245 | "2 Arunachal Pradesh IN-AR AR North-Eastern \n",
246 | "3 Assam IN-AS AS North-Eastern \n",
247 | "4 Bihar IN-BR BR Eastern \n",
248 | "\n",
249 | " 4 5 \\\n",
250 | "0 Capital Largest city \n",
251 | "1 Hyderabad (de jure)Amaravati (de facto) Note 1... Visakhapatnam \n",
252 | "2 Itanagar 20 February 1987 \n",
253 | "3 Dispur Guwahati \n",
254 | "4 Patna 26 January 1950 \n",
255 | "\n",
256 | " 6 7 8 9 \\\n",
257 | "0 Statehood Population[35] Area(km2) Officiallanguages[36] \n",
258 | "1 1 October 1953 49506799 160205 Telugu \n",
259 | "2 1383727 83743 English — \n",
260 | "3 26 January 1950 31205576 78550 Assamese \n",
261 | "4 104099452 94163 Hindi Urdu \n",
262 | "\n",
263 | " 10 \n",
264 | "0 Additional officiallanguages[36] \n",
265 | "1 — \n",
266 | "2 NaN \n",
267 | "3 Bengali, Bodo \n",
268 | "4 NaN "
269 | ]
270 | },
271 | "execution_count": 5,
272 | "metadata": {},
273 | "output_type": "execute_result"
274 | }
275 | ],
276 | "source": [
277 | "dflist[3].head()"
278 | ]
279 | },
280 | {
281 | "cell_type": "code",
282 | "execution_count": null,
283 | "metadata": {},
284 | "outputs": [],
285 | "source": []
286 | }
287 | ],
288 | "metadata": {
289 | "kernelspec": {
290 | "display_name": "Python 3",
291 | "language": "python",
292 | "name": "python3"
293 | },
294 | "language_info": {
295 | "codemirror_mode": {
296 | "name": "ipython",
297 | "version": 3
298 | },
299 | "file_extension": ".py",
300 | "mimetype": "text/x-python",
301 | "name": "python",
302 | "nbconvert_exporter": "python",
303 | "pygments_lexer": "ipython3",
304 | "version": "3.7.1"
305 | }
306 | },
307 | "nbformat": 4,
308 | "nbformat_minor": 2
309 | }
310 |
--------------------------------------------------------------------------------
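
`pd.read_html` returns every table it can find, so picking the right list index is often trial and error. The `match` argument keeps only tables whose text matches a given string, and `header=0` promotes the first row to column names. A sketch under those assumptions:

url = 'https://en.wikipedia.org/wiki/States_and_union_territories_of_India'

# keep only tables mentioning 'Vehicle', with the first row used as the header
states = pd.read_html(url, match='Vehicle', header=0)
len(states), states[0].columns[:4]
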
/Code/pdb_pythonDebugger.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Python debugger"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "# define function 1\n",
17 | "def func_div(x, y):\n",
18 | " return x / y\n",
19 | "\n",
20 |     "# define function 2\n",
21 | "def func_sub(a):\n",
22 | " x = a\n",
23 | " y = a - 1\n",
24 | " return func_div(x, y)"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": 2,
30 | "metadata": {},
31 | "outputs": [
32 | {
33 | "ename": "ZeroDivisionError",
34 | "evalue": "division by zero",
35 | "output_type": "error",
36 | "traceback": [
37 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
38 | "\u001b[1;31mZeroDivisionError\u001b[0m Traceback (most recent call last)",
39 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mfunc_sub\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
40 | "\u001b[1;32m\u001b[0m in \u001b[0;36mfunc_sub\u001b[1;34m(a)\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[0mx\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0ma\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 8\u001b[0m \u001b[0my\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0ma\u001b[0m \u001b[1;33m-\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 9\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc_div\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
41 | "\u001b[1;32m\u001b[0m in \u001b[0;36mfunc_div\u001b[1;34m(x, y)\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;31m# define function 1\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mfunc_div\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mx\u001b[0m \u001b[1;33m/\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;31m# define fucntion 2\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
42 | "\u001b[1;31mZeroDivisionError\u001b[0m: division by zero"
43 | ]
44 | }
45 | ],
46 | "source": [
47 | "func_sub(1)"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": 3,
53 | "metadata": {},
54 | "outputs": [
55 | {
56 | "name": "stdout",
57 | "output_type": "stream",
58 | "text": [
59 | "Automatic pdb calling has been turned ON\n"
60 | ]
61 | },
62 | {
63 | "ename": "ZeroDivisionError",
64 | "evalue": "division by zero",
65 | "output_type": "error",
66 | "traceback": [
67 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
68 | "\u001b[1;31mZeroDivisionError\u001b[0m Traceback (most recent call last)",
69 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[0mget_ipython\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrun_line_magic\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'pdb'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m''\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mfunc_sub\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
70 | "\u001b[1;32m\u001b[0m in \u001b[0;36mfunc_sub\u001b[1;34m(a)\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[0mx\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0ma\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 8\u001b[0m \u001b[0my\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0ma\u001b[0m \u001b[1;33m-\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 9\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc_div\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
71 | "\u001b[1;32m\u001b[0m in \u001b[0;36mfunc_div\u001b[1;34m(x, y)\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;31m# define function 1\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mfunc_div\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mx\u001b[0m \u001b[1;33m/\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;31m# define fucntion 2\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
72 | "\u001b[1;31mZeroDivisionError\u001b[0m: division by zero"
73 | ]
74 | },
75 | {
76 | "name": "stdout",
77 | "output_type": "stream",
78 | "text": [
79 | "> \u001b[1;32m\u001b[0m(3)\u001b[0;36mfunc_div\u001b[1;34m()\u001b[0m\n",
80 | "\u001b[1;32m 1 \u001b[1;33m\u001b[1;31m# define function 1\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
81 | "\u001b[0m\u001b[1;32m 2 \u001b[1;33m\u001b[1;32mdef\u001b[0m \u001b[0mfunc_div\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
82 | "\u001b[0m\u001b[1;32m----> 3 \u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mx\u001b[0m \u001b[1;33m/\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
83 | "\u001b[0m\u001b[1;32m 4 \u001b[1;33m\u001b[1;33m\u001b[0m\u001b[0m\n",
84 | "\u001b[0m\u001b[1;32m 5 \u001b[1;33m\u001b[1;31m# define fucntion 2\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
85 | "\u001b[0m\n",
86 | "ipdb> print(x)\n",
87 | "1\n",
88 | "ipdb> print(y)\n",
89 | "0\n",
90 | "ipdb> up\n",
91 | "> \u001b[1;32m\u001b[0m(9)\u001b[0;36mfunc_sub\u001b[1;34m()\u001b[0m\n",
92 | "\u001b[1;32m 5 \u001b[1;33m\u001b[1;31m# define fucntion 2\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
93 | "\u001b[0m\u001b[1;32m 6 \u001b[1;33m\u001b[1;32mdef\u001b[0m \u001b[0mfunc_sub\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
94 | "\u001b[0m\u001b[1;32m 7 \u001b[1;33m \u001b[0mx\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0ma\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
95 | "\u001b[0m\u001b[1;32m 8 \u001b[1;33m \u001b[0my\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0ma\u001b[0m \u001b[1;33m-\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
96 | "\u001b[0m\u001b[1;32m----> 9 \u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc_div\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
97 | "\u001b[0m\n",
98 | "ipdb> up\n",
99 | "> \u001b[1;32m\u001b[0m(2)\u001b[0;36m\u001b[1;34m()\u001b[0m\n",
100 | "\u001b[1;32m 1 \u001b[1;33m\u001b[0mget_ipython\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrun_line_magic\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'pdb'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m''\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
101 | "\u001b[0m\u001b[1;32m----> 2 \u001b[1;33m\u001b[0mfunc_sub\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
102 | "\u001b[0m\n",
103 | "ipdb> quit\n"
104 | ]
105 | }
106 | ],
107 | "source": [
108 | "%pdb\n",
109 | "func_sub(1)"
110 | ]
111 | },
112 | {
113 | "cell_type": "code",
114 | "execution_count": null,
115 | "metadata": {},
116 | "outputs": [],
117 | "source": []
118 | }
119 | ],
120 | "metadata": {
121 | "kernelspec": {
122 | "display_name": "Python 3",
123 | "language": "python",
124 | "name": "python3"
125 | },
126 | "language_info": {
127 | "codemirror_mode": {
128 | "name": "ipython",
129 | "version": 3
130 | },
131 | "file_extension": ".py",
132 | "mimetype": "text/x-python",
133 | "name": "python",
134 | "nbconvert_exporter": "python",
135 | "pygments_lexer": "ipython3",
136 | "version": "3.7.1"
137 | }
138 | },
139 | "nbformat": 4,
140 | "nbformat_minor": 2
141 | }
142 |
--------------------------------------------------------------------------------
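
If `%pdb` was not switched on before the failing cell ran, the `%debug` magic opens the same post-mortem session after the fact, and `breakpoint()` (Python 3.7+) drops into the debugger at a chosen line. A minimal sketch:

# run this in a cell right after an exception to inspect the failing frame
%debug

# or stop execution at a specific line while developing
def func_div(x, y):
    breakpoint()   # pauses here and opens pdb
    return x / y
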
/Code/pretty print.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "### Pretty Print dictionary"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 11,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "# Build a test dictionary\n",
17 | "my_dict = {'Student_ID': 34,'Student_name' : 'Tom', 'Student_class' : 5,\n",
18 | " 'Student_marks' : {'maths' : 92,\n",
19 | " 'science' : 95,\n",
20 | " 'social_science' : 65,\n",
21 | " 'English' : 88}\n",
22 | " }\n"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": 12,
28 | "metadata": {},
29 | "outputs": [
30 | {
31 | "name": "stdout",
32 | "output_type": "stream",
33 | "text": [
34 | "{'Student_ID': 34, 'Student_name': 'Tom', 'Student_class': 5, 'Student_marks': {'maths': 92, 'science': 95, 'social_science': 65, 'English': 88}}\n"
35 | ]
36 | }
37 | ],
38 | "source": [
39 | "# with print\n",
40 | "print(my_dict)"
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "execution_count": 13,
46 | "metadata": {},
47 | "outputs": [
48 | {
49 | "name": "stdout",
50 | "output_type": "stream",
51 | "text": [
52 | "{'Student_ID': 34,\n",
53 | " 'Student_class': 5,\n",
54 | " 'Student_marks': {'English': 88,\n",
55 | " 'maths': 92,\n",
56 | " 'science': 95,\n",
57 | " 'social_science': 65},\n",
58 | " 'Student_name': 'Tom'}\n"
59 | ]
60 | }
61 | ],
62 | "source": [
63 | "# with pprint\n",
64 | "import pprint\n",
65 | "pprint.pprint(my_dict)"
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "execution_count": null,
71 | "metadata": {},
72 | "outputs": [],
73 | "source": []
74 | }
75 | ],
76 | "metadata": {
77 | "kernelspec": {
78 | "display_name": "Python 3",
79 | "language": "python",
80 | "name": "python3"
81 | },
82 | "language_info": {
83 | "codemirror_mode": {
84 | "name": "ipython",
85 | "version": 3
86 | },
87 | "file_extension": ".py",
88 | "mimetype": "text/x-python",
89 | "name": "python",
90 | "nbconvert_exporter": "python",
91 | "pygments_lexer": "ipython3",
92 | "version": "3.7.1"
93 | }
94 | },
95 | "nbformat": 4,
96 | "nbformat_minor": 2
97 | }
98 |
--------------------------------------------------------------------------------
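
`pprint` can also cap how much of a nested structure is shown, and `pprint.pformat` returns the formatted text instead of printing it, which is handy for logging. A short sketch on the `my_dict` defined above:

import pprint

# limit the nesting depth and the line width
pprint.pprint(my_dict, depth=1, width=40)

# get the pretty string back instead of printing it
formatted = pprint.pformat(my_dict)
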
/Code/print zenofpython.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [
8 | {
9 | "name": "stdout",
10 | "output_type": "stream",
11 | "text": [
12 | "The Zen of Python, by Tim Peters\n",
13 | "\n",
14 | "Beautiful is better than ugly.\n",
15 | "Explicit is better than implicit.\n",
16 | "Simple is better than complex.\n",
17 | "Complex is better than complicated.\n",
18 | "Flat is better than nested.\n",
19 | "Sparse is better than dense.\n",
20 | "Readability counts.\n",
21 | "Special cases aren't special enough to break the rules.\n",
22 | "Although practicality beats purity.\n",
23 | "Errors should never pass silently.\n",
24 | "Unless explicitly silenced.\n",
25 | "In the face of ambiguity, refuse the temptation to guess.\n",
26 | "There should be one-- and preferably only one --obvious way to do it.\n",
27 | "Although that way may not be obvious at first unless you're Dutch.\n",
28 | "Now is better than never.\n",
29 | "Although never is often better than *right* now.\n",
30 | "If the implementation is hard to explain, it's a bad idea.\n",
31 | "If the implementation is easy to explain, it may be a good idea.\n",
32 | "Namespaces are one honking great idea -- let's do more of those!\n"
33 | ]
34 | }
35 | ],
36 | "source": [
37 | "import this"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": null,
43 | "metadata": {},
44 | "outputs": [],
45 | "source": []
46 | }
47 | ],
48 | "metadata": {
49 | "kernelspec": {
50 | "display_name": "Python 3",
51 | "language": "python",
52 | "name": "python3"
53 | },
54 | "language_info": {
55 | "codemirror_mode": {
56 | "name": "ipython",
57 | "version": 3
58 | },
59 | "file_extension": ".py",
60 | "mimetype": "text/x-python",
61 | "name": "python",
62 | "nbconvert_exporter": "python",
63 | "pygments_lexer": "ipython3",
64 | "version": "3.7.1"
65 | }
66 | },
67 | "nbformat": 4,
68 | "nbformat_minor": 2
69 | }
70 |
--------------------------------------------------------------------------------
/Code/read_csv_ParseDate.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 32,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "#import libraries\n",
10 | "import pandas as pd"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 33,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "# read dataset\n",
20 | "data = pd.read_csv(\"../Data/AirQualityUCI.csv\")"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 34,
26 | "metadata": {},
27 | "outputs": [
28 | {
29 | "data": {
30 | "text/plain": [
31 | "Date object\n",
32 | "Time object\n",
33 | "CO(GT) int64\n",
34 | "PT08.S1(CO) int64\n",
35 | "NMHC(GT) int64\n",
36 | "dtype: object"
37 | ]
38 | },
39 | "execution_count": 34,
40 | "metadata": {},
41 | "output_type": "execute_result"
42 | }
43 | ],
44 | "source": [
45 | "data.dtypes"
46 | ]
47 | },
48 | {
49 | "cell_type": "markdown",
50 | "metadata": {},
51 | "source": [
52 | "## Parse Dates in read_csv()"
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": 35,
58 | "metadata": {},
59 | "outputs": [],
60 | "source": [
61 | "data = pd.read_csv(\"../Data/AirQualityUCI.csv\", parse_dates = [\"Date\"])"
62 | ]
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": 36,
67 | "metadata": {},
68 | "outputs": [
69 | {
70 | "data": {
71 | "text/plain": [
72 | "Date datetime64[ns]\n",
73 | "Time object\n",
74 | "CO(GT) int64\n",
75 | "PT08.S1(CO) int64\n",
76 | "NMHC(GT) int64\n",
77 | "dtype: object"
78 | ]
79 | },
80 | "execution_count": 36,
81 | "metadata": {},
82 | "output_type": "execute_result"
83 | }
84 | ],
85 | "source": [
86 | "data.dtypes"
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": null,
92 | "metadata": {},
93 | "outputs": [],
94 | "source": []
95 | }
96 | ],
97 | "metadata": {
98 | "kernelspec": {
99 | "display_name": "Python 3",
100 | "language": "python",
101 | "name": "python3"
102 | },
103 | "language_info": {
104 | "codemirror_mode": {
105 | "name": "ipython",
106 | "version": 3
107 | },
108 | "file_extension": ".py",
109 | "mimetype": "text/x-python",
110 | "name": "python",
111 | "nbconvert_exporter": "python",
112 | "pygments_lexer": "ipython3",
113 | "version": "3.7.1"
114 | }
115 | },
116 | "nbformat": 4,
117 | "nbformat_minor": 2
118 | }
119 |
--------------------------------------------------------------------------------
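
When the date column uses an unusual pattern, parsing after the read with `pd.to_datetime` gives explicit control. A sketch, assuming the `Date` column follows a day-first pattern; adjust `format` to whatever the file actually contains:

data = pd.read_csv("../Data/AirQualityUCI.csv")

# the format string here is an assumption about the file; change it to match
data['Date'] = pd.to_datetime(data['Date'], format='%d/%m/%Y')
data.dtypes
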
/Code/run python script.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Run a python script inside a notebook"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 4,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "%run add.py"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 5,
22 | "metadata": {},
23 | "outputs": [
24 | {
25 | "data": {
26 | "text/plain": [
27 | "13"
28 | ]
29 | },
30 | "execution_count": 5,
31 | "metadata": {},
32 | "output_type": "execute_result"
33 | }
34 | ],
35 | "source": [
36 | "sum = addition(6,7)\n",
37 | "sum"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": null,
43 | "metadata": {},
44 | "outputs": [],
45 | "source": []
46 | }
47 | ],
48 | "metadata": {
49 | "kernelspec": {
50 | "display_name": "Python 3",
51 | "language": "python",
52 | "name": "python3"
53 | },
54 | "language_info": {
55 | "codemirror_mode": {
56 | "name": "ipython",
57 | "version": 3
58 | },
59 | "file_extension": ".py",
60 | "mimetype": "text/x-python",
61 | "name": "python",
62 | "nbconvert_exporter": "python",
63 | "pygments_lexer": "ipython3",
64 | "version": "3.7.1"
65 | }
66 | },
67 | "nbformat": 4,
68 | "nbformat_minor": 2
69 | }
70 |
--------------------------------------------------------------------------------
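
`%run` executes the script in a fresh namespace and then copies its top-level names (like `addition`) into the notebook, which is why the call works in the next cell. Two useful variations, sketched below: `-i` runs the script inside the notebook's current namespace, and a plain import works as well once the file exists:

%run -i add.py

from add import addition
addition(6, 7)
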
/Code/select_dtype.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 4,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": null,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "#import the dataset\n",
19 |     "data = pd.read_csv('../Data/loan_train.csv')\n",
20 | "data.shape"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 35,
26 | "metadata": {},
27 | "outputs": [
28 | {
29 | "data": {
30 | "text/plain": [
31 | "Loan_ID object\n",
32 | "Gender object\n",
33 | "Married object\n",
34 | "Dependents object\n",
35 | "Education object\n",
36 | "Self_Employed object\n",
37 | "ApplicantIncome int64\n",
38 | "CoapplicantIncome float64\n",
39 | "LoanAmount float64\n",
40 | "Loan_Amount_Term float64\n",
41 | "Credit_History float64\n",
42 | "Property_Area object\n",
43 | "Loan_Status object\n",
44 | "dtype: object"
45 | ]
46 | },
47 | "execution_count": 35,
48 | "metadata": {},
49 | "output_type": "execute_result"
50 | }
51 | ],
52 | "source": [
53 | "#check data types of column\n",
54 | "data.dtypes"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": 36,
60 | "metadata": {},
61 | "outputs": [
62 | {
63 | "data": {
153 | "text/plain": [
154 | " Loan_ID Gender Married Dependents Education Self_Employed \\\n",
155 | "0 LP001002 Male No 0 Graduate No \n",
156 | "1 LP001003 Male Yes 1 Graduate No \n",
157 | "2 LP001005 Male Yes 0 Graduate Yes \n",
158 | "3 LP001006 Male Yes 0 Not Graduate No \n",
159 | "4 LP001008 Male No 0 Graduate No \n",
160 | "\n",
161 | " Property_Area Loan_Status \n",
162 | "0 Urban Y \n",
163 | "1 Rural N \n",
164 | "2 Urban Y \n",
165 | "3 Urban Y \n",
166 | "4 Urban Y "
167 | ]
168 | },
169 | "metadata": {},
170 | "output_type": "display_data"
171 | }
172 | ],
173 | "source": [
174 |     "# Dataframe containing only the categorical variables\n",
175 |     "categorical_var = data.select_dtypes(\"object\")\n",
176 | "display(categorical_var.head())"
177 | ]
178 | },
179 | {
180 | "cell_type": "code",
181 | "execution_count": 37,
182 | "metadata": {},
183 | "outputs": [
184 | {
185 | "data": {
257 | "text/plain": [
258 | " ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term \\\n",
259 | "0 5849 0.0 NaN 360.0 \n",
260 | "1 4583 1508.0 128.0 360.0 \n",
261 | "2 3000 0.0 66.0 360.0 \n",
262 | "3 2583 2358.0 120.0 360.0 \n",
263 | "4 6000 0.0 141.0 360.0 \n",
264 | "\n",
265 | " Credit_History \n",
266 | "0 1.0 \n",
267 | "1 1.0 \n",
268 | "2 1.0 \n",
269 | "3 1.0 \n",
270 | "4 1.0 "
271 | ]
272 | },
273 | "metadata": {},
274 | "output_type": "display_data"
275 | }
276 | ],
277 | "source": [
278 |     "# Dataframe containing only the numeric variables\n",
279 | "numeric_var = data.select_dtypes(\"number\")\n",
280 | "display(numeric_var.head())"
281 | ]
282 | },
283 | {
284 | "cell_type": "code",
285 | "execution_count": null,
286 | "metadata": {},
287 | "outputs": [],
288 | "source": []
289 | }
290 | ],
291 | "metadata": {
292 | "kernelspec": {
293 | "display_name": "Python 3",
294 | "language": "python",
295 | "name": "python3"
296 | },
297 | "language_info": {
298 | "codemirror_mode": {
299 | "name": "ipython",
300 | "version": 3
301 | },
302 | "file_extension": ".py",
303 | "mimetype": "text/x-python",
304 | "name": "python",
305 | "nbconvert_exporter": "python",
306 | "pygments_lexer": "ipython3",
307 | "version": "3.7.1"
308 | }
309 | },
310 | "nbformat": 4,
311 | "nbformat_minor": 2
312 | }
313 |
--------------------------------------------------------------------------------
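
`select_dtypes` also takes `include`/`exclude` lists, which makes it easy to split a DataFrame into feature groups in one pass. A short sketch on the same `data`:

# several dtypes at once
numeric_var = data.select_dtypes(include=['int64', 'float64'])

# everything except the object (string) columns
non_categorical = data.select_dtypes(exclude='object')
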
/Code/split_pdf_pages.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from PyPDF2 import PdfFileWriter, PdfFileReader"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 2,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "FILE_NAME = '../Data/dummy_pdf.pdf'"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 3,
24 | "metadata": {},
25 | "outputs": [
26 | {
27 | "name": "stdout",
28 | "output_type": "stream",
29 | "text": [
30 | "Completed page1.pdf\n",
31 | "Completed page2.pdf\n"
32 | ]
33 | }
34 | ],
35 | "source": [
36 | "inputpdf = PdfFileReader(open(FILE_NAME, \"rb\"))\n",
37 | " \n",
38 | "for i in range(inputpdf.numPages):\n",
39 | " output = PdfFileWriter()\n",
40 | " output.addPage(inputpdf.getPage(i))\n",
41 | " temp_filename = \"page%s.pdf\" % str(i+1)\n",
42 | " with open(temp_filename, \"wb\") as outputStream:\n",
43 | " output.write(outputStream)\n",
44 | " print('Completed', temp_filename)"
45 | ]
46 | }
47 | ],
48 | "metadata": {
49 | "kernelspec": {
50 | "display_name": "Python 3",
51 | "language": "python",
52 | "name": "python3"
53 | },
54 | "language_info": {
55 | "codemirror_mode": {
56 | "name": "ipython",
57 | "version": 3
58 | },
59 | "file_extension": ".py",
60 | "mimetype": "text/x-python",
61 | "name": "python",
62 | "nbconvert_exporter": "python",
63 | "pygments_lexer": "ipython3",
64 | "version": "3.7.3"
65 | }
66 | },
67 | "nbformat": 4,
68 | "nbformat_minor": 2
69 | }
70 |
--------------------------------------------------------------------------------
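
The same reader/writer pair can also pull an arbitrary page range into a single output file instead of one file per page. A minimal sketch with the classes used above:

from PyPDF2 import PdfFileWriter, PdfFileReader

reader = PdfFileReader(open('../Data/dummy_pdf.pdf', 'rb'))
writer = PdfFileWriter()

# copy pages 0 and 1 (if present) into one new file
for i in range(min(2, reader.numPages)):
    writer.addPage(reader.getPage(i))

with open('dummy_pdf_first_pages.pdf', 'wb') as out:
    writer.write(out)
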
/Code/str.split().ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "# import necessary libraries\n",
10 | "import pandas as pd"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 2,
16 | "metadata": {},
17 | "outputs": [
18 | {
19 | "data": {
59 | "text/plain": [
60 | " name\n",
61 | "0 Elise Mccann\n",
62 | "1 Aiden Berger\n",
63 | "2 Elle Kelley"
64 | ]
65 | },
66 | "execution_count": 2,
67 | "metadata": {},
68 | "output_type": "execute_result"
69 | }
70 | ],
71 | "source": [
72 | "# create a dataframe\n",
73 | "df = pd.DataFrame({'name':['Elise Mccann', 'Aiden Berger', 'Elle Kelley']})\n",
74 | "df"
75 | ]
76 | },
77 | {
78 | "cell_type": "code",
79 | "execution_count": 3,
80 | "metadata": {},
81 | "outputs": [
82 | {
83 | "data": {
131 | "text/plain": [
132 | " name first_name last_name\n",
133 | "0 Elise Mccann Elise Mccann\n",
134 | "1 Aiden Berger Aiden Berger\n",
135 | "2 Elle Kelley Elle Kelley"
136 | ]
137 | },
138 | "execution_count": 3,
139 | "metadata": {},
140 | "output_type": "execute_result"
141 | }
142 | ],
143 | "source": [
144 | "# extract first name and last name\n",
145 | "df['first_name'] = df['name'].str.split(' ', expand = True)[0]\n",
146 | "df['last_name'] = df['name'].str.split(' ', expand = True)[1]\n",
147 | "\n",
148 | "df"
149 | ]
150 | },
151 | {
152 | "cell_type": "code",
153 | "execution_count": null,
154 | "metadata": {},
155 | "outputs": [],
156 | "source": []
157 | }
158 | ],
159 | "metadata": {
160 | "kernelspec": {
161 | "display_name": "Python 3",
162 | "language": "python",
163 | "name": "python3"
164 | },
165 | "language_info": {
166 | "codemirror_mode": {
167 | "name": "ipython",
168 | "version": 3
169 | },
170 | "file_extension": ".py",
171 | "mimetype": "text/x-python",
172 | "name": "python",
173 | "nbconvert_exporter": "python",
174 | "pygments_lexer": "ipython3",
175 | "version": "3.7.1"
176 | }
177 | },
178 | "nbformat": 4,
179 | "nbformat_minor": 2
180 | }
181 |
--------------------------------------------------------------------------------
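
For names with more than two words, the default split pushes the extra pieces into additional columns and the last name ends up incomplete. Limiting the number of splits with `n=1` keeps everything after the first space together. A sketch on the same `df`:

# split at most once: 'Anna Maria Berger' -> ['Anna', 'Maria Berger']
names = df['name'].str.split(' ', n=1, expand=True)
df['first_name'], df['last_name'] = names[0], names[1]
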
/Code/tqdm.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "#import tqdm\n",
10 | "from tqdm import tqdm_notebook\n",
11 | "k = 1"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": null,
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "# nested loops \n",
21 | "k = k+1\n",
22 | "for outer_loop in tqdm_notebook(range(1,100), desc = 'outer_loop', leave = True):\n",
23 | " for inner_loop in tqdm_notebook(range(1,10000000,1), desc = 'inner_loop', leave = False):\n",
24 | " k+=100"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": null,
30 | "metadata": {},
31 | "outputs": [],
32 | "source": [
33 | "outer_loop"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": null,
39 | "metadata": {},
40 | "outputs": [],
41 | "source": []
42 | }
43 | ],
44 | "metadata": {
45 | "kernelspec": {
46 | "display_name": "Python 3",
47 | "language": "python",
48 | "name": "python3"
49 | }
50 | },
51 | "nbformat": 4,
52 | "nbformat_minor": 2
53 | }
54 |
--------------------------------------------------------------------------------
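
In recent tqdm releases the notebook-friendly bar lives in `tqdm.notebook`, and the wrapper works on any iterable as long as a `total` is supplied when the length cannot be inferred. A small sketch (the generator is just an illustration):

from tqdm.notebook import tqdm

gen = (i * i for i in range(1_000_000))
# generators have no len(), so pass total explicitly to get a proper progress bar
total_sum = sum(tqdm(gen, total=1_000_000, desc='squares'))
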
/Code/write python script.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Write contents of a cell in a file"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 2,
13 | "metadata": {},
14 | "outputs": [
15 | {
16 | "name": "stdout",
17 | "output_type": "stream",
18 | "text": [
19 | "Overwriting add.py\n"
20 | ]
21 | }
22 | ],
23 | "source": [
24 | "%%writefile add.py\n",
25 | "def addition(x,y):\n",
26 | " return x+y"
27 | ]
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": null,
32 | "metadata": {},
33 | "outputs": [],
34 | "source": []
35 | }
36 | ],
37 | "metadata": {
38 | "kernelspec": {
39 | "display_name": "Python 3",
40 | "language": "python",
41 | "name": "python3"
42 | },
43 | "language_info": {
44 | "codemirror_mode": {
45 | "name": "ipython",
46 | "version": 3
47 | },
48 | "file_extension": ".py",
49 | "mimetype": "text/x-python",
50 | "name": "python",
51 | "nbconvert_exporter": "python",
52 | "pygments_lexer": "ipython3",
53 | "version": "3.7.1"
54 | }
55 | },
56 | "nbformat": 4,
57 | "nbformat_minor": 2
58 | }
59 |
--------------------------------------------------------------------------------
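
`%%writefile` replaces the target file on every run; with the `-a` flag it appends instead, which is convenient for building a script up across several cells. A minimal sketch that adds a second function to the `add.py` created above:

%%writefile -a add.py
def subtraction(x, y):
    return x - y
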
/Data/dummy_pdf.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kunalj101/Data-Science-Hacks/74b5339d6f84154b9b603d122cec560bbf26da6a/Data/dummy_pdf.pdf
--------------------------------------------------------------------------------
/Data/dummy_pdf_page1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kunalj101/Data-Science-Hacks/74b5339d6f84154b9b603d122cec560bbf26da6a/Data/dummy_pdf_page1.pdf
--------------------------------------------------------------------------------
/Data/dummy_pdf_page2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kunalj101/Data-Science-Hacks/74b5339d6f84154b9b603d122cec560bbf26da6a/Data/dummy_pdf_page2.pdf
--------------------------------------------------------------------------------
/Data/muti_input_nn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kunalj101/Data-Science-Hacks/74b5339d6f84154b9b603d122cec560bbf26da6a/Data/muti_input_nn.png
--------------------------------------------------------------------------------