├── CONTRIBUTING.md
├── Code
│   ├── %prun.ipynb
│   ├── %timeit.ipynb
│   ├── Convert normal Distribution.ipynb
│   ├── CustomDataGen_Keras.ipynb
│   ├── Date Parser.ipynb
│   ├── Decision Tree Plot.ipynb
│   ├── Dummy Classifier.ipynb
│   ├── Extract E-mails from text.ipynb
│   ├── FeatureSelection_SelectFromModel.ipynb
│   ├── Funnel_charts.ipynb
│   ├── Hack of the day - Time series.ipynb
│   ├── HoD_history.ipynb
│   ├── HoD_stratify.ipynb
│   ├── Image Augmentation - Article Shoot.ipynb
│   ├── Pandas Apply.ipynb
│   ├── Pandas_boolean indexing.ipynb
│   ├── Pandas_conditional_formatting.ipynb
│   ├── Read multiple datasets.ipynb
│   ├── Removing emojis from text.ipynb
│   ├── Resizing images.ipynb
│   ├── Running Linux Commands in Jupyter Notebook.ipynb
│   ├── Save_sklearn_model.ipynb
│   ├── Styling pandas.ipynb
│   ├── args.ipynb
│   ├── av_hack.ipynb
│   ├── binning_data.ipynb
│   ├── column_transformer.ipynb
│   ├── conditional_selection_of_rows.ipynb
│   ├── convert_string_to_characters.ipynb
│   ├── first and last name extraction.ipynb
│   ├── generator vs list.ipynb
│   ├── groupby_in_pandas.ipynb
│   ├── heatmap_over_code.ipynb
│   ├── hide_print.ipynb
│   ├── interactive plot - plotly.ipynb
│   ├── interactive_notebook.ipynb
│   ├── invert_dictionary.ipynb
│   ├── iterative_imputer.ipynb
│   ├── knnImputer.ipynb
│   ├── list_of_lists_to_list.ipynb
│   ├── magic function - %who.ipynb
│   ├── make_regression.ipynb
│   ├── map python.ipynb
│   ├── melt().ipynb
│   ├── merge_pdf.ipynb
│   ├── pandarellel.ipynb
│   ├── pandas bar plot.ipynb
│   ├── pandas profiling.ipynb
│   ├── pandas_crosstab.ipynb
│   ├── pandas_pivot_table.ipynb
│   ├── pandas_read_html_files.ipynb
│   ├── pdb_pythonDebugger.ipynb
│   ├── plot_confusion_matrix.ipynb
│   ├── pretty print.ipynb
│   ├── print zenofpython.ipynb
│   ├── pyforest.ipynb
│   ├── read_csv_ParseDate.ipynb
│   ├── run python script.ipynb
│   ├── select_dtype.ipynb
│   ├── split_pdf_pages.ipynb
│   ├── str.split().ipynb
│   ├── tqdm.ipynb
│   └── write python script.ipynb
├── Data
│   ├── AirQualityUCI.csv
│   ├── data_cleaned.csv
│   ├── dummy_pdf.pdf
│   ├── dummy_pdf_page1.pdf
│   ├── dummy_pdf_page2.pdf
│   ├── ind-ban-comment.csv
│   ├── loan_train.csv
│   └── muti_input_nn.png
├── LICENSE
└── README.md

/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contribution Guidelines
 2 | 
 3 | This project is created and maintained by Analytics Vidhya. The core idea behind these data science hacks is to help the data science community write better and more efficient code. If you would also like to contribute your own hack, make sure to follow these guidelines.
 4 | 
 5 | ### Hack idea
 6 | 1. Your hack must have at least one of the following characteristics:
 7 | 
 8 |     1. It saves time.
 9 |     1. It requires fewer lines of code.
10 |     1. It provides more functionality.
11 |     1. It uses a better tool or library than an existing one.
12 | 
13 | 1. The hack must be related to data science, machine learning, data engineering, or any related field.
14 | 
15 | 
16 | ### Code and submission steps
17 | 1. Create a pull request describing what your data science hack does.
18 | 1. Add all the necessary files in the same format as used in this project.
19 | 1. The code must include proper and relevant comments.
20 | 1. Each PR will first be reviewed and then approved accordingly.
21 | -------------------------------------------------------------------------------- /Code/%prun.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "# Magic Command - %prun" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "metadata": { 18 | "slideshow": { 19 | "slide_type": "fragment" 20 | } 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "# import pandas\n", 25 | "import pandas as pd" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 2, 31 | "metadata": { 32 | "slideshow": { 33 | "slide_type": "fragment" 34 | } 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "#Import dataset\n", 39 | "loan = pd.read_csv('../Data/loan_train.csv', index_col = 'Loan_ID')" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 3, 45 | "metadata": { 46 | "slideshow": { 47 | "slide_type": "fragment" 48 | } 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "# Define function\n", 53 | "def missing(x):\n", 54 | " return sum(x.isnull())" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 16, 60 | "metadata": { 61 | "slideshow": { 62 | "slide_type": "slide" 63 | } 64 | }, 65 | "outputs": [ 66 | { 67 | "name": "stdout", 68 | "output_type": "stream", 69 | "text": [ 70 | "Missing values per column\n", 71 | " " 72 | ] 73 | } 74 | ], 75 | "source": [ 76 | "# Apply per column\n", 77 | "\n", 78 | "print('Missing values per column')\n", 79 | "%prun loan.apply(missing, axis = 0).head()" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 15, 85 | "metadata": { 86 | "slideshow": { 87 | "slide_type": "slide" 88 | } 89 | }, 90 | "outputs": [ 91 | { 92 | "name": "stdout", 93 | "output_type": "stream", 94 | "text": [ 95 | "Missing values per row\n", 96 | " " 97 | ] 98 | } 99 | ], 100 | "source": [ 101 | "# Apply per row\n", 102 | "print('Missing values per row')\n", 103 | "%prun loan.apply(missing, axis = 1).head()" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": { 110 | "slideshow": { 111 | "slide_type": "skip" 112 | } 113 | }, 114 | "outputs": [], 115 | "source": [] 116 | } 117 | ], 118 | "metadata": { 119 | "kernelspec": { 120 | "display_name": "Python 3", 121 | "language": "python", 122 | "name": "python3" 123 | }, 124 | "language_info": { 125 | "codemirror_mode": { 126 | "name": "ipython", 127 | "version": 3 128 | }, 129 | "file_extension": ".py", 130 | "mimetype": "text/x-python", 131 | "name": "python", 132 | "nbconvert_exporter": "python", 133 | "pygments_lexer": "ipython3", 134 | "version": "3.7.1" 135 | } 136 | }, 137 | "nbformat": 4, 138 | "nbformat_minor": 2 139 | } 140 | -------------------------------------------------------------------------------- /Code/Date Parser.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import datetime\n", 10 | "import dateutil.parser" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "input_date = '10th May 1996'\n", 20 | "parsed_date = dateutil.parser.parse(input_date)" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 3, 26 | "metadata": {}, 27 | 
"outputs": [ 28 | { 29 | "name": "stdout", 30 | "output_type": "stream", 31 | "text": [ 32 | "10-05-1996\n" 33 | ] 34 | } 35 | ], 36 | "source": [ 37 | "output_date = datetime.datetime.strftime(parsed_date, '%d-%m-%Y')\n", 38 | "\n", 39 | "print(output_date)" 40 | ] 41 | } 42 | ], 43 | "metadata": { 44 | "kernelspec": { 45 | "display_name": "Python 3", 46 | "language": "python", 47 | "name": "python3" 48 | }, 49 | "language_info": { 50 | "codemirror_mode": { 51 | "name": "ipython", 52 | "version": 3 53 | }, 54 | "file_extension": ".py", 55 | "mimetype": "text/x-python", 56 | "name": "python", 57 | "nbconvert_exporter": "python", 58 | "pygments_lexer": "ipython3", 59 | "version": "3.7.3" 60 | } 61 | }, 62 | "nbformat": 4, 63 | "nbformat_minor": 2 64 | } 65 | -------------------------------------------------------------------------------- /Code/Dummy Classifier.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Sklearn's Dummy Classifier" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 28, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd\n", 17 | "from sklearn.dummy import DummyClassifier" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 29, 23 | "metadata": {}, 24 | "outputs": [ 25 | { 26 | "data": { 27 | "text/html": [ 28 | "
\n", 29 | "\n", 42 | "\n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | "
Loan_IDGenderMarriedDependentsEducationSelf_EmployedApplicantIncomeCoapplicantIncomeLoanAmountLoan_Amount_TermCredit_HistoryProperty_AreaLoan_Status
0LP001002MaleNo0GraduateNo58490.0NaN360.01.0UrbanY
1LP001003MaleYes1GraduateNo45831508.0128.0360.01.0RuralN
2LP001005MaleYes0GraduateYes30000.066.0360.01.0UrbanY
3LP001006MaleYes0Not GraduateNo25832358.0120.0360.01.0UrbanY
4LP001008MaleNo0GraduateNo60000.0141.0360.01.0UrbanY
\n", 144 | "
" 145 | ], 146 | "text/plain": [ 147 | " Loan_ID Gender Married Dependents Education Self_Employed \\\n", 148 | "0 LP001002 Male No 0 Graduate No \n", 149 | "1 LP001003 Male Yes 1 Graduate No \n", 150 | "2 LP001005 Male Yes 0 Graduate Yes \n", 151 | "3 LP001006 Male Yes 0 Not Graduate No \n", 152 | "4 LP001008 Male No 0 Graduate No \n", 153 | "\n", 154 | " ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term \\\n", 155 | "0 5849 0.0 NaN 360.0 \n", 156 | "1 4583 1508.0 128.0 360.0 \n", 157 | "2 3000 0.0 66.0 360.0 \n", 158 | "3 2583 2358.0 120.0 360.0 \n", 159 | "4 6000 0.0 141.0 360.0 \n", 160 | "\n", 161 | " Credit_History Property_Area Loan_Status \n", 162 | "0 1.0 Urban Y \n", 163 | "1 1.0 Rural N \n", 164 | "2 1.0 Urban Y \n", 165 | "3 1.0 Urban Y \n", 166 | "4 1.0 Urban Y " 167 | ] 168 | }, 169 | "execution_count": 29, 170 | "metadata": {}, 171 | "output_type": "execute_result" 172 | } 173 | ], 174 | "source": [ 175 | "#import data\n", 176 | "data = pd.read_csv('../Data/loan_train.csv')\n", 177 | "data.head()" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": 30, 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [ 186 | "# Dependent and independent variables\n", 187 | "\n", 188 | "X = data.drop(\"Loan_Status\", axis = 1)\n", 189 | "y = data[\"Loan_Status\"]" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 31, 195 | "metadata": {}, 196 | "outputs": [ 197 | { 198 | "data": { 199 | "text/plain": [ 200 | "DummyClassifier(constant=None, random_state=None, strategy='most_frequent')" 201 | ] 202 | }, 203 | "execution_count": 31, 204 | "metadata": {}, 205 | "output_type": "execute_result" 206 | } 207 | ], 208 | "source": [ 209 | "# Dummy classifier with strategy as \"most_frequent\"\n", 210 | "dummy_clf = DummyClassifier(strategy=\"most_frequent\")\n", 211 | "dummy_clf.fit(X, y)" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 32, 217 | "metadata": {}, 218 | "outputs": [ 219 | { 220 | "data": { 221 | "text/plain": [ 222 | "array(['Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 223 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 224 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 225 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 226 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 227 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 228 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 229 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 230 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 231 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 232 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 233 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 234 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 235 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 236 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 237 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 238 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 239 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 240 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 241 | " 'Y', 'Y', 'Y', 'Y', 'Y', 
'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 242 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 243 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 244 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 245 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 246 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 247 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 248 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 249 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 250 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 251 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 252 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 253 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 254 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 255 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 256 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 257 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 258 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 259 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 260 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 261 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 262 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 263 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 264 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 265 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 266 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 267 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 268 | " 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',\n", 269 | " 'Y', 'Y', 'Y'], dtype='\n", 21 | "\n", 34 | "\n", 35 | " \n", 36 | " \n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | "
\n", 88 | "" 89 | ], 90 | "text/plain": [ 91 | " Date Time CO(GT) PT08.S1(CO) NMHC(GT)\n", 92 | "0 10/03/2004 18.00.00 2 6 1360\n", 93 | "1 10/03/2004 19.00.00 2 1292 112\n", 94 | "2 10/03/2004 20.00.00 2 2 1402\n", 95 | "3 10/03/2004 21.00.00 2 2 1376\n", 96 | "4 10/03/2004 22.00.00 1 6 1272" 97 | ] 98 | }, 99 | "execution_count": 3, 100 | "metadata": {}, 101 | "output_type": "execute_result" 102 | } 103 | ], 104 | "source": [ 105 | "data = pd.read_csv('AirQualityUCI.csv')\n", 106 | "data.head()" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 4, 112 | "metadata": {}, 113 | "outputs": [ 114 | { 115 | "data": { 116 | "text/plain": [ 117 | "Date object\n", 118 | "Time object\n", 119 | "CO(GT) int64\n", 120 | "PT08.S1(CO) int64\n", 121 | "NMHC(GT) int64\n", 122 | "dtype: object" 123 | ] 124 | }, 125 | "execution_count": 4, 126 | "metadata": {}, 127 | "output_type": "execute_result" 128 | } 129 | ], 130 | "source": [ 131 | "data.dtypes" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 5, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "data['date_time'] = data['Date'] + ' ' + data['Time']\n", 141 | "data['date_time'] = pd.to_datetime(data['date_time'],format='%d/%m/%Y %H.%M.%S')" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 6, 147 | "metadata": {}, 148 | "outputs": [ 149 | { 150 | "data": { 151 | "text/html": [ 152 | "
\n", 153 | "\n", 166 | "\n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | "
date_timeyearmonthdayofweekquarterhourCO(GT)PT08.S1(CO)NMHC(GT)
02004-03-10 18:00:00200432118261360
12004-03-10 19:00:0020043211921292112
22004-03-10 20:00:00200432120221402
32004-03-10 21:00:00200432121221376
42004-03-10 22:00:00200432122161272
\n", 244 | "
" 245 | ], 246 | "text/plain": [ 247 | " date_time year month dayofweek quarter hour CO(GT) \\\n", 248 | "0 2004-03-10 18:00:00 2004 3 2 1 18 2 \n", 249 | "1 2004-03-10 19:00:00 2004 3 2 1 19 2 \n", 250 | "2 2004-03-10 20:00:00 2004 3 2 1 20 2 \n", 251 | "3 2004-03-10 21:00:00 2004 3 2 1 21 2 \n", 252 | "4 2004-03-10 22:00:00 2004 3 2 1 22 1 \n", 253 | "\n", 254 | " PT08.S1(CO) NMHC(GT) \n", 255 | "0 6 1360 \n", 256 | "1 1292 112 \n", 257 | "2 2 1402 \n", 258 | "3 2 1376 \n", 259 | "4 6 1272 " 260 | ] 261 | }, 262 | "execution_count": 6, 263 | "metadata": {}, 264 | "output_type": "execute_result" 265 | } 266 | ], 267 | "source": [ 268 | "new_df = pd.DataFrame({\"date_time\": data['date_time'],\n", 269 | " \"year\": data['date_time'].dt.year,\n", 270 | " \"month\": data['date_time'].dt.month,\n", 271 | " \"dayofweek\": data['date_time'].dt.dayofweek,\n", 272 | " \"quarter\": data['date_time'].dt.quarter,\n", 273 | " \"hour\": data['date_time'].dt.hour,\n", 274 | " \"CO(GT)\": data['CO(GT)'],\n", 275 | " \"PT08.S1(CO)\": data['PT08.S1(CO)'],\n", 276 | " \"NMHC(GT)\": data['NMHC(GT)']\n", 277 | " })\n", 278 | "new_df.head()" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": null, 284 | "metadata": {}, 285 | "outputs": [], 286 | "source": [] 287 | } 288 | ], 289 | "metadata": { 290 | "kernelspec": { 291 | "display_name": "Python 3", 292 | "language": "python", 293 | "name": "python3" 294 | }, 295 | "language_info": { 296 | "codemirror_mode": { 297 | "name": "ipython", 298 | "version": 3 299 | }, 300 | "file_extension": ".py", 301 | "mimetype": "text/x-python", 302 | "name": "python", 303 | "nbconvert_exporter": "python", 304 | "pygments_lexer": "ipython3", 305 | "version": "3.7.1" 306 | } 307 | }, 308 | "nbformat": 4, 309 | "nbformat_minor": 2 310 | } 311 | -------------------------------------------------------------------------------- /Code/HoD_history.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "data = pd.read_csv('loan_train.csv')" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 3, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "data": { 28 | "text/html": [ 29 | "
\n", 30 | "\n", 43 | "\n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | "
Loan_IDGenderMarriedDependentsEducationSelf_EmployedApplicantIncomeCoapplicantIncomeLoanAmountLoan_Amount_TermCredit_HistoryProperty_AreaLoan_Status
0LP001002MaleNo0GraduateNo58490.0NaN360.01.0UrbanY
1LP001003MaleYes1GraduateNo45831508.0128.0360.01.0RuralN
2LP001005MaleYes0GraduateYes30000.066.0360.01.0UrbanY
3LP001006MaleYes0Not GraduateNo25832358.0120.0360.01.0UrbanY
4LP001008MaleNo0GraduateNo60000.0141.0360.01.0UrbanY
\n", 145 | "
" 146 | ], 147 | "text/plain": [ 148 | " Loan_ID Gender Married Dependents Education Self_Employed \\\n", 149 | "0 LP001002 Male No 0 Graduate No \n", 150 | "1 LP001003 Male Yes 1 Graduate No \n", 151 | "2 LP001005 Male Yes 0 Graduate Yes \n", 152 | "3 LP001006 Male Yes 0 Not Graduate No \n", 153 | "4 LP001008 Male No 0 Graduate No \n", 154 | "\n", 155 | " ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term \\\n", 156 | "0 5849 0.0 NaN 360.0 \n", 157 | "1 4583 1508.0 128.0 360.0 \n", 158 | "2 3000 0.0 66.0 360.0 \n", 159 | "3 2583 2358.0 120.0 360.0 \n", 160 | "4 6000 0.0 141.0 360.0 \n", 161 | "\n", 162 | " Credit_History Property_Area Loan_Status \n", 163 | "0 1.0 Urban Y \n", 164 | "1 1.0 Rural N \n", 165 | "2 1.0 Urban Y \n", 166 | "3 1.0 Urban Y \n", 167 | "4 1.0 Urban Y " 168 | ] 169 | }, 170 | "execution_count": 3, 171 | "metadata": {}, 172 | "output_type": "execute_result" 173 | } 174 | ], 175 | "source": [ 176 | "data.head()" 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "#### Use %history - get history of commands used previously" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 12, 189 | "metadata": {}, 190 | "outputs": [ 191 | { 192 | "name": "stdout", 193 | "output_type": "stream", 194 | "text": [ 195 | "import pandas as pd\n", 196 | "data = pd.read_csv('loan_train.csv')\n", 197 | "data.head()\n", 198 | "%history\n", 199 | "% history -n\n", 200 | "%history -n\n", 201 | "%history -n -o\n", 202 | "%history -n -t\n", 203 | "# prints\n", 204 | "%history -o\n", 205 | "# list of all commands\n", 206 | "%history\n", 207 | "# Prints line numbers for each command \n", 208 | "%history -n\n", 209 | "# list of all commands\n", 210 | "%history\n" 211 | ] 212 | } 213 | ], 214 | "source": [ 215 | "# list of all commands\n", 216 | "%history" 217 | ] 218 | }, 219 | { 220 | "cell_type": "raw", 221 | "metadata": {}, 222 | "source": [ 223 | "# Prints line numbers for each command \n", 224 | "%history -n" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": null, 230 | "metadata": {}, 231 | "outputs": [], 232 | "source": [ 233 | "# prints the command as well as output \n", 234 | "%history -o" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": null, 240 | "metadata": {}, 241 | "outputs": [], 242 | "source": [ 243 | "# prints translated history - converts it into valid python commands before execustion\n", 244 | "# eg - %history --> get_ipython().run_line_magic('history', '')\n", 245 | "%history -n -t" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": null, 251 | "metadata": {}, 252 | "outputs": [], 253 | "source": [] 254 | } 255 | ], 256 | "metadata": { 257 | "kernelspec": { 258 | "display_name": "Python 3", 259 | "language": "python", 260 | "name": "python3" 261 | }, 262 | "language_info": { 263 | "codemirror_mode": { 264 | "name": "ipython", 265 | "version": 3 266 | }, 267 | "file_extension": ".py", 268 | "mimetype": "text/x-python", 269 | "name": "python", 270 | "nbconvert_exporter": "python", 271 | "pygments_lexer": "ipython3", 272 | "version": "3.7.1" 273 | } 274 | }, 275 | "nbformat": 4, 276 | "nbformat_minor": 2 277 | } 278 | -------------------------------------------------------------------------------- /Code/HoD_stratify.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": 
[], 8 | "source": [ 9 | "#import libraries\n", 10 | "import pandas as pd\n", 11 | "import numpy as np\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "data": { 21 | "text/plain": [ 22 | "(891, 25)" 23 | ] 24 | }, 25 | "execution_count": 2, 26 | "metadata": {}, 27 | "output_type": "execute_result" 28 | } 29 | ], 30 | "source": [ 31 | "#import dataset\n", 32 | "data = pd.read_csv('data_cleaned.csv')\n", 33 | "data.shape" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 3, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "# Seperate dependent and independent variable\n", 43 | "x = data.drop(['Survived'], axis = 1)\n", 44 | "y = data['Survived']" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 4, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "# import train_test_split\n", 54 | "from sklearn.model_selection import train_test_split" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "## Without stratify" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 5, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "train_x, test_x, train_y, test_y = train_test_split(x, y, random_state = 45)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 6, 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "name": "stdout", 80 | "output_type": "stream", 81 | "text": [ 82 | "0 0.60479\n", 83 | "1 0.39521\n", 84 | "Name: Survived, dtype: float64\n", 85 | "0 0.650224\n", 86 | "1 0.349776\n", 87 | "Name: Survived, dtype: float64\n" 88 | ] 89 | } 90 | ], 91 | "source": [ 92 | "print(train_y.value_counts(normalize=True))\n", 93 | "print(test_y.value_counts(normalize=True))" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": {}, 99 | "source": [ 100 | "## With Stratify" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 7, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "# With Statify\n", 110 | "train_X, test_X, train_Y, test_Y = train_test_split(x, y, random_state = 56, stratify = y)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 8, 116 | "metadata": {}, 117 | "outputs": [ 118 | { 119 | "name": "stdout", 120 | "output_type": "stream", 121 | "text": [ 122 | "0 0.616766\n", 123 | "1 0.383234\n", 124 | "Name: Survived, dtype: float64\n", 125 | "0 0.61435\n", 126 | "1 0.38565\n", 127 | "Name: Survived, dtype: float64\n" 128 | ] 129 | } 130 | ], 131 | "source": [ 132 | "print(train_Y.value_counts(normalize = True))\n", 133 | "print(test_Y.value_counts(normalize = True))" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [] 142 | } 143 | ], 144 | "metadata": { 145 | "kernelspec": { 146 | "display_name": "Python 3", 147 | "language": "python", 148 | "name": "python3" 149 | }, 150 | "language_info": { 151 | "codemirror_mode": { 152 | "name": "ipython", 153 | "version": 3 154 | }, 155 | "file_extension": ".py", 156 | "mimetype": "text/x-python", 157 | "name": "python", 158 | "nbconvert_exporter": "python", 159 | "pygments_lexer": "ipython3", 160 | "version": "3.7.1" 161 | } 162 | }, 163 | "nbformat": 4, 164 | "nbformat_minor": 2 165 | } 166 | -------------------------------------------------------------------------------- /Code/Pandas Apply.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Pandas Apply" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "# import pandas\n", 17 | "import pandas as pd" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 3, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "#Import dataset\n", 27 | "loan = pd.read_csv('../Data/loan_train.csv', index_col = 'Loan_ID')" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 4, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "# Define function\n", 37 | "def missing(x):\n", 38 | " return sum(x.isnull())" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 5, 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "name": "stdout", 48 | "output_type": "stream", 49 | "text": [ 50 | "Missing values per column\n" 51 | ] 52 | }, 53 | { 54 | "data": { 55 | "text/plain": [ 56 | "Gender 13\n", 57 | "Married 3\n", 58 | "Dependents 15\n", 59 | "Education 0\n", 60 | "Self_Employed 32\n", 61 | "dtype: int64" 62 | ] 63 | }, 64 | "execution_count": 5, 65 | "metadata": {}, 66 | "output_type": "execute_result" 67 | } 68 | ], 69 | "source": [ 70 | "# Apply per column\n", 71 | "print('Missing values per column')\n", 72 | "loan.apply(missing, axis = 0).head()" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 6, 78 | "metadata": {}, 79 | "outputs": [ 80 | { 81 | "name": "stdout", 82 | "output_type": "stream", 83 | "text": [ 84 | "Missing values per row\n" 85 | ] 86 | }, 87 | { 88 | "data": { 89 | "text/plain": [ 90 | "Loan_ID\n", 91 | "LP001002 1\n", 92 | "LP001003 0\n", 93 | "LP001005 0\n", 94 | "LP001006 0\n", 95 | "LP001008 0\n", 96 | "dtype: int64" 97 | ] 98 | }, 99 | "execution_count": 6, 100 | "metadata": {}, 101 | "output_type": "execute_result" 102 | } 103 | ], 104 | "source": [ 105 | "# Apply per row\n", 106 | "print('Missing values per row')\n", 107 | "loan.apply(missing, axis = 1).head()" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [] 116 | } 117 | ], 118 | "metadata": { 119 | "kernelspec": { 120 | "display_name": "Python 3", 121 | "language": "python", 122 | "name": "python3" 123 | }, 124 | "language_info": { 125 | "codemirror_mode": { 126 | "name": "ipython", 127 | "version": 3 128 | }, 129 | "file_extension": ".py", 130 | "mimetype": "text/x-python", 131 | "name": "python", 132 | "nbconvert_exporter": "python", 133 | "pygments_lexer": "ipython3", 134 | "version": "3.7.1" 135 | } 136 | }, 137 | "nbformat": 4, 138 | "nbformat_minor": 2 139 | } 140 | -------------------------------------------------------------------------------- /Code/Pandas_boolean indexing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Pandas Boolean Indexing" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "# import pandas\n", 17 | "import pandas as pd" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 6, 23 | "metadata": {}, 24 | "outputs": [ 25 | { 26 | "data": { 27 | "text/html": [ 28 | "
\n", 29 | "\n", 42 | "\n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | "
GenderMarriedDependentsEducationSelf_EmployedApplicantIncomeCoapplicantIncomeLoanAmountLoan_Amount_TermCredit_HistoryProperty_AreaLoan_Status
Loan_ID
LP001002MaleNo0GraduateNo58490.0NaN360.01.0UrbanY
LP001003MaleYes1GraduateNo45831508.0128.0360.01.0RuralN
LP001005MaleYes0GraduateYes30000.066.0360.01.0UrbanY
LP001006MaleYes0Not GraduateNo25832358.0120.0360.01.0UrbanY
LP001008MaleNo0GraduateNo60000.0141.0360.01.0UrbanY
\n", 153 | "
" 154 | ], 155 | "text/plain": [ 156 | " Gender Married Dependents Education Self_Employed \\\n", 157 | "Loan_ID \n", 158 | "LP001002 Male No 0 Graduate No \n", 159 | "LP001003 Male Yes 1 Graduate No \n", 160 | "LP001005 Male Yes 0 Graduate Yes \n", 161 | "LP001006 Male Yes 0 Not Graduate No \n", 162 | "LP001008 Male No 0 Graduate No \n", 163 | "\n", 164 | " ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term \\\n", 165 | "Loan_ID \n", 166 | "LP001002 5849 0.0 NaN 360.0 \n", 167 | "LP001003 4583 1508.0 128.0 360.0 \n", 168 | "LP001005 3000 0.0 66.0 360.0 \n", 169 | "LP001006 2583 2358.0 120.0 360.0 \n", 170 | "LP001008 6000 0.0 141.0 360.0 \n", 171 | "\n", 172 | " Credit_History Property_Area Loan_Status \n", 173 | "Loan_ID \n", 174 | "LP001002 1.0 Urban Y \n", 175 | "LP001003 1.0 Rural N \n", 176 | "LP001005 1.0 Urban Y \n", 177 | "LP001006 1.0 Urban Y \n", 178 | "LP001008 1.0 Urban Y " 179 | ] 180 | }, 181 | "execution_count": 6, 182 | "metadata": {}, 183 | "output_type": "execute_result" 184 | } 185 | ], 186 | "source": [ 187 | "#Import dataset\n", 188 | "loan = pd.read_csv('../Data/loan_train.csv', index_col = 'Loan_ID')\n", 189 | "loan.head()" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 7, 195 | "metadata": {}, 196 | "outputs": [ 197 | { 198 | "data": { 199 | "text/html": [ 200 | "
\n", 201 | "\n", 214 | "\n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | "
GenderEducationLoan_Status
Loan_ID
LP001155FemaleNot GraduateY
LP001669FemaleNot GraduateY
LP001692FemaleNot GraduateY
LP001908FemaleNot GraduateY
LP002300FemaleNot GraduateY
\n", 262 | "
" 263 | ], 264 | "text/plain": [ 265 | " Gender Education Loan_Status\n", 266 | "Loan_ID \n", 267 | "LP001155 Female Not Graduate Y\n", 268 | "LP001669 Female Not Graduate Y\n", 269 | "LP001692 Female Not Graduate Y\n", 270 | "LP001908 Female Not Graduate Y\n", 271 | "LP002300 Female Not Graduate Y" 272 | ] 273 | }, 274 | "execution_count": 7, 275 | "metadata": {}, 276 | "output_type": "execute_result" 277 | } 278 | ], 279 | "source": [ 280 | "# we want a list of all females who are not graduate and got a loan. \n", 281 | "\n", 282 | "loan.loc[(loan['Gender'] == 'Female') & (loan['Education'] == 'Not Graduate') & (loan['Loan_Status'] == 'Y'),\n", 283 | " [\"Gender\",\"Education\",\"Loan_Status\"]].head()\n" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": null, 289 | "metadata": {}, 290 | "outputs": [], 291 | "source": [] 292 | } 293 | ], 294 | "metadata": { 295 | "kernelspec": { 296 | "display_name": "Python 3", 297 | "language": "python", 298 | "name": "python3" 299 | }, 300 | "language_info": { 301 | "codemirror_mode": { 302 | "name": "ipython", 303 | "version": 3 304 | }, 305 | "file_extension": ".py", 306 | "mimetype": "text/x-python", 307 | "name": "python", 308 | "nbconvert_exporter": "python", 309 | "pygments_lexer": "ipython3", 310 | "version": "3.7.1" 311 | } 312 | }, 313 | "nbformat": 4, 314 | "nbformat_minor": 2 315 | } 316 | -------------------------------------------------------------------------------- /Code/Removing emojis from text.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Removing Emojis from Text" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 4, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "name": "stdout", 17 | "output_type": "stream", 18 | "text": [ 19 | "Raw tweet: Hi 😂! Have a nice weekend 💕👭\n", 20 | "Preprocessed tweet: Hi ! Have a nice weekend \n" 21 | ] 22 | } 23 | ], 24 | "source": [ 25 | "text= \"Hi 😂! 
Have a nice weekend 💕👭\"\n", 26 | "preprocessed_text=text.encode('ascii', 'ignore').decode('ascii')\n", 27 | "\n", 28 | "print(\"Raw tweet:\",text) #with emoji\n", 29 | "print(\"Preprocessed tweet:\",preprocessed_text) # no emoji" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [] 38 | } 39 | ], 40 | "metadata": { 41 | "kernelspec": { 42 | "display_name": "Python 3", 43 | "language": "python", 44 | "name": "python3" 45 | }, 46 | "language_info": { 47 | "codemirror_mode": { 48 | "name": "ipython", 49 | "version": 3 50 | }, 51 | "file_extension": ".py", 52 | "mimetype": "text/x-python", 53 | "name": "python", 54 | "nbconvert_exporter": "python", 55 | "pygments_lexer": "ipython3", 56 | "version": "3.7.1" 57 | } 58 | }, 59 | "nbformat": 4, 60 | "nbformat_minor": 2 61 | } 62 | -------------------------------------------------------------------------------- /Code/Running Linux Commands in Jupyter Notebook.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "---\n", 8 | "\n", 9 | "## Running Linux Commands in Jupyter Notebook\n", 10 | "\n", 11 | "---" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 3, 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "name": "stdout", 21 | "output_type": "stream", 22 | "text": [ 23 | " 606 14292 113629\r\n" 24 | ] 25 | } 26 | ], 27 | "source": [ 28 | "# get the number of lines, word count and character count\n", 29 | "!cat 'ind-ban-comment.csv' | wc " 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 4, 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "name": "stdout", 39 | "output_type": "stream", 40 | "text": [ 41 | "Batsman,Batsman_Name,Bowler,Bowler_Name,Commentary,Detail,Dismissed,Id,Isball,Isboundary,Iswicket,Over,Runs,Timestamp\r\n", 42 | "28994,Mohammed Shami,63881,Mustafizur Rahman,\"OUT! Bowled! 5-fer to finish a tremendous last over. His 4th 5-wicket haul in ODIs. Around off, Shami moves across to paddle but misses the ball hits his pads and goes onto hit the stumps. 2 wickets and just 3 runs from the final over. Top notch from Mustafizur. INDIA FINISH WITH 314/9 FROM THEIR 50 OVERS.\",W,28994,346,True,,1,49.6,0,2019-07-02 13:18:47\r\n", 43 | "5132,Bhuvneshwar Kumar,63881,Mustafizur Rahman,\"WIDE AND RUN OUT! Slower delivery outside off, it is on the wrong side of the tramline. Shami comes for a run. Bhuvi was slow though. Rahim throws it to Mustafizur, who hits the stumps at the bowler's end. The umpire takes it upstairs but Bhuvneshwar had started to walk back even before the replays rolled in.\",W+wd,5132,344,True,,1,49.6,1,2019-07-02 13:17:28\r\n", 44 | "28994,Mohammed Shami,63881,Mustafizur Rahman,\"Back of a length ball on off, Shami pulls it to deep mid-wicket and gets a single.\",,,343,True,,,49.5,1,2019-07-02 13:16:03\r\n", 45 | "5132,Bhuvneshwar Kumar,63881,Mustafizur Rahman,\"Just 1 run off the 4 balls. This is World Class stuff. Good length ball on off, Kumar punches it to the side of the pitch and gets a single.\",,,342,True,,,49.4,1,2019-07-02 13:15:17\r\n", 46 | "3676,MS Dhoni,63881,Mustafizur Rahman,\"OUT! No Dhoni magic in the last over. Slower bouncer away from the body, Dhoni looks to flat-bat it straight over the bowler's head. It comes off the upper half of his bat and goes to the left of mid on. 
Shakib moves across to his left hand side and takes an easy catch.\",W,3676,340,True,,1,49.3,0,2019-07-02 13:13:39\r\n", 47 | "3676,MS Dhoni,63881,Mustafizur Rahman,\"Another dot. Bangladesh will take this. Short and slow, Dhoni pulls it to Shakib at short mid-wicket.\",,,339,True,,,49.2,0,2019-07-02 13:12:47\r\n", 48 | "cat: write error: Broken pipe\r\n" 49 | ] 50 | } 51 | ], 52 | "source": [ 53 | "# get the top 7 rows\n", 54 | "!cat 'ind-ban-comment.csv' | head -7" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [] 63 | } 64 | ], 65 | "metadata": { 66 | "kernelspec": { 67 | "display_name": "Python 3", 68 | "language": "python", 69 | "name": "python3" 70 | }, 71 | "language_info": { 72 | "codemirror_mode": { 73 | "name": "ipython", 74 | "version": 3 75 | }, 76 | "file_extension": ".py", 77 | "mimetype": "text/x-python", 78 | "name": "python", 79 | "nbconvert_exporter": "python", 80 | "pygments_lexer": "ipython3", 81 | "version": "3.7.3" 82 | } 83 | }, 84 | "nbformat": 4, 85 | "nbformat_minor": 2 86 | } 87 | -------------------------------------------------------------------------------- /Code/Save_sklearn_model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Save Machine Learning Model using Pickle" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 16, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "#import libraries\n", 17 | "import pandas as pd\n", 18 | "import numpy as np\n", 19 | "import sklearn " 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 17, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/html": [ 30 | "
\n", 31 | "\n", 44 | "\n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | "
SurvivedAgeFarePclass_1Pclass_2Pclass_3Sex_femaleSex_maleSibSp_0SibSp_1...Parch_0Parch_1Parch_2Parch_3Parch_4Parch_5Parch_6Embarked_CEmbarked_QEmbarked_S
0022.07.25000010101...1000000001
1138.071.28331001001...1000000100
2126.07.92500011010...1000000001
3135.053.10001001001...1000000001
4035.08.05000010110...1000000001
\n", 194 | "

5 rows × 25 columns

\n", 195 | "
" 196 | ], 197 | "text/plain": [ 198 | " Survived Age Fare Pclass_1 Pclass_2 Pclass_3 Sex_female \\\n", 199 | "0 0 22.0 7.2500 0 0 1 0 \n", 200 | "1 1 38.0 71.2833 1 0 0 1 \n", 201 | "2 1 26.0 7.9250 0 0 1 1 \n", 202 | "3 1 35.0 53.1000 1 0 0 1 \n", 203 | "4 0 35.0 8.0500 0 0 1 0 \n", 204 | "\n", 205 | " Sex_male SibSp_0 SibSp_1 ... Parch_0 Parch_1 Parch_2 Parch_3 \\\n", 206 | "0 1 0 1 ... 1 0 0 0 \n", 207 | "1 0 0 1 ... 1 0 0 0 \n", 208 | "2 0 1 0 ... 1 0 0 0 \n", 209 | "3 0 0 1 ... 1 0 0 0 \n", 210 | "4 1 1 0 ... 1 0 0 0 \n", 211 | "\n", 212 | " Parch_4 Parch_5 Parch_6 Embarked_C Embarked_Q Embarked_S \n", 213 | "0 0 0 0 0 0 1 \n", 214 | "1 0 0 0 1 0 0 \n", 215 | "2 0 0 0 0 0 1 \n", 216 | "3 0 0 0 0 0 1 \n", 217 | "4 0 0 0 0 0 1 \n", 218 | "\n", 219 | "[5 rows x 25 columns]" 220 | ] 221 | }, 222 | "execution_count": 17, 223 | "metadata": {}, 224 | "output_type": "execute_result" 225 | } 226 | ], 227 | "source": [ 228 | "#import dataset\n", 229 | "data = pd.read_csv('../Data/data_cleaned.csv')\n", 230 | "data.head()" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "## Build ML model" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 18, 243 | "metadata": {}, 244 | "outputs": [ 245 | { 246 | "data": { 247 | "text/plain": [ 248 | "KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',\n", 249 | " metric_params=None, n_jobs=None, n_neighbors=3, p=2,\n", 250 | " weights='uniform')" 251 | ] 252 | }, 253 | "execution_count": 18, 254 | "metadata": {}, 255 | "output_type": "execute_result" 256 | } 257 | ], 258 | "source": [ 259 | "# Define Dependent and independent variable\n", 260 | "X = data.drop('Survived', axis = 1)\n", 261 | "y = data['Survived']\n", 262 | "\n", 263 | "# Split train and test data\n", 264 | "from sklearn.model_selection import train_test_split\n", 265 | "train_X, test_X, train_y, test_y = train_test_split(X, y, test_size = 0.3, random_state = 142)\n", 266 | "\n", 267 | "# import KNeighborsClassifier model \n", 268 | "from sklearn.neighbors import KNeighborsClassifier \n", 269 | "knn = KNeighborsClassifier(n_neighbors = 3) \n", 270 | " \n", 271 | "# train model \n", 272 | "knn.fit(train_X, train_y) " 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "## Save the model" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": 19, 285 | "metadata": {}, 286 | "outputs": [ 287 | { 288 | "data": { 289 | "text/plain": [ 290 | "array([0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0,\n", 291 | " 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0,\n", 292 | " 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1,\n", 293 | " 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1,\n", 294 | " 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", 295 | " 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0,\n", 296 | " 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,\n", 297 | " 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,\n", 298 | " 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1,\n", 299 | " 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0,\n", 300 | " 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0,\n", 301 | " 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1,\n", 302 | " 0, 1, 1, 1], dtype=int64)" 303 | ] 304 | }, 305 | 
"execution_count": 19, 306 | "metadata": {}, 307 | "output_type": "execute_result" 308 | } 309 | ], 310 | "source": [ 311 | "import pickle\n", 312 | "\n", 313 | "# Save the KNN model using pickle\n", 314 | "saved_model = pickle.dumps(knn)\n", 315 | "\n", 316 | "# Load the saved KNN model\n", 317 | "load_model = pickle.loads(saved_model) \n", 318 | "\n", 319 | "# Make predictions using the loaded pickled model \n", 320 | "load_model.predict(test_X) " 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": null, 326 | "metadata": {}, 327 | "outputs": [], 328 | "source": [] 329 | } 330 | ], 331 | "metadata": { 332 | "kernelspec": { 333 | "display_name": "Python 3", 334 | "language": "python", 335 | "name": "python3" 336 | }, 337 | "language_info": { 338 | "codemirror_mode": { 339 | "name": "ipython", 340 | "version": 3 341 | }, 342 | "file_extension": ".py", 343 | "mimetype": "text/x-python", 344 | "name": "python", 345 | "nbconvert_exporter": "python", 346 | "pygments_lexer": "ipython3", 347 | "version": "3.7.1" 348 | } 349 | }, 350 | "nbformat": 4, 351 | "nbformat_minor": 2 352 | } 353 | -------------------------------------------------------------------------------- /Code/args.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Let us understand *args in Python" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 7, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "# Function to calculate averages\n", 17 | "# We will input variable number of arguaments using *args\n", 18 | "\n", 19 | "def calculate_average_new(*num):\n", 20 | " add = sum(num)\n", 21 | " count = len(num)\n", 22 | "\n", 23 | " \n", 24 | " average = (add / count) \n", 25 | "\n", 26 | " \n", 27 | " print(\"The average of values is\", average)" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 13, 33 | "metadata": {}, 34 | "outputs": [ 35 | { 36 | "name": "stdout", 37 | "output_type": "stream", 38 | "text": [ 39 | "The average of values is 7.5\n" 40 | ] 41 | } 42 | ], 43 | "source": [ 44 | "# Calculate average of 2 values\n", 45 | "\n", 46 | "calculate_average(7,8)" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 14, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "name": "stdout", 56 | "output_type": "stream", 57 | "text": [ 58 | "The average of values is 8.25\n" 59 | ] 60 | } 61 | ], 62 | "source": [ 63 | "# Calculate Value of 4 values\n", 64 | "\n", 65 | "calculate_average(3,8,9,13)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 15, 71 | "metadata": {}, 72 | "outputs": [ 73 | { 74 | "name": "stdout", 75 | "output_type": "stream", 76 | "text": [ 77 | "The average of values is 58.07692307692308\n" 78 | ] 79 | } 80 | ], 81 | "source": [ 82 | "# Calculate Value of 13 values\n", 83 | "\n", 84 | "calculate_average(3,8,9,10,4,5,6,11,22,32,100,200,345)" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 16, 90 | "metadata": {}, 91 | "outputs": [ 92 | { 93 | "name": "stdout", 94 | "output_type": "stream", 95 | "text": [ 96 | "The average of values is 8.25\n" 97 | ] 98 | } 99 | ], 100 | "source": [ 101 | "calculate_average(3,8,9,13)" 102 | ] 103 | } 104 | ], 105 | "metadata": { 106 | "kernelspec": { 107 | "display_name": "Python 3", 108 | "language": "python", 109 | "name": "python3" 110 | }, 111 | "language_info": { 112 | "codemirror_mode": { 113 | 
"name": "ipython", 114 | "version": 3 115 | }, 116 | "file_extension": ".py", 117 | "mimetype": "text/x-python", 118 | "name": "python", 119 | "nbconvert_exporter": "python", 120 | "pygments_lexer": "ipython3", 121 | "version": "3.7.1" 122 | } 123 | }, 124 | "nbformat": 4, 125 | "nbformat_minor": 2 126 | } 127 | -------------------------------------------------------------------------------- /Code/av_hack.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "colab": { 8 | "base_uri": "https://localhost:8080/", 9 | "height": 122 10 | }, 11 | "colab_type": "code", 12 | "id": "XG57bzU6cJ2U", 13 | "outputId": "26641faf-bc75-4bc5-9616-080905d329e0" 14 | }, 15 | "outputs": [], 16 | "source": [ 17 | "!pip install tokenizers" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 6, 23 | "metadata": { 24 | "colab": { 25 | "base_uri": "https://localhost:8080/", 26 | "height": 336 27 | }, 28 | "colab_type": "code", 29 | "id": "iQ17w2Q5azCt", 30 | "outputId": "2c5c5f7f-4b84-41d4-f87b-745f003e1b78" 31 | }, 32 | "outputs": [ 33 | { 34 | "name": "stdout", 35 | "output_type": "stream", 36 | "text": [ 37 | "Unique id: [39, 68, 75, 297, 0, 220, 39, 78, 86, 257, 264, 220, 88, 274, 30]\n", 38 | "Tokens: ['H', 'e', 'l', 'lo', '!', 'Ġ', 'H', 'o', 'w', 'Ġa', 're', 'Ġ', 'y', 'ou', '?']\n", 39 | "Merges: [(0, 1), (1, 2), (2, 3), (3, 5), (5, 6), (6, 7), (7, 8), (8, 9), (9, 10), (10, 12), (12, 14), (14, 15), (15, 16), (16, 18), (18, 19)]\n" 40 | ] 41 | } 42 | ], 43 | "source": [ 44 | "#importing different tokenizers\n", 45 | "from tokenizers import (ByteLevelBPETokenizer,\n", 46 | " BPETokenizer,\n", 47 | " SentencePieceBPETokenizer,\n", 48 | " BertWordPieceTokenizer)\n", 49 | "#instantiate tokenizer\n", 50 | "tokenizer = ByteLevelBPETokenizer()\n", 51 | "\n", 52 | "#train with our own data\n", 53 | "tokenizer.train([\"sample.txt\"], vocab_size=20000)\n", 54 | "\n", 55 | "#Lets tokenize the sentence\n", 56 | "output = tokenizer.encode(\"Hello! 
How are you?\")\n", 57 | "\n", 58 | "print(\"Unique id:\",output.ids)\n", 59 | "print(\"Tokens:\",output.tokens)\n", 60 | "print(\"Merges:\",output.offsets)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 7, 66 | "metadata": { 67 | "colab": { 68 | "base_uri": "https://localhost:8080/", 69 | "height": 163 70 | }, 71 | "colab_type": "code", 72 | "id": "9qKgNFrabgCm", 73 | "outputId": "e6ed2a40-e822-4ff2-d0f6-af4569c1deb5" 74 | }, 75 | "outputs": [ 76 | { 77 | "data": { 78 | "text/plain": [ 79 | "'H'" 80 | ] 81 | }, 82 | "execution_count": 7, 83 | "metadata": {}, 84 | "output_type": "execute_result" 85 | } 86 | ], 87 | "source": [ 88 | "#converting integer back to character\n", 89 | "output.original_str[output.offsets[0]]" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 0, 95 | "metadata": { 96 | "colab": {}, 97 | "colab_type": "code", 98 | "id": "xqE2EH9Otv0w" 99 | }, 100 | "outputs": [], 101 | "source": [] 102 | } 103 | ], 104 | "metadata": { 105 | "colab": { 106 | "name": "av hack.ipynb", 107 | "provenance": [] 108 | }, 109 | "kernelspec": { 110 | "display_name": "Python 3", 111 | "language": "python", 112 | "name": "python3" 113 | }, 114 | "language_info": { 115 | "codemirror_mode": { 116 | "name": "ipython", 117 | "version": 3 118 | }, 119 | "file_extension": ".py", 120 | "mimetype": "text/x-python", 121 | "name": "python", 122 | "nbconvert_exporter": "python", 123 | "pygments_lexer": "ipython3", 124 | "version": "3.7.1" 125 | } 126 | }, 127 | "nbformat": 4, 128 | "nbformat_minor": 1 129 | } 130 | -------------------------------------------------------------------------------- /Code/column_transformer.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Sklearn ColumnTransformer" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 52, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd\n", 17 | "from sklearn.compose import ColumnTransformer" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 53, 23 | "metadata": {}, 24 | "outputs": [ 25 | { 26 | "data": { 27 | "text/html": [ 28 | "
\n", 29 | "\n", 42 | "\n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | "
Loan_ID | Gender | Married | Dependents | Education | Self_Employed | ApplicantIncome | CoapplicantIncome | LoanAmount | Loan_Amount_Term | Credit_History | Property_Area | Loan_Status
0 | LP001002 | Male | No | 0 | Graduate | No | 5849 | 0.0 | NaN | 360.0 | 1.0 | Urban | Y
1 | LP001003 | Male | Yes | 1 | Graduate | No | 4583 | 1508.0 | 128.0 | 360.0 | 1.0 | Rural | N
2 | LP001005 | Male | Yes | 0 | Graduate | Yes | 3000 | 0.0 | 66.0 | 360.0 | 1.0 | Urban | Y
3 | LP001006 | Male | Yes | 0 | Not Graduate | No | 2583 | 2358.0 | 120.0 | 360.0 | 1.0 | Urban | Y
4 | LP001008 | Male | No | 0 | Graduate | No | 6000 | 0.0 | 141.0 | 360.0 | 1.0 | Urban | Y
\n", 144 | "
" 145 | ], 146 | "text/plain": [ 147 | " Loan_ID Gender Married Dependents Education Self_Employed \\\n", 148 | "0 LP001002 Male No 0 Graduate No \n", 149 | "1 LP001003 Male Yes 1 Graduate No \n", 150 | "2 LP001005 Male Yes 0 Graduate Yes \n", 151 | "3 LP001006 Male Yes 0 Not Graduate No \n", 152 | "4 LP001008 Male No 0 Graduate No \n", 153 | "\n", 154 | " ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term \\\n", 155 | "0 5849 0.0 NaN 360.0 \n", 156 | "1 4583 1508.0 128.0 360.0 \n", 157 | "2 3000 0.0 66.0 360.0 \n", 158 | "3 2583 2358.0 120.0 360.0 \n", 159 | "4 6000 0.0 141.0 360.0 \n", 160 | "\n", 161 | " Credit_History Property_Area Loan_Status \n", 162 | "0 1.0 Urban Y \n", 163 | "1 1.0 Rural N \n", 164 | "2 1.0 Urban Y \n", 165 | "3 1.0 Urban Y \n", 166 | "4 1.0 Urban Y " 167 | ] 168 | }, 169 | "execution_count": 53, 170 | "metadata": {}, 171 | "output_type": "execute_result" 172 | } 173 | ], 174 | "source": [ 175 | "# import dataset\n", 176 | "data = pd.read_csv('../Data/loan_train.csv')\n", 177 | "data.head()" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": 54, 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [ 186 | "# rows having null values removed for simplicity purpose\n", 187 | "data.dropna(inplace = True)" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": 55, 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [ 196 | "# Dependent and independent variables\n", 197 | "X = data.drop('Loan_Status', axis = 1)\n", 198 | "y = data['Loan_Status']" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 56, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "# Apply StandardScalar transformation to numeric features\n", 208 | "# Apply OneHotEncoder transformation to categorical features\n", 209 | "\n", 210 | "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n", 211 | "\n", 212 | "transformers = [(\"num\", StandardScaler(), ['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount']),\n", 213 | " (\"cat\", OneHotEncoder(handle_unknown='ignore'), ['Gender', 'Married', 'Self_Employed', 'Property_Area'])]\n", 214 | "\n", 215 | "transformer = ColumnTransformer(transformers = transformers)" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 57, 221 | "metadata": {}, 222 | "outputs": [ 223 | { 224 | "name": "stderr", 225 | "output_type": "stream", 226 | "text": [ 227 | "C:\\Users\\ram\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\data.py:625: DataConversionWarning: Data with input dtype int64, float64 were all converted to float64 by StandardScaler.\n", 228 | " return self.partial_fit(X, y)\n", 229 | "C:\\Users\\ram\\Anaconda3\\lib\\site-packages\\sklearn\\base.py:462: DataConversionWarning: Data with input dtype int64, float64 were all converted to float64 by StandardScaler.\n", 230 | " return self.fit(X, **fit_params).transform(X)\n" 231 | ] 232 | }, 233 | { 234 | "data": { 235 | "text/plain": [ 236 | "array([-0.13796959, -0.02795204, -0.20808917, 0. , 1. ,\n", 237 | " 0. , 1. , 1. , 0. , 1. ,\n", 238 | " 0. , 0. 
])" 239 | ] 240 | }, 241 | "execution_count": 57, 242 | "metadata": {}, 243 | "output_type": "execute_result" 244 | } 245 | ], 246 | "source": [ 247 | "X = transformer.fit_transform(X)\n", 248 | "X[0]" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": null, 254 | "metadata": {}, 255 | "outputs": [], 256 | "source": [] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": {}, 262 | "outputs": [], 263 | "source": [] 264 | } 265 | ], 266 | "metadata": { 267 | "kernelspec": { 268 | "display_name": "Python 3", 269 | "language": "python", 270 | "name": "python3" 271 | }, 272 | "language_info": { 273 | "codemirror_mode": { 274 | "name": "ipython", 275 | "version": 3 276 | }, 277 | "file_extension": ".py", 278 | "mimetype": "text/x-python", 279 | "name": "python", 280 | "nbconvert_exporter": "python", 281 | "pygments_lexer": "ipython3", 282 | "version": "3.7.1" 283 | } 284 | }, 285 | "nbformat": 4, 286 | "nbformat_minor": 2 287 | } 288 | -------------------------------------------------------------------------------- /Code/convert_string_to_characters.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "colab_type": "text", 7 | "id": "QsmXYpdG-peE" 8 | }, 9 | "source": [ 10 | "## Easiest way to convert a string to characters\n" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": { 17 | "colab": { 18 | "base_uri": "https://localhost:8080/", 19 | "height": 34 20 | }, 21 | "colab_type": "code", 22 | "id": "xqE2EH9Otv0w", 23 | "outputId": "ec5fb143-cf7a-4cfc-9f6a-9a9b01211639" 24 | }, 25 | "outputs": [ 26 | { 27 | "name": "stdout", 28 | "output_type": "stream", 29 | "text": [ 30 | "['S', 'p', 'o', 'r', 't', 's', ' ', 'A', 'n', 'a', 'l', 'y', 't', 'i', 'c', 's', ' ', 'c', 'a', 'n', ' ', 'b', 'e', ' ', 'a', ' ', 'g', 'a', 'm', 'e', ' ', 'c', 'h', 'a', 'n', 'g', 'e', 'r']\n" 31 | ] 32 | } 33 | ], 34 | "source": [ 35 | "text='Sports Analytics can be a game changer'\n", 36 | "characters=list(text)\n", 37 | "print(characters)" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [] 46 | } 47 | ], 48 | "metadata": { 49 | "colab": { 50 | "name": "av hack.ipynb", 51 | "provenance": [] 52 | }, 53 | "kernelspec": { 54 | "display_name": "Python 3", 55 | "language": "python", 56 | "name": "python3" 57 | }, 58 | "language_info": { 59 | "codemirror_mode": { 60 | "name": "ipython", 61 | "version": 3 62 | }, 63 | "file_extension": ".py", 64 | "mimetype": "text/x-python", 65 | "name": "python", 66 | "nbconvert_exporter": "python", 67 | "pygments_lexer": "ipython3", 68 | "version": "3.7.1" 69 | } 70 | }, 71 | "nbformat": 4, 72 | "nbformat_minor": 1 73 | } 74 | -------------------------------------------------------------------------------- /Code/first and last name extraction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 6, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "data": { 19 | "text/html": [ 20 | "
\n", 21 | "\n", 34 | "\n", 35 | " \n", 36 | " \n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | "
  | name
0 | Elise Mccann
1 | Aiden Berger
2 | Elle Kelley
\n", 56 | "
" 57 | ], 58 | "text/plain": [ 59 | " name\n", 60 | "0 Elise Mccann\n", 61 | "1 Aiden Berger\n", 62 | "2 Elle Kelley" 63 | ] 64 | }, 65 | "execution_count": 6, 66 | "metadata": {}, 67 | "output_type": "execute_result" 68 | } 69 | ], 70 | "source": [ 71 | "# create a dataframe\n", 72 | "df = pd.DataFrame({'name':['Elise Mccann', 'Aiden Berger', 'Elle Kelley']})\n", 73 | "df" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 7, 79 | "metadata": {}, 80 | "outputs": [ 81 | { 82 | "data": { 83 | "text/html": [ 84 | "
\n", 85 | "\n", 98 | "\n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | "
  | name | first_name | last_name
0 | Elise Mccann | Elise | Mccann
1 | Aiden Berger | Aiden | Berger
2 | Elle Kelley | Elle | Kelley
\n", 128 | "
" 129 | ], 130 | "text/plain": [ 131 | " name first_name last_name\n", 132 | "0 Elise Mccann Elise Mccann\n", 133 | "1 Aiden Berger Aiden Berger\n", 134 | "2 Elle Kelley Elle Kelley" 135 | ] 136 | }, 137 | "execution_count": 7, 138 | "metadata": {}, 139 | "output_type": "execute_result" 140 | } 141 | ], 142 | "source": [ 143 | "# extract first name and last name\n", 144 | "df['first_name'] = df['name'].str.split(' ', expand = True)[0]\n", 145 | "df['last_name'] = df['name'].str.split(' ', expand = True)[1]\n", 146 | "\n", 147 | "df" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [] 156 | } 157 | ], 158 | "metadata": { 159 | "kernelspec": { 160 | "display_name": "Python 3", 161 | "language": "python", 162 | "name": "python3" 163 | }, 164 | "language_info": { 165 | "codemirror_mode": { 166 | "name": "ipython", 167 | "version": 3 168 | }, 169 | "file_extension": ".py", 170 | "mimetype": "text/x-python", 171 | "name": "python", 172 | "nbconvert_exporter": "python", 173 | "pygments_lexer": "ipython3", 174 | "version": "3.7.1" 175 | } 176 | }, 177 | "nbformat": 4, 178 | "nbformat_minor": 2 179 | } 180 | -------------------------------------------------------------------------------- /Code/generator vs list.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Python List Comprehension v/s Python Generators" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 49, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "from sys import getsizeof" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 50, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "# list comprehension\n", 26 | "list_comp = [i**3 for i in range(10000)]\n", 27 | "# generator expression\n", 28 | "gen_comp = (i**3 for i in range(10000))" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 51, 34 | "metadata": {}, 35 | "outputs": [ 36 | { 37 | "name": "stdout", 38 | "output_type": "stream", 39 | "text": [ 40 | "Size of list is : 87624\n" 41 | ] 42 | } 43 | ], 44 | "source": [ 45 | "# size of list comprehension\n", 46 | "a = getsizeof(list_comp)\n", 47 | "print(\"Size of list is : \", a)" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 52, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "Size of generator is : 120\n" 60 | ] 61 | } 62 | ], 63 | "source": [ 64 | "#size of generator expression\n", 65 | "b = getsizeof(gen_comp)\n", 66 | "print(\"Size of generator is : \", b)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 53, 72 | "metadata": {}, 73 | "outputs": [ 74 | { 75 | "name": "stdout", 76 | "output_type": "stream", 77 | "text": [ 78 | "9.14460999999983\n" 79 | ] 80 | } 81 | ], 82 | "source": [ 83 | "# time taken by list comprehension\n", 84 | "import timeit\n", 85 | "print(timeit.timeit('''list_com = [i for i in range(100) if i % 2 == 0]'''))" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 54, 91 | "metadata": {}, 92 | "outputs": [ 93 | { 94 | "name": "stdout", 95 | "output_type": "stream", 96 | "text": [ 97 | "0.7809511999998904\n" 98 | ] 99 | } 100 | ], 101 | "source": [ 102 | "# time taken by generator expression \n", 103 | "print(timeit.timeit('''list_com = (i for i in range(100) 
if i % 2 == 0)'''))" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [] 112 | } 113 | ], 114 | "metadata": { 115 | "kernelspec": { 116 | "display_name": "Python 3", 117 | "language": "python", 118 | "name": "python3" 119 | }, 120 | "language_info": { 121 | "codemirror_mode": { 122 | "name": "ipython", 123 | "version": 3 124 | }, 125 | "file_extension": ".py", 126 | "mimetype": "text/x-python", 127 | "name": "python", 128 | "nbconvert_exporter": "python", 129 | "pygments_lexer": "ipython3", 130 | "version": "3.7.1" 131 | } 132 | }, 133 | "nbformat": 4, 134 | "nbformat_minor": 2 135 | } 136 | -------------------------------------------------------------------------------- /Code/groupby_in_pandas.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Understand Groupby in Pandas" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 14, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "data": { 17 | "text/html": [ 18 | "
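The getsizeof and timeit comparison in the generator-vs-list notebook above comes down to laziness: the generator never materialises the whole sequence, it only keeps the state needed to produce the next value. A small, standard-library-only sketch of consuming one on demand:

```python
from sys import getsizeof
from itertools import islice

gen = (i ** 3 for i in range(10000))   # nothing is computed yet

print(getsizeof(gen))          # small, and independent of the range size
print(next(gen))               # 0 -- first cube, computed on demand
print(list(islice(gen, 4)))    # [1, 8, 27, 64] -- the next four values

# a generator is single-pass: once consumed it is exhausted,
# so build a list instead when the values are needed more than once
```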
\n", 19 | "\n", 32 | "\n", 33 | " \n", 34 | " \n", 35 | " \n", 36 | " \n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | "
Loan_ID | Gender | Married | Dependents | Education | Self_Employed | ApplicantIncome | CoapplicantIncome | LoanAmount | Loan_Amount_Term | Credit_History | Property_Area | Loan_Status
0 | LP001002 | Male | No | 0 | Graduate | No | 5849 | 0.0 | NaN | 360.0 | 1.0 | Urban | Y
1 | LP001003 | Male | Yes | 1 | Graduate | No | 4583 | 1508.0 | 128.0 | 360.0 | 1.0 | Rural | N
2 | LP001005 | Male | Yes | 0 | Graduate | Yes | 3000 | 0.0 | 66.0 | 360.0 | 1.0 | Urban | Y
3 | LP001006 | Male | Yes | 0 | Not Graduate | No | 2583 | 2358.0 | 120.0 | 360.0 | 1.0 | Urban | Y
4 | LP001008 | Male | No | 0 | Graduate | No | 6000 | 0.0 | 141.0 | 360.0 | 1.0 | Urban | Y
\n", 134 | "
" 135 | ], 136 | "text/plain": [ 137 | " Loan_ID Gender Married Dependents Education Self_Employed \\\n", 138 | "0 LP001002 Male No 0 Graduate No \n", 139 | "1 LP001003 Male Yes 1 Graduate No \n", 140 | "2 LP001005 Male Yes 0 Graduate Yes \n", 141 | "3 LP001006 Male Yes 0 Not Graduate No \n", 142 | "4 LP001008 Male No 0 Graduate No \n", 143 | "\n", 144 | " ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term \\\n", 145 | "0 5849 0.0 NaN 360.0 \n", 146 | "1 4583 1508.0 128.0 360.0 \n", 147 | "2 3000 0.0 66.0 360.0 \n", 148 | "3 2583 2358.0 120.0 360.0 \n", 149 | "4 6000 0.0 141.0 360.0 \n", 150 | "\n", 151 | " Credit_History Property_Area Loan_Status \n", 152 | "0 1.0 Urban Y \n", 153 | "1 1.0 Rural N \n", 154 | "2 1.0 Urban Y \n", 155 | "3 1.0 Urban Y \n", 156 | "4 1.0 Urban Y " 157 | ] 158 | }, 159 | "execution_count": 14, 160 | "metadata": {}, 161 | "output_type": "execute_result" 162 | } 163 | ], 164 | "source": [ 165 | "#import dataset\n", 166 | "import pandas as pd\n", 167 | "\n", 168 | "df = pd.read_csv('../Data/loan_train.csv')\n", 169 | "df.head()" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 15, 175 | "metadata": {}, 176 | "outputs": [ 177 | { 178 | "data": { 179 | "text/html": [ 180 | "
\n", 181 | "\n", 194 | "\n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | "
Gender | ApplicantIncome
Female | 4643.473214
Male | 5446.460123
\n", 216 | "
" 217 | ], 218 | "text/plain": [ 219 | " ApplicantIncome\n", 220 | "Gender \n", 221 | "Female 4643.473214\n", 222 | "Male 5446.460123" 223 | ] 224 | }, 225 | "execution_count": 15, 226 | "metadata": {}, 227 | "output_type": "execute_result" 228 | } 229 | ], 230 | "source": [ 231 | "# Average Income of males and females\n", 232 | "df.groupby(['Gender'])[['ApplicantIncome']].mean()\n" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": 16, 238 | "metadata": {}, 239 | "outputs": [ 240 | { 241 | "data": { 242 | "text/html": [ 243 | "
\n", 244 | "\n", 257 | "\n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | "
Property_Area | LoanAmount
Rural | 152.260116
Semiurban | 145.504386
Urban | 142.198953
\n", 283 | "
" 284 | ], 285 | "text/plain": [ 286 | " LoanAmount\n", 287 | "Property_Area \n", 288 | "Rural 152.260116\n", 289 | "Semiurban 145.504386\n", 290 | "Urban 142.198953" 291 | ] 292 | }, 293 | "execution_count": 16, 294 | "metadata": {}, 295 | "output_type": "execute_result" 296 | } 297 | ], 298 | "source": [ 299 | "# Average loan amount for different property areas like urban, rural\n", 300 | "df.groupby(['Property_Area'])[['LoanAmount']].mean()" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": 17, 306 | "metadata": {}, 307 | "outputs": [ 308 | { 309 | "data": { 310 | "text/html": [ 311 | "
\n", 312 | "\n", 325 | "\n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | "
Education | Loan_Status
Graduate | 480
Not Graduate | 134
\n", 347 | "
" 348 | ], 349 | "text/plain": [ 350 | " Loan_Status\n", 351 | "Education \n", 352 | "Graduate 480\n", 353 | "Not Graduate 134" 354 | ] 355 | }, 356 | "execution_count": 17, 357 | "metadata": {}, 358 | "output_type": "execute_result" 359 | } 360 | ], 361 | "source": [ 362 | "# Compare loan status of different education backgrounds\n", 363 | "df.groupby(['Education'])[['Loan_Status']].count()" 364 | ] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "execution_count": null, 369 | "metadata": {}, 370 | "outputs": [], 371 | "source": [] 372 | } 373 | ], 374 | "metadata": { 375 | "kernelspec": { 376 | "display_name": "Python 3", 377 | "language": "python", 378 | "name": "python3" 379 | }, 380 | "language_info": { 381 | "codemirror_mode": { 382 | "name": "ipython", 383 | "version": 3 384 | }, 385 | "file_extension": ".py", 386 | "mimetype": "text/x-python", 387 | "name": "python", 388 | "nbconvert_exporter": "python", 389 | "pygments_lexer": "ipython3", 390 | "version": "3.7.1" 391 | } 392 | }, 393 | "nbformat": 4, 394 | "nbformat_minor": 2 395 | } 396 | -------------------------------------------------------------------------------- /Code/hide_print.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "import sys" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "class HiddenPrints:\n", 20 | " def __enter__(self):\n", 21 | " self._original_stdout = sys.stdout\n", 22 | " sys.stdout = open(os.devnull, 'w')\n", 23 | "\n", 24 | " def __exit__(self, exc_type, exc_val, exc_tb):\n", 25 | " sys.stdout.close()\n", 26 | " sys.stdout = self._original_stdout" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 4, 32 | "metadata": {}, 33 | "outputs": [ 34 | { 35 | "name": "stdout", 36 | "output_type": "stream", 37 | "text": [ 38 | "This will be printed\n" 39 | ] 40 | } 41 | ], 42 | "source": [ 43 | "with HiddenPrints():\n", 44 | " print(\"This print is hidden\")\n", 45 | "\n", 46 | "print(\"This will be printed\")" 47 | ] 48 | } 49 | ], 50 | "metadata": { 51 | "kernelspec": { 52 | "display_name": "Python 3", 53 | "language": "python", 54 | "name": "python3" 55 | }, 56 | "language_info": { 57 | "codemirror_mode": { 58 | "name": "ipython", 59 | "version": 3 60 | }, 61 | "file_extension": ".py", 62 | "mimetype": "text/x-python", 63 | "name": "python", 64 | "nbconvert_exporter": "python", 65 | "pygments_lexer": "ipython3", 66 | "version": "3.7.3" 67 | } 68 | }, 69 | "nbformat": 4, 70 | "nbformat_minor": 2 71 | } 72 | -------------------------------------------------------------------------------- /Code/interactive_notebook.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from IPython.core.interactiveshell import InteractiveShell\n", 10 | "InteractiveShell.ast_node_interactivity = \"all\"" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 11, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "import pandas as pd\n", 20 | "data = pd.read_csv('loan_train.csv')" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 13, 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "data": { 30 | 
"text/plain": [ 31 | "(614, 13)" 32 | ] 33 | }, 34 | "execution_count": 13, 35 | "metadata": {}, 36 | "output_type": "execute_result" 37 | }, 38 | { 39 | "data": { 40 | "text/html": [ 41 | "
\n", 42 | "\n", 55 | "\n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | "
Loan_ID | Gender | Married | Dependents | Education | Self_Employed | ApplicantIncome | CoapplicantIncome | LoanAmount | Loan_Amount_Term | Credit_History | Property_Area | Loan_Status
0 | LP001002 | Male | No | 0 | Graduate | No | 5849 | 0.0 | NaN | 360.0 | 1.0 | Urban | Y
1 | LP001003 | Male | Yes | 1 | Graduate | No | 4583 | 1508.0 | 128.0 | 360.0 | 1.0 | Rural | N
2 | LP001005 | Male | Yes | 0 | Graduate | Yes | 3000 | 0.0 | 66.0 | 360.0 | 1.0 | Urban | Y
3 | LP001006 | Male | Yes | 0 | Not Graduate | No | 2583 | 2358.0 | 120.0 | 360.0 | 1.0 | Urban | Y
4 | LP001008 | Male | No | 0 | Graduate | No | 6000 | 0.0 | 141.0 | 360.0 | 1.0 | Urban | Y
\n", 157 | "
" 158 | ], 159 | "text/plain": [ 160 | " Loan_ID Gender Married Dependents Education Self_Employed \\\n", 161 | "0 LP001002 Male No 0 Graduate No \n", 162 | "1 LP001003 Male Yes 1 Graduate No \n", 163 | "2 LP001005 Male Yes 0 Graduate Yes \n", 164 | "3 LP001006 Male Yes 0 Not Graduate No \n", 165 | "4 LP001008 Male No 0 Graduate No \n", 166 | "\n", 167 | " ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term \\\n", 168 | "0 5849 0.0 NaN 360.0 \n", 169 | "1 4583 1508.0 128.0 360.0 \n", 170 | "2 3000 0.0 66.0 360.0 \n", 171 | "3 2583 2358.0 120.0 360.0 \n", 172 | "4 6000 0.0 141.0 360.0 \n", 173 | "\n", 174 | " Credit_History Property_Area Loan_Status \n", 175 | "0 1.0 Urban Y \n", 176 | "1 1.0 Rural N \n", 177 | "2 1.0 Urban Y \n", 178 | "3 1.0 Urban Y \n", 179 | "4 1.0 Urban Y " 180 | ] 181 | }, 182 | "execution_count": 13, 183 | "metadata": {}, 184 | "output_type": "execute_result" 185 | }, 186 | { 187 | "data": { 188 | "text/plain": [ 189 | "Loan_ID object\n", 190 | "Gender object\n", 191 | "Married object\n", 192 | "Dependents object\n", 193 | "Education object\n", 194 | "Self_Employed object\n", 195 | "ApplicantIncome int64\n", 196 | "CoapplicantIncome float64\n", 197 | "LoanAmount float64\n", 198 | "Loan_Amount_Term float64\n", 199 | "Credit_History float64\n", 200 | "Property_Area object\n", 201 | "Loan_Status object\n", 202 | "dtype: object" 203 | ] 204 | }, 205 | "execution_count": 13, 206 | "metadata": {}, 207 | "output_type": "execute_result" 208 | } 209 | ], 210 | "source": [ 211 | "data.shape\n", 212 | "data.head()\n", 213 | "data.dtypes" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": null, 219 | "metadata": {}, 220 | "outputs": [], 221 | "source": [] 222 | } 223 | ], 224 | "metadata": { 225 | "kernelspec": { 226 | "display_name": "Python 3", 227 | "language": "python", 228 | "name": "python3" 229 | }, 230 | "language_info": { 231 | "codemirror_mode": { 232 | "name": "ipython", 233 | "version": 3 234 | }, 235 | "file_extension": ".py", 236 | "mimetype": "text/x-python", 237 | "name": "python", 238 | "nbconvert_exporter": "python", 239 | "pygments_lexer": "ipython3", 240 | "version": "3.7.1" 241 | } 242 | }, 243 | "nbformat": 4, 244 | "nbformat_minor": 2 245 | } 246 | -------------------------------------------------------------------------------- /Code/invert_dictionary.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Inverting a Dictionary" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 15, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "# Test Dictionary\n", 17 | "my_dict = {'Name':20,\n", 18 | " 'Age' : 3,\n", 19 | " 'attendance' : 75,\n", 20 | " 'roll no': 'H405'}" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 16, 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "name": "stdout", 30 | "output_type": "stream", 31 | "text": [ 32 | "{'Name': 20, 'Age': 3, 'attendance': 75, 'roll no': 'H405'}\n" 33 | ] 34 | } 35 | ], 36 | "source": [ 37 | "# Print Dictionary\n", 38 | "print(my_dict)" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 17, 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "data": { 48 | "text/plain": [ 49 | "{20: 'Name', 3: 'Age', 75: 'attendance', 'H405': 'roll no'}" 50 | ] 51 | }, 52 | "execution_count": 17, 53 | "metadata": {}, 54 | "output_type": "execute_result" 55 | } 56 | ], 57 | "source": [ 58 
| "# invert dictionary\n", 59 | "new_dict = {v:k for k,v in my_dict.items()}\n", 60 | "new_dict" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [] 69 | } 70 | ], 71 | "metadata": { 72 | "kernelspec": { 73 | "display_name": "Python 3", 74 | "language": "python", 75 | "name": "python3" 76 | }, 77 | "language_info": { 78 | "codemirror_mode": { 79 | "name": "ipython", 80 | "version": 3 81 | }, 82 | "file_extension": ".py", 83 | "mimetype": "text/x-python", 84 | "name": "python", 85 | "nbconvert_exporter": "python", 86 | "pygments_lexer": "ipython3", 87 | "version": "3.7.1" 88 | } 89 | }, 90 | "nbformat": 4, 91 | "nbformat_minor": 2 92 | } 93 | -------------------------------------------------------------------------------- /Code/iterative_imputer.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "iterative_imputer.ipynb", 7 | "provenance": [] 8 | }, 9 | "kernelspec": { 10 | "name": "python3", 11 | "display_name": "Python 3" 12 | } 13 | }, 14 | "cells": [ 15 | { 16 | "cell_type": "markdown", 17 | "metadata": { 18 | "id": "x-FLsxTrgR3R", 19 | "colab_type": "text" 20 | }, 21 | "source": [ 22 | "## Impute missing values using IterativeImputer" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "metadata": { 28 | "id": "JqsZhhMUZnxX", 29 | "colab_type": "code", 30 | "colab": {} 31 | }, 32 | "source": [ 33 | "import numpy as np\n", 34 | "\n", 35 | "# Since this estimator is experimental, we need to import it explicitly\n", 36 | "from sklearn.experimental import enable_iterative_imputer \n", 37 | "from sklearn.impute import IterativeImputer" 38 | ], 39 | "execution_count": 0, 40 | "outputs": [] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "metadata": { 45 | "id": "EwRy35OTZ-ea", 46 | "colab_type": "code", 47 | "colab": {} 48 | }, 49 | "source": [ 50 | "# Make a test dataset\n", 51 | "data = [[1, 2, 3, 4, np.nan],\n", 52 | " [5, 6, 7, 8, 11],\n", 53 | " [9, 1, 12, 2, 3],\n", 54 | " [4, np.nan, 5, 6, 7],\n", 55 | " [4, 8, 1, 2, 3]]" 56 | ], 57 | "execution_count": 0, 58 | "outputs": [] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "metadata": { 63 | "id": "dpkWWrFpeHQ0", 64 | "colab_type": "code", 65 | "colab": { 66 | "base_uri": "https://localhost:8080/", 67 | "height": 102 68 | }, 69 | "outputId": "3b09b267-461e-4a61-d9d9-2c91f87adbe5" 70 | }, 71 | "source": [ 72 | "# Impute missing values using iterative imputer\n", 73 | "iter_imp = IterativeImputer(random_state= 34)\n", 74 | "iter_imp.fit_transform(data)" 75 | ], 76 | "execution_count": 19, 77 | "outputs": [ 78 | { 79 | "output_type": "execute_result", 80 | "data": { 81 | "text/plain": [ 82 | "array([[ 1. , 2. , 3. , 4. , 2.78144391],\n", 83 | " [ 5. , 6. , 7. , 8. , 11. ],\n", 84 | " [ 9. , 1. , 12. , 2. , 3. ],\n", 85 | " [ 4. , 4.25776595, 5. , 6. , 7. ],\n", 86 | " [ 4. , 8. , 1. , 2. , 3. 
]])" 87 | ] 88 | }, 89 | "metadata": { 90 | "tags": [] 91 | }, 92 | "execution_count": 19 93 | } 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "metadata": { 99 | "id": "W2KTJygufkhi", 100 | "colab_type": "code", 101 | "colab": {} 102 | }, 103 | "source": [ 104 | "" 105 | ], 106 | "execution_count": 0, 107 | "outputs": [] 108 | } 109 | ] 110 | } -------------------------------------------------------------------------------- /Code/list_of_lists_to_list.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import itertools" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "nested_list = [[1], [2, 3], [4, 5, 6]]" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 3, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "name": "stdout", 28 | "output_type": "stream", 29 | "text": [ 30 | "[1, 2, 3, 4, 5, 6]\n" 31 | ] 32 | } 33 | ], 34 | "source": [ 35 | "flat_list = list(itertools.chain.from_iterable(nested_list))\n", 36 | "\n", 37 | "print(flat_list)" 38 | ] 39 | } 40 | ], 41 | "metadata": { 42 | "kernelspec": { 43 | "display_name": "Python 3", 44 | "language": "python", 45 | "name": "python3" 46 | }, 47 | "language_info": { 48 | "codemirror_mode": { 49 | "name": "ipython", 50 | "version": 3 51 | }, 52 | "file_extension": ".py", 53 | "mimetype": "text/x-python", 54 | "name": "python", 55 | "nbconvert_exporter": "python", 56 | "pygments_lexer": "ipython3", 57 | "version": "3.7.3" 58 | } 59 | }, 60 | "nbformat": 4, 61 | "nbformat_minor": 2 62 | } 63 | -------------------------------------------------------------------------------- /Code/magic function - %who.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "var_1 = 4\n", 10 | "var_2 = \"hat\"" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 4, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "my_list = [22,33,44,55,66,98]" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 5, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "my_dict = {'India': 'Delhi', 'Indonesia': 'Jakarta', 'China': 'Beijing'}" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 6, 34 | "metadata": {}, 35 | "outputs": [ 36 | { 37 | "name": "stdout", 38 | "output_type": "stream", 39 | "text": [ 40 | "Variable Type Data/Info\n", 41 | "----------------------------\n", 42 | "my_dict dict n=3\n", 43 | "my_list list n=6\n", 44 | "var_1 int 4\n", 45 | "var_2 str hat\n" 46 | ] 47 | } 48 | ], 49 | "source": [ 50 | "%whos" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [] 59 | } 60 | ], 61 | "metadata": { 62 | "kernelspec": { 63 | "display_name": "Python 3", 64 | "language": "python", 65 | "name": "python3" 66 | }, 67 | "language_info": { 68 | "codemirror_mode": { 69 | "name": "ipython", 70 | "version": 3 71 | }, 72 | "file_extension": ".py", 73 | "mimetype": "text/x-python", 74 | "name": "python", 75 | "nbconvert_exporter": "python", 76 | "pygments_lexer": "ipython3", 77 | "version": "3.7.1" 78 | } 79 | }, 80 | "nbformat": 4, 81 | "nbformat_minor": 2 82 | 
} 83 | -------------------------------------------------------------------------------- /Code/make_regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 121, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from sklearn import linear_model, datasets\n", 10 | "import matplotlib.pyplot as plt" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 126, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "X, y, coef = datasets.make_regression(n_samples=1000, n_features=1,\n", 20 | " n_informative=1, noise=10,\n", 21 | " coef=True, random_state=0)" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 127, 27 | "metadata": {}, 28 | "outputs": [ 29 | { 30 | "data": { 31 | "text/plain": [ 32 | "" 33 | ] 34 | }, 35 | "execution_count": 127, 36 | "metadata": {}, 37 | "output_type": "execute_result" 38 | }, 39 | { 40 | "data": { 41 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAD8CAYAAAB6paOMAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAHHlJREFUeJzt3X+Q3PV93/Hn+1aLvCKpVxQlQYvOIh4ix7IMsm+QOup0bGwiAgbOcjBoICFNZjSe2o2dphqk4KlEi6vr3CTYqd2MlZhpPFB+OMiHCLRnu5InKTOyOfkkZBldrIKNtGKMUjilRltxunv3j9099va+e7d3++P76/WY0ejuu9+972dB833f9/15f94fc3dERCS9esIegIiIhEuBQEQk5RQIRERSToFARCTlFAhERFJOgUBEJOUUCEREUk6BQEQk5RQIRERSbknYA2jG5Zdf7qtXrw57GCIisXL48OF/cPcV850Xi0CwevVqRkZGwh6GiEismNlPmzlPqSERkZRTIBARSTkFAhGRlFMgEBFJOQUCEZGUi0XVkIhI2gyNFhkcHuPMeImV+RzbN6+hf32hI9dSIBARiZih0SI79x2jNDEJQHG8xM59xwA6EgyUGhIRiZjB4bHpIFBVmphkcHisI9fTE4GISATUpoIa7SR/ZrzUkWsrEIiIhKw+FdTIynyuI9dXakhEJGRBqaB6uWyG7ZvXdOT6eiIQEQnZXCkfA1UNiYgkTX1paH5ZljfOT8w6r5DP8dyO6zs+HgUCEZEuCioNzfYY2YwxMfn2NHEnU0H1NEcgItJFQfMBE1POpZcsoZDPYZSfBPZsWdexVFA9PRGIiHRRo/mAc6UJjuz6jS6PpkyBQESkBQttBbEyn6MYEAw6VRrajJZTQ2a2yswOmtmLZnbczD5bOX6ZmX3bzH5c+Xt55biZ2Z+Z2Ukze8HMPtDqGEREwlDN9xcri8CK4yX+8PEjrN7xDJsGDjA0Wpz1nu2b15DLZmYc6+Z8QBBzb7SGrckfYHYFcIW7/8DMfhE4DPQDvwu87u4DZrYDWO7u95rZTcC/Bm4CNgBfcvcNc12jr6/PtVWliIQp6Df/weGxwN/uq3LZDJ/4YIGDJ87OeB/QlYZyZnbY3fvmPa/VQBBw4aeAL1f+fMjdX60Ei++6+xoz+2rl60cr549Vz2v0MxUIRCRMQSt/c9nMvIvAoLwOoPYum8tmujYR3GwgaGvVkJmtBtYD3wN+uXpzr/z9S5XTCsCpmredrhwTEYmkRk3gMmbzvrf+V+1ONo9brLYFAjP7BeBJ4HPu/o9znRpwbNZjiZltM7MRMxs5e/Zsu4YpIrJgjSp9Jt1n5ftb+XlhaUsgMLMs5SDwiLvvqxz+WSUlVJ1HeK1y/DSwqubtVwJn6n+mu+919z5371uxYkU7hikisiiNKnoK+Ryf+ODCExphVggFaUfVkAFfA1509z+teWk/cE/l63uAp2qO/06lemgjcG6u+QERkbAFVfpkM8brb17g4UOvNHxftofIVQgFaccTwSbgt4HrzexI5c9NwABwg5n9GLih8j3As8BLwEngL4B/1YYxiIjMa2i0yKaBA1w1R3ln0HkAe7asm175u3xZFhxKE1NzXu/SpdkZ7+v2iuFmtb1qqBNUNSQirWpU+VN/Yw46z4C7NvbyQP86ADYNHJizbLT2fS8P3Ny2z7BQzVYNaWWxiKTCXNs/1gaC3fuPzzrPgYcPvcLfHH2Vc6WJhjuI1YvaXEAjCgQikgqNKnVqjw+NFhkvzW4HXTXXa/WiOBfQiLqPikgqNPrtvPZ4q/X91WUFUZ0LaERPBCKSCts3rwmcI9i+ec10+4hm8v5B8rksu29dG5sbfz0FAhFJvOqNvroaeNKdQk3fn2Y2jp/LpUuXxDYIgAKBiCRcfRVQdTXwh9+zgvufPh64ReRCRW2l8EIpEIhIot3/9OwqoNLE5JwLwYIsXdLDhYvB6wbiUh3UiAKBiCRGbavod+ayTExO8eZbi0/51GoUBAxiUx3UiAKBiCRCfQpoIaWerXCI9fwAKBCISEzVbxTz5oWLLU34VpnBEjMmpt5eNpbLZli6pCcwuBRinhYCBQIRiaH63/4XW/YZ5MFPXgvM3kEMZlcXxWnR2FwUCEQkdoLaRbTD3Rt7p9M8jdI93dhistsUCEQkdtpdrllo8qbev76QiBt/PbWYEJHYyS/Ltu1nVdM7SbzBN0uBQERiZWi0yM//38W2/bwo7iHcbQoEIhIZzWwcMzg8NqOipx2K46U5N6pJOs0RiEgkBFUC7dx3bPr16iRtp7bSqr1e2tJECgQiEgmNNo7Zvf84Fy5OdaRKqF7QRjVpoNSQiERCo7UA46WJRQWB6kIvC3gt6FhV3BvILYYCgYiEbmi0OOfNeaGWZXt4bsf1/GTgZh6849oZm8d/8Y5reXng5oYrguPeQG4xlBoSka6p3QCmdl+ANy9cbGvuf2LKGRotTtf9B6V65tqoJm30RCAiXVGdDK6mgCa
9fOsvjpfa3iBuYtLnLQntX19gz5Z1M54W4rS9ZDvpiUBEuqJTbSEaaSbXn9SVwgulJwIR6Yp2NoZrRhpz/YulJwIRaauh0eKsLSCXL8ti0LE1APXSmutfLAUCEWmbodEi2//6KBOTM2/57dgXGMopjOB9wmb6QO87lfJZAKWGRKRtBofHZgWBdmomCAAceumNjo0hiRQIRKRtuj0P0Ei1Ikmao0AgIm3R7kVhrchYVEYSD5ojEJEFqd8ruNrLf3B4rGuTwfPZumFV2EOIFQUCEWnaXB1Cu9Gjp77yKJfN8IHed3LopTeYdCdjxtYNq3igf13Hx5IkbQkEZvYQ8DHgNXd/X+XYZcDjwGrgJ8An3f0NMzPgS8BNwHngd939B+0Yh4h01u79xwM7hH7u8SMdv3Yum+ETHyxw8MTZxO0ZHLZ2PRH8V+DLwNdrju0A/qe7D5jZjsr39wK/CVxd+bMB+PPK3yISYUOjxba3gpjLF++4NpEbxUdRWwKBu/+tma2uO3wb8KHK138FfJdyILgN+Lq7O3DIzPJmdoW7v9qOsYhIZ9z/9PGuXauQz6n9Qxd1smrol6s398rfv1Q5XgBO1Zx3unJMRCKsXYvC5pPNmFYFd1kYk8VBdV2zig3MbBuwDaC3t7fTYxJJrUZVQPWvd8ullyzRk0CXdTIQ/Kya8jGzK4DXKsdPA7W1XVcCZ+rf7O57gb0AfX19UalKE0mUuaqA+tcX+PzQMR4+9EpXx3Sui/MQUtbJ1NB+4J7K1/cAT9Uc/x0r2wic0/yASDga7RM8ODwWShAAdQ0NQ7vKRx+lPDF8uZmdBnYBA8ATZvb7wCvA7ZXTn6VcOnqScvnov2zHGERk4RrV/hfHS10JAtkeY2Lq7Qd+dQ0NR7uqhrY2eOkjAec68Ol2XFdEWrMyn+tof6C7N/Zy8MTZwGsUKvMRKhENn1YWiyRU0CQwMGvP4E7uE9D3rsvoe9dlDfcGVoloNCgQiCRQ0CTwH33jKJM1aZhqh85OVmIMDo/x3I7rp7/Wb/7RpEAgkkBBk8C1QaBbqnMQ+s0/2tSGWiSButEArhmqAIoHBQKRhBkaLRKFdvyqAIoPpYZEEqS6Z3AIWaAZli/LsuuWtUoHxYQCgUiCdHrP4Pnksj3s2fJ+BYCYUSAQSZAw5wbu3tirDWFiSoFAJKZq1wm8I9vDhYtToW4V+ej3TikQxJQCgUgM1a8TKE1MhTyit9clSPyoakgkhoLWCYQtE4VSJVkUBQKRGAp7LiDI1g2rAo9L9Ck1JBIj1XmBsJIwhXxueh7g0e+dYtKdjBlbN6zS/ECMKRCIREztJHB+WRZ3GC9N0GOEuj7AYHqB2AP963TjTxAFApEIqZ8Ert0nOOwgcNfGXq0PSCgFApEIieIkcEHdQhNPgUAkQqLSLA7KTwEP3nGtAkAKqGpIJEKi0q1TqaB00ROBSAiCdg/rX19g++Y1bP/G0Rn7+HabUkHpo0Ag0mVBu4ft3HcMKG/gct83jzHxVvfnCXLZDHu2rFMASCHzGCwL7+vr85GRkbCHIdIWmwYOdHTD+IUy0PaRCWVmh929b77z9EQg0mVRmhAu5HPTewpLemmyWKTLojQhrB3EBPREINJxQ6NF7n/6+IzFYVHgoFSQAAoEIh1V3ToyzF3DGilE5MlEwqfUkEgH3f/08UgEgfoG0dpYXmopEIh0yNBoMRLpoFw2w10beynkcxjlJwGViUotpYZEFql+UdiH37OCvzn6KuOl8G7+1cVgQYvVRBpRIBBZhKHR4owVwMXxEg8feiXkUTF909eNXxZCqSGRRdi574VQ20AEWZbtUQCQRVEgEFmgodFiJDaLr5XtMf7jlveHPQyJqdACgZndaGZjZnbSzHaENQ6RhRocHgt7CDMU8jkGb79GTwOyaKHMEZhZBvgKcANwGnjezPa7+4/CGI/IQkShT1A+l2X3rWt185e2CGuy+DrgpLu/BGBmjwG3AQoEEgnViqDieImM2fQm7ZMhNWnUjV86KaxAUABO1Xx/GtgQ0lhEZqivCKre/MMIAmoNLd0QViCoX+gI5dYnb59gtg3YBtDb29uNMYkAsHv/8UhUBGmDGOmWsALBaWBVzfdXAmdqT3D3vcBeKO9H0L2hSdqFuSCsykDtoaVrwqoaeh642syuMrNLgDuB/SGNRQQop4Q2DRwIexhAdFpVSzqE8kTg7hfN7DPAMJABHnL342GMRdJraLTI7v3HQ30C6DFYuiQzvW0lqCGcdF9o6wjc/Vl3/zV3f7e7fyGscUg6VSeEw04DTTns2bJODeEkVOo1JKk0ODwWmQlh9QaSsCkQSCoErQuIAqWAJAoUCCTxhkaL7Nx3bDoPH5UgcOklGT0JSCSo6Zwk3uDw2IzJ2CjIZowvfHxd2MMQAfREIClwJgK9gWppoZhEjQKBJEZ9OejyZVl23bKWlflcJBrFgRaKSTQpNSSJEFQO+sb5Cbb/9VFW/9NcYE+TMGihmESRAoEkQqNy0IlJ57n//TphTA9ne2aGHy0Uk6hSIJDYGxotRib1U7Xp3ZcxePs1WigmsaA5Aomd2jUBPVZenRsVGTO2bljFA/3liiDd+CUOFAgkVurXBEQlCNy9sXf65i8SNwoEEitRWxOgncMkCTRHILESpTUB2R5TEJBE0BOBRFJQb6BCPkd+WZY3zoe/cQzAxJQzODymQCCxp0AgkdNoz+CwKoPmalIXpScUkcVSakgiJyp7BkN5JfCffLJcBhpEC8QkCRQIJHLC3iym1l0be+lfX2D75jXkspkZr2mBmCSFUkMSKUOjxbCHMG3Tuy+btR5gcHiMM+MlVqpxnCSIAoFExtBokX/zxJGwhzFrUViVdhKTpFIgkMj4430vhLpArJDPqTOopJLmCCQyzk9MhXZt5fslzfREIKmnjWIk7RQIpKuqC8VqJ1yhPAnbaXdv7OXgibOa7BWpo0AgXfP5oWM8cuiV6b0BiuMl/vDxI13ZK6CQz6kpnEgDmiOQrhgaLc4IAlXdmhs+/9bFSJWmikSJAoF0xeDwWCi7hFW9cX6CnfuOKRiIBFAgkLYZGi2yaeAAV+14hk0DB6Zvup8fOtb1PkH5XHbWsdLEZFfmIkTiRnME0hb1G8YUx0vs3HeMrxz8MT9+7c2uj+dcgzYVahInMpueCKQt7n/6+KwNY0oTk6EEgeXLsg2bwalJnMhsCgTSsqHRYih7BORzWTI9NuNYNmPsumWtmsSJLIBSQ9KyMPLu1T2Cg9Yl1K4NUJM4kfmZN9hwo6k3m90O7AZ+HbjO3UdqXtsJ/D4wCfyBuw9Xjt8IfAnIAH/p7gPzXaevr89HRkbmO026oP7G++H3rODhQ690fRw/Gbi569cUiRszO+zuffOd1+oTwQ+BLcBX6y7+XuBOYC2wEviOmf1a5eWvADcAp4HnzWy/u/+oxXFIFwRNCIcRBBptEiMii9NSIHD3FwHMrP6l24DH3P0C8LKZnQSuq7x20t1fqrzvscq5CgQxMDg8NmtCuNuyGVOeX6TNOjVHUAAO1Xx/unIM4FTd8Q0dGoMswlw597
BLL5cvy7LrlrXK84u02byBwMy+A/xKwEv3uftTjd4WcMwJrlIKnKQws23ANoDe3t75hilt0GgtAJQ3ZVmZz4WygbzmA0Q6a95A4O4fXcTPPQ2sqvn+SuBM5etGx+uvuxfYC+XJ4kWMQRYoKPVTmpjkj544yucePxIY3Ttt+bLZK4RFpL06tY5gP3CnmS01s6uAq4HvA88DV5vZVWZ2CeUJ5f0dGoMsUKPUz2Slsqzb0bi6JkBEOqulQGBmHzez08A/A54xs2EAdz8OPEF5Evh/AJ9290l3vwh8BhgGXgSeqJwrERCFVbfVp45CPsfgb12j+QCRLmhpHUG3aB1Bd9TPEYRFeweLtEe31hFIglR/+x4cHgtlUrgq7OokkbRRIBCg/DSwe/9xxht07eymKKSoRNJEgUAYGi2y/RtHmZgKP02oxnAi3adAIAwOj0UiCBTUGE4kFAoE0rWcvNG4BFUTxCLhUSBImPnaMgedZwbtLh7L57J87JorOHji7IyxALMqk5QOEgmXAkGCNGoRMfLT12fckD/8nhU8ebg4fV47g0Cz6R3tEyASHVpHkCCbBg6EWvap9I5ItGgdQQLNl/YJs/5e6R2R+NKexTFRTfsUx0s4b6d9hkaL0+d0sv6+fm/gHis3hDPKTwJ7tqxTekckpvREEBONOoMODo9N34C3b17TkRYR+VyW3beuVV5fJKEUCGKiUdqn9ninWkSYlX+2bvwiyaTUUEw0SvvUH+9fX+C5HdeTz7Wvj//4+fDbTohI5+iJICaC0j49wKvnSqze8cz0sUKlPPTNty627drq/SOSbAoEMVGb9jkzXuId2R5KE1OzluoWx0s8cuiVtm0io2ogkeRTIIiR2jz9r+58puF57VwZomogkeTTHEEMDY0W6UaPuIyZgoBICigQxNDg8FhXrjMZg1XnItI6pYZionZVcbtuzxkzptzpMQu86Rc0SSySCnoiiIH6VcXNWL4sSy6bafh6NmP8ySev4eWBm9m6YRVW97omiUXSQ08EETBfD6GgVcVzMWDXLWun31scL83YC2D5siy7bllL//oCQ6NFnjxcnBFgDPjEB7WATCQtFAhC1qh1NLxdMrqQZnIG3LWxd/q9zbSDrg8yDhw8cbbpa4pIvCk1FLK5eghVNbugq5DP8eAd1/JA/7qmr99M6woRSTYFgpA16glUeyPevnnNnPl+eHsvgIWmc5ptXSEiyaVAEKKh0eKsSdqqHrPpFtP96wvs2TL3b/mLndgNCjKaKBZJFwWCEA0OjzWsApp0n7HfQP/6QsNyznwuu+iJ3WqQKeRz2ltAJKU0WdxF9dVB87WKbma/gVw2w+5b17Y0LrWYFkk3BYIuCaoOqi3pbOTMeGlGAMkvy7J0SQ/nShPaIEZE2kKBoEsalWnOJ78sOyOAvHF+glw2w4N3XKsAICJtoTmCLllMOaYB7sxbXioi0goFgi5ZTDnmXRt7OVcK3h1Mdf4i0i4tBQIzGzSzE2b2gpl908zyNa/tNLOTZjZmZptrjt9YOXbSzHa0cv04aWYtQK18LssD/etU5y8iHdfqE8G3gfe5+/uBvwd2ApjZe4E7gbXAjcB/MbOMmWWArwC/CbwX2Fo5NxWWLmnuP3dtJZDq/EWk01qaLHb3b9V8ewj4rcrXtwGPufsF4GUzOwlcV3ntpLu/BGBmj1XO/VEr44i6+oqhuWTMZtTx129RqUohEWm3dlYN/R7weOXrAuXAUHW6cgzgVN3xDW0cQyQ12z00l80ELuZSnb+IdNK8gcDMvgP8SsBL97n7U5Vz7gMuAo9U3xZwvhOcigqsojSzbcA2gN7e3vmGGWnNTOzWPwmIiHTLvIHA3T861+tmdg/wMeAj7tPbXJ0GVtWcdiVwpvJ1o+P1190L7AXo6+uL9J6J8+0n0Mwq4il3BQERCUWrVUM3AvcCt7r7+ZqX9gN3mtlSM7sKuBr4PvA8cLWZXWVml1CeUN7fyhjCVr97WHU/gWqPIGiuYkhVQCISllbnCL4MLAW+bWYAh9z9U+5+3MyeoDwJfBH4tLtPApjZZ4BhIAM85O7HWxxDV9X/9v/mhYsNF3wFTfgGtZZQFZCIhMk8YNPyqOnr6/ORkZGOXmO+9E71nGarfwB+MnDzoq8lItIqMzvs7n3znadeQzS3XSQsbO/gjDXaaUBVQCISLWoxQXPbRcLC2jpMxuBJS0QE9EQANL7BF8dLvHvns0y6U8jnyGV7OD8x1dTPbLSJjIhI1OiJgLkrdqq/2RfHS00HAU3+ikicKBCw8IZwc8nnsloYJiKxotQQs8s7W3Hp0iUKAiISK3oiqOhfX+C5Hde3nNvXPgEiEjcKBHVaTRNphbCIxI0CQZ3+9QX2bFk3/WRQXQ9QyOe4e2MvhXwOozwXkM3MXCugSWIRiSPNEQRodsGXVgiLSBIoELRAK4RFJAmUGhIRSTkFAhGRlFMgEBFJOQUCEZGUUyAQEUk5BQIRkZRLdPmo6vxFROaX2EDQ7K5jIiJpl9jUULO7jomIpF1iA0GjLqDqDioiMlNiA0GjLqDqDioiMlNiA0FQO2l1BxURmS2xk8W1u46pakhEpLHEBgJQd1ARkWYkNjUkIiLNUSAQEUk5BQIRkZRTIBARSTkFAhGRlDN3D3sM8zKzs8BP2/TjLgf+oU0/Kwr0eaItaZ8HkveZkvx53uXuK+Z7QywCQTuZ2Yi794U9jnbR54m2pH0eSN5n0udRakhEJPUUCEREUi6NgWBv2ANoM32eaEva54HkfabUf57UzRGIiMhMaXwiEBGRGqkLBGb2H8zsBTM7YmbfMrOVYY+pVWY2aGYnKp/rm2aWD3tMrTCz283suJlNmVlsqznM7EYzGzOzk2a2I+zxtMrMHjKz18zsh2GPpVVmtsrMDprZi5V/a58Ne0ytMrN3mNn3zexo5TPd3/R705YaMrN/4u7/WPn6D4D3uvunQh5WS8zsN4AD7n7RzP4TgLvfG/KwFs3Mfh2YAr4K/Ft3Hwl5SAtmZhng74EbgNPA88BWd/9RqANrgZn9C+DnwNfd/X1hj6cVZnYFcIW7/8DMfhE4DPTH/P+PAZe6+8/NLAv8L+Cz7n5ovvem7omgGgQqLgViHwnd/VvufrHy7SHgyjDH0yp3f9Hd47659HXASXd/yd3fAh4Dbgt5TC1x978FXg97HO3g7q+6+w8qX/9f4EUg1j3rveznlW+zlT9N3d9SFwgAzOwLZnYKuAv4d2GPp81+D/jvYQ9CKACnar4/TcxvNEllZquB9cD3wh1J68wsY2ZHgNeAb7t7U58pkYHAzL5jZj8M+HMbgLvf5+6rgEeAz4Q72ubM95kq59wHXKT8uSKtmc8TcxZwLPZPn0ljZr8APAl8ri5bEEvuPunu11LOClxnZk2l8BK5Q5m7f7TJU/8b8Aywq4PDaYv5PpOZ3QN8DPiIx2DiZwH/j+LqNLCq5vsrgTMhjUUCVPLoTwKPuPu+sMfTTu4+bmbfBW4E5p3cT+QTwVzM7Oqab28FToQ1lnYxsxuBe4Fb3f182OMRoDw5fLWZXWVmlwB3AvtDH
pNUVCZWvwa86O5/GvZ42sHMVlQrBs0sB3yUJu9vaawaehJYQ7kq5afAp9y9GO6oWmNmJ4GlwP+pHDoU50ooM/s48J+BFcA4cMTdN4c7qoUzs5uALwIZ4CF3/0LIQ2qJmT0KfIhyd8ufAbvc/WuhDmqRzOyfA38HHKN8LwD4Y3d/NrxRtcbM3g/8FeV/bz3AE+7+75t6b9oCgYiIzJS61JCIiMykQCAiknIKBCIiKadAICKScgoEIiIpp0AgIpJyCgQiIimnQCAiknL/H1BtkPJRCSR0AAAAAElFTkSuQmCC\n", 42 | "text/plain": [ 43 | "
" 44 | ] 45 | }, 46 | "metadata": { 47 | "needs_background": "light" 48 | }, 49 | "output_type": "display_data" 50 | } 51 | ], 52 | "source": [ 53 | "plt.scatter(X,y)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [] 62 | } 63 | ], 64 | "metadata": { 65 | "kernelspec": { 66 | "display_name": "Python 3", 67 | "language": "python", 68 | "name": "python3" 69 | }, 70 | "language_info": { 71 | "codemirror_mode": { 72 | "name": "ipython", 73 | "version": 3 74 | }, 75 | "file_extension": ".py", 76 | "mimetype": "text/x-python", 77 | "name": "python", 78 | "nbconvert_exporter": "python", 79 | "pygments_lexer": "ipython3", 80 | "version": "3.7.1" 81 | } 82 | }, 83 | "nbformat": 4, 84 | "nbformat_minor": 2 85 | } 86 | -------------------------------------------------------------------------------- /Code/map python.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Understand Pandas map()" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 15, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "data": { 17 | "application/javascript": [ 18 | "\n", 19 | " if (window._pyforest_update_imports_cell) { window._pyforest_update_imports_cell('import pandas as pd'); }\n", 20 | " " 21 | ], 22 | "text/plain": [ 23 | "" 24 | ] 25 | }, 26 | "metadata": {}, 27 | "output_type": "display_data" 28 | }, 29 | { 30 | "data": { 31 | "text/html": [ 32 | "
\n", 33 | "\n", 46 | "\n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | "
  | name | age | profession | city
0 | A | 22 | data engineer | Gurgaon
1 | B | 26 | data scientist | Bangalore
2 | C | 33 | entrepreneur | Gurgaon
3 | D | 44 | business analyst | Pune
4 | E | 50 | self-employed | New Delhi
\n", 94 | "
" 95 | ], 96 | "text/plain": [ 97 | " name age profession city\n", 98 | "0 A 22 data engineer Gurgaon\n", 99 | "1 B 26 data scientist Bangalore\n", 100 | "2 C 33 entrepreneur Gurgaon\n", 101 | "3 D 44 business analyst Pune\n", 102 | "4 E 50 self-employed New Delhi" 103 | ] 104 | }, 105 | "execution_count": 15, 106 | "metadata": {}, 107 | "output_type": "execute_result" 108 | } 109 | ], 110 | "source": [ 111 | "#sample data\n", 112 | "data = {'name': ['A', 'B', 'C', 'D', 'E'], \n", 113 | " 'age': [22, 26, 33, 44, 50],\n", 114 | " 'profession' : ['data engineer', 'data scientist', 'entrepreneur', 'business analyst', 'self-employed'], \n", 115 | " 'city': ['Gurgaon', 'Bangalore', 'Gurgaon', 'Pune', 'New Delhi']}\n", 116 | "\n", 117 | "df = pd.DataFrame(data)\n", 118 | "df" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 12, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "# dictionary to map city with states\n", 128 | "map_city_to_states = { 'Gurgaon' : 'Haryana', \n", 129 | " 'Bangalore' : 'Karnataka', \n", 130 | " 'Pune' : 'Maharashtra', \n", 131 | " 'New Delhi' : 'Delhi'}" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 13, 137 | "metadata": {}, 138 | "outputs": [ 139 | { 140 | "data": { 141 | "text/html": [ 142 | "
\n", 143 | "\n", 156 | "\n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | "
  | name | age | profession | city | state
0 | A | 22 | data engineer | Gurgaon | Haryana
1 | B | 26 | data scientist | Bangalore | Karnataka
2 | C | 33 | entrepreneur | Gurgaon | Haryana
3 | D | 44 | business analyst | Pune | Maharashtra
4 | E | 50 | self-employed | New Delhi | Delhi
\n", 210 | "
" 211 | ], 212 | "text/plain": [ 213 | " name age profession city state\n", 214 | "0 A 22 data engineer Gurgaon Haryana\n", 215 | "1 B 26 data scientist Bangalore Karnataka\n", 216 | "2 C 33 entrepreneur Gurgaon Haryana\n", 217 | "3 D 44 business analyst Pune Maharashtra\n", 218 | "4 E 50 self-employed New Delhi Delhi" 219 | ] 220 | }, 221 | "execution_count": 13, 222 | "metadata": {}, 223 | "output_type": "execute_result" 224 | } 225 | ], 226 | "source": [ 227 | "# apply pandas map to map the city columns with states\n", 228 | "df['state'] = df['city'].map(map_city_to_states)\n", 229 | "df" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": null, 235 | "metadata": {}, 236 | "outputs": [], 237 | "source": [] 238 | } 239 | ], 240 | "metadata": { 241 | "kernelspec": { 242 | "display_name": "Python 3", 243 | "language": "python", 244 | "name": "python3" 245 | }, 246 | "language_info": { 247 | "codemirror_mode": { 248 | "name": "ipython", 249 | "version": 3 250 | }, 251 | "file_extension": ".py", 252 | "mimetype": "text/x-python", 253 | "name": "python", 254 | "nbconvert_exporter": "python", 255 | "pygments_lexer": "ipython3", 256 | "version": "3.7.1" 257 | } 258 | }, 259 | "nbformat": 4, 260 | "nbformat_minor": 2 261 | } 262 | -------------------------------------------------------------------------------- /Code/merge_pdf.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from PyPDF2 import PdfFileMerger" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "pdfs = ['../Data/dummy_pdf_page1.pdf', '../Data/dummy_pdf_page2.pdf']\n", 19 | "output_filename = 'merged_pdf.pdf'" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 3, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "name": "stdout", 29 | "output_type": "stream", 30 | "text": [ 31 | "Output file is merged_pdf.pdf\n" 32 | ] 33 | } 34 | ], 35 | "source": [ 36 | "merger = PdfFileMerger()\n", 37 | "\n", 38 | "for pdf in pdfs:\n", 39 | " merger.append(pdf)\n", 40 | "\n", 41 | "merger.write(output_filename)\n", 42 | "print('Output file is', output_filename)\n", 43 | "merger.close()" 44 | ] 45 | } 46 | ], 47 | "metadata": { 48 | "kernelspec": { 49 | "display_name": "Python 3", 50 | "language": "python", 51 | "name": "python3" 52 | }, 53 | "language_info": { 54 | "codemirror_mode": { 55 | "name": "ipython", 56 | "version": 3 57 | }, 58 | "file_extension": ".py", 59 | "mimetype": "text/x-python", 60 | "name": "python", 61 | "nbconvert_exporter": "python", 62 | "pygments_lexer": "ipython3", 63 | "version": "3.7.3" 64 | } 65 | }, 66 | "nbformat": 4, 67 | "nbformat_minor": 2 68 | } 69 | -------------------------------------------------------------------------------- /Code/pandarellel.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# !pip3 install pandarallel" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stderr", 19 | "output_type": "stream", 20 | "text": [ 21 | "/home/lakshay/.local/lib/python3.7/site-packages/tqdm/_tqdm.py:634: FutureWarning: The Panel class is removed from 
pandas. Accessing it from the top-level namespace will also be removed in the next version\n", 22 | " from pandas import Panel\n" 23 | ] 24 | } 25 | ], 26 | "source": [ 27 | "%load_ext autoreload\n", 28 | "%autoreload 2\n", 29 | "import pandas as pd\n", 30 | "import time\n", 31 | "from pandarallel import pandarallel\n", 32 | "import math\n", 33 | "import numpy as np\n", 34 | "import random\n", 35 | "from tqdm._tqdm_notebook import tqdm_notebook\n", 36 | "tqdm_notebook.pandas()" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 2, 42 | "metadata": {}, 43 | "outputs": [ 44 | { 45 | "name": "stdout", 46 | "output_type": "stream", 47 | "text": [ 48 | "INFO: Pandarallel will run on 8 workers.\n", 49 | "INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n" 50 | ] 51 | } 52 | ], 53 | "source": [ 54 | "pandarallel.initialize(progress_bar=True)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 3, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "df = pd.DataFrame({\n", 64 | " 'A' : [random.randint(15,20) for i in range(1,1000000) ],\n", 65 | " 'B' : [random.randint(10,30) for i in range(1,1000000) ]\n", 66 | "})" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 4, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "def func(x):\n", 76 | " return math.sin(x.A**2) + math.sin(x.B**2) + math.tan(x.A**2)" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "---\n", 84 | "\n", 85 | "#### `WITHOUT PARALLELIZATION`\n", 86 | "\n", 87 | "---" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 5, 93 | "metadata": {}, 94 | "outputs": [ 95 | { 96 | "data": { 97 | "application/vnd.jupyter.widget-view+json": { 98 | "model_id": "5f096c0d2622441c9557dfc7abebcdb4", 99 | "version_major": 2, 100 | "version_minor": 0 101 | }, 102 | "text/plain": [ 103 | "HBox(children=(IntProgress(value=0, max=999999), HTML(value='')))" 104 | ] 105 | }, 106 | "metadata": {}, 107 | "output_type": "display_data" 108 | }, 109 | { 110 | "name": "stdout", 111 | "output_type": "stream", 112 | "text": [ 113 | "\n", 114 | "CPU times: user 33.7 s, sys: 148 ms, total: 33.9 s\n", 115 | "Wall time: 33.7 s\n" 116 | ] 117 | } 118 | ], 119 | "source": [ 120 | "%%time\n", 121 | "res = df.progress_apply(func, axis=1)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "---\n", 129 | "\n", 130 | "#### `WITH PARALLELIZATION`\n", 131 | "\n", 132 | "---" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 6, 138 | "metadata": {}, 139 | "outputs": [ 140 | { 141 | "data": { 142 | "application/vnd.jupyter.widget-view+json": { 143 | "model_id": "16dc675b21954c8da7941a4ad861441b", 144 | "version_major": 2, 145 | "version_minor": 0 146 | }, 147 | "text/plain": [ 148 | "VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=125000), Label(value='0 / 125000')…" 149 | ] 150 | }, 151 | "metadata": {}, 152 | "output_type": "display_data" 153 | }, 154 | { 155 | "name": "stdout", 156 | "output_type": "stream", 157 | "text": [ 158 | "CPU times: user 1.07 s, sys: 89.7 ms, total: 1.16 s\n", 159 | "Wall time: 11.7 s\n" 160 | ] 161 | } 162 | ], 163 | "source": [ 164 | "%%time\n", 165 | "res_parallel = df.parallel_apply(func, axis=1)" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": 
[] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [] 181 | } 182 | ], 183 | "metadata": { 184 | "kernelspec": { 185 | "display_name": "Python 3", 186 | "language": "python", 187 | "name": "python3" 188 | }, 189 | "language_info": { 190 | "codemirror_mode": { 191 | "name": "ipython", 192 | "version": 3 193 | }, 194 | "file_extension": ".py", 195 | "mimetype": "text/x-python", 196 | "name": "python", 197 | "nbconvert_exporter": "python", 198 | "pygments_lexer": "ipython3", 199 | "version": "3.7.3" 200 | } 201 | }, 202 | "nbformat": 4, 203 | "nbformat_minor": 2 204 | } 205 | -------------------------------------------------------------------------------- /Code/pandas_crosstab.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Pandas Crosstab" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "# import pandas\n", 17 | "import pandas as pd" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 5, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "#Import dataset\n", 27 | "data = pd.read_csv('../Data/loan_train.csv', index_col = 'Loan_ID')" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 6, 33 | "metadata": {}, 34 | "outputs": [ 35 | { 36 | "data": { 37 | "text/html": [ 38 | "
\n", 39 | "\n", 52 | "\n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | "
Loan_StatusNY
Credit_History
0.00.9213480.078652
1.00.2042110.795789
All0.3173760.682624
\n", 83 | "
" 84 | ], 85 | "text/plain": [ 86 | "Loan_Status N Y\n", 87 | "Credit_History \n", 88 | "0.0 0.921348 0.078652\n", 89 | "1.0 0.204211 0.795789\n", 90 | "All 0.317376 0.682624" 91 | ] 92 | }, 93 | "execution_count": 6, 94 | "metadata": {}, 95 | "output_type": "execute_result" 96 | } 97 | ], 98 | "source": [ 99 | "pd.crosstab(data[\"Credit_History\"],data[\"Loan_Status\"],margins=True, normalize = 'index')" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [] 108 | } 109 | ], 110 | "metadata": { 111 | "kernelspec": { 112 | "display_name": "Python 3", 113 | "language": "python", 114 | "name": "python3" 115 | }, 116 | "language_info": { 117 | "codemirror_mode": { 118 | "name": "ipython", 119 | "version": 3 120 | }, 121 | "file_extension": ".py", 122 | "mimetype": "text/x-python", 123 | "name": "python", 124 | "nbconvert_exporter": "python", 125 | "pygments_lexer": "ipython3", 126 | "version": "3.7.1" 127 | } 128 | }, 129 | "nbformat": 4, 130 | "nbformat_minor": 2 131 | } 132 | -------------------------------------------------------------------------------- /Code/pandas_pivot_table.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Pandas Pivot Table" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 5, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "# import pandas\n", 17 | "import pandas as pd\n", 18 | "import numpy as np" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 6, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "#Import dataset\n", 28 | "loan = pd.read_csv('../Data/loan_train.csv', index_col = 'Loan_ID')" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 7, 34 | "metadata": {}, 35 | "outputs": [ 36 | { 37 | "data": { 38 | "text/html": [ 39 | "
\n", 40 | "\n", 53 | "\n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | "
LoanAmount
GenderMarriedSelf_Employed
FemaleNoNo110.596774
Yes125.800000
YesNo135.480000
Yes282.250000
MaleNoNo128.137255
Yes173.625000
YesNo151.709220
Yes169.355556
\n", 109 | "
" 110 | ], 111 | "text/plain": [ 112 | " LoanAmount\n", 113 | "Gender Married Self_Employed \n", 114 | "Female No No 110.596774\n", 115 | " Yes 125.800000\n", 116 | " Yes No 135.480000\n", 117 | " Yes 282.250000\n", 118 | "Male No No 128.137255\n", 119 | " Yes 173.625000\n", 120 | " Yes No 151.709220\n", 121 | " Yes 169.355556" 122 | ] 123 | }, 124 | "execution_count": 7, 125 | "metadata": {}, 126 | "output_type": "execute_result" 127 | } 128 | ], 129 | "source": [ 130 | "pivot = loan.pivot_table(values = ['LoanAmount'], index = ['Gender', 'Married', 'Self_Employed'], aggfunc = np.mean)\n", 131 | "pivot" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [] 140 | } 141 | ], 142 | "metadata": { 143 | "kernelspec": { 144 | "display_name": "Python 3", 145 | "language": "python", 146 | "name": "python3" 147 | }, 148 | "language_info": { 149 | "codemirror_mode": { 150 | "name": "ipython", 151 | "version": 3 152 | }, 153 | "file_extension": ".py", 154 | "mimetype": "text/x-python", 155 | "name": "python", 156 | "nbconvert_exporter": "python", 157 | "pygments_lexer": "ipython3", 158 | "version": "3.7.1" 159 | } 160 | }, 161 | "nbformat": 4, 162 | "nbformat_minor": 2 163 | } 164 | -------------------------------------------------------------------------------- /Code/pandas_read_html_files.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# ***Reading all the tables of a webpage into pandas dataframe:***" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "### ***Let the url be: \"https://en.wikipedia.org/wiki/States_and_union_territories_of_India\"***" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 3, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "dflist=pd.read_html('https://en.wikipedia.org/wiki/States_and_union_territories_of_India')" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "Here dflist is a list of dataframes. Each dataframe is associated with one of the tables present on the page." 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 4, 45 | "metadata": {}, 46 | "outputs": [ 47 | { 48 | "data": { 49 | "text/html": [ 50 | "
\n", 51 | "\n", 64 | "\n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | "
01
0States and union territories of IndiaNaN
1CategoryFederated states
2LocationRepublic of India
3Number28 States8 Union territories
4PopulationsStates: Sikkim - 610,577 (lowest); Uttar Prade...
\n", 100 | "
" 101 | ], 102 | "text/plain": [ 103 | " 0 \\\n", 104 | "0 States and union territories of India \n", 105 | "1 Category \n", 106 | "2 Location \n", 107 | "3 Number \n", 108 | "4 Populations \n", 109 | "\n", 110 | " 1 \n", 111 | "0 NaN \n", 112 | "1 Federated states \n", 113 | "2 Republic of India \n", 114 | "3 28 States8 Union territories \n", 115 | "4 States: Sikkim - 610,577 (lowest); Uttar Prade... " 116 | ] 117 | }, 118 | "execution_count": 4, 119 | "metadata": {}, 120 | "output_type": "execute_result" 121 | } 122 | ], 123 | "source": [ 124 | "dflist[0].head()" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 5, 130 | "metadata": {}, 131 | "outputs": [ 132 | { 133 | "data": { 134 | "text/html": [ 135 | "
\n", 136 | "\n", 149 | "\n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | "
012345678910
0StateISO 3166-2:INVehiclecodeZoneCapitalLargest cityStatehoodPopulation[35]Area(km2)Officiallanguages[36]Additional officiallanguages[36]
1Andhra PradeshIN-APAPSouthernHyderabad (de jure)Amaravati (de facto) Note 1...Visakhapatnam1 October 195349506799160205Telugu
2Arunachal PradeshIN-ARARNorth-EasternItanagar20 February 1987138372783743EnglishNaN
3AssamIN-ASASNorth-EasternDispurGuwahati26 January 19503120557678550AssameseBengali, Bodo
4BiharIN-BRBREasternPatna26 January 195010409945294163HindiUrduNaN
\n", 239 | "
" 240 | ], 241 | "text/plain": [ 242 | " 0 1 2 3 \\\n", 243 | "0 State ISO 3166-2:IN Vehiclecode Zone \n", 244 | "1 Andhra Pradesh IN-AP AP Southern \n", 245 | "2 Arunachal Pradesh IN-AR AR North-Eastern \n", 246 | "3 Assam IN-AS AS North-Eastern \n", 247 | "4 Bihar IN-BR BR Eastern \n", 248 | "\n", 249 | " 4 5 \\\n", 250 | "0 Capital Largest city \n", 251 | "1 Hyderabad (de jure)Amaravati (de facto) Note 1... Visakhapatnam \n", 252 | "2 Itanagar 20 February 1987 \n", 253 | "3 Dispur Guwahati \n", 254 | "4 Patna 26 January 1950 \n", 255 | "\n", 256 | " 6 7 8 9 \\\n", 257 | "0 Statehood Population[35] Area(km2) Officiallanguages[36] \n", 258 | "1 1 October 1953 49506799 160205 Telugu \n", 259 | "2 1383727 83743 English — \n", 260 | "3 26 January 1950 31205576 78550 Assamese \n", 261 | "4 104099452 94163 Hindi Urdu \n", 262 | "\n", 263 | " 10 \n", 264 | "0 Additional officiallanguages[36] \n", 265 | "1 — \n", 266 | "2 NaN \n", 267 | "3 Bengali, Bodo \n", 268 | "4 NaN " 269 | ] 270 | }, 271 | "execution_count": 5, 272 | "metadata": {}, 273 | "output_type": "execute_result" 274 | } 275 | ], 276 | "source": [ 277 | "dflist[3].head()" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": null, 283 | "metadata": {}, 284 | "outputs": [], 285 | "source": [] 286 | } 287 | ], 288 | "metadata": { 289 | "kernelspec": { 290 | "display_name": "Python 3", 291 | "language": "python", 292 | "name": "python3" 293 | }, 294 | "language_info": { 295 | "codemirror_mode": { 296 | "name": "ipython", 297 | "version": 3 298 | }, 299 | "file_extension": ".py", 300 | "mimetype": "text/x-python", 301 | "name": "python", 302 | "nbconvert_exporter": "python", 303 | "pygments_lexer": "ipython3", 304 | "version": "3.7.1" 305 | } 306 | }, 307 | "nbformat": 4, 308 | "nbformat_minor": 2 309 | } 310 | -------------------------------------------------------------------------------- /Code/pdb_pythonDebugger.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Python debugger" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "# define function 1\n", 17 | "def func_div(x, y):\n", 18 | " return x / y\n", 19 | "\n", 20 | "# define fucntion 2\n", 21 | "def func_sub(a):\n", 22 | " x = a\n", 23 | " y = a - 1\n", 24 | " return func_div(x, y)" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 2, 30 | "metadata": {}, 31 | "outputs": [ 32 | { 33 | "ename": "ZeroDivisionError", 34 | "evalue": "division by zero", 35 | "output_type": "error", 36 | "traceback": [ 37 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 38 | "\u001b[1;31mZeroDivisionError\u001b[0m Traceback (most recent call last)", 39 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mfunc_sub\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", 40 | "\u001b[1;32m\u001b[0m in \u001b[0;36mfunc_sub\u001b[1;34m(a)\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[0mx\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0ma\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 8\u001b[0m \u001b[0my\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0ma\u001b[0m \u001b[1;33m-\u001b[0m 
\u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 9\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc_div\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", 41 | "\u001b[1;32m\u001b[0m in \u001b[0;36mfunc_div\u001b[1;34m(x, y)\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;31m# define function 1\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mfunc_div\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mx\u001b[0m \u001b[1;33m/\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;31m# define fucntion 2\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 42 | "\u001b[1;31mZeroDivisionError\u001b[0m: division by zero" 43 | ] 44 | } 45 | ], 46 | "source": [ 47 | "func_sub(1)" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 3, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "Automatic pdb calling has been turned ON\n" 60 | ] 61 | }, 62 | { 63 | "ename": "ZeroDivisionError", 64 | "evalue": "division by zero", 65 | "output_type": "error", 66 | "traceback": [ 67 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 68 | "\u001b[1;31mZeroDivisionError\u001b[0m Traceback (most recent call last)", 69 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[0mget_ipython\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrun_line_magic\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'pdb'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m''\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mfunc_sub\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", 70 | "\u001b[1;32m\u001b[0m in \u001b[0;36mfunc_sub\u001b[1;34m(a)\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[0mx\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0ma\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 8\u001b[0m \u001b[0my\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0ma\u001b[0m \u001b[1;33m-\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 9\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc_div\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", 71 | "\u001b[1;32m\u001b[0m in \u001b[0;36mfunc_div\u001b[1;34m(x, y)\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;31m# define function 1\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mfunc_div\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mx\u001b[0m \u001b[1;33m/\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;31m# define fucntion 2\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 72 | "\u001b[1;31mZeroDivisionError\u001b[0m: division by zero" 73 | ] 74 | }, 75 | { 76 | "name": "stdout", 77 | "output_type": "stream", 78 | "text": [ 79 | "> \u001b[1;32m\u001b[0m(3)\u001b[0;36mfunc_div\u001b[1;34m()\u001b[0m\n", 80 | "\u001b[1;32m 1 \u001b[1;33m\u001b[1;31m# define function 1\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 81 | "\u001b[0m\u001b[1;32m 2 \u001b[1;33m\u001b[1;32mdef\u001b[0m \u001b[0mfunc_div\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 82 | "\u001b[0m\u001b[1;32m----> 3 \u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mx\u001b[0m \u001b[1;33m/\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 83 | "\u001b[0m\u001b[1;32m 4 \u001b[1;33m\u001b[1;33m\u001b[0m\u001b[0m\n", 84 | "\u001b[0m\u001b[1;32m 5 \u001b[1;33m\u001b[1;31m# define fucntion 2\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 85 | "\u001b[0m\n", 86 | "ipdb> print(x)\n", 87 | "1\n", 88 | "ipdb> print(y)\n", 89 | "0\n", 90 | "ipdb> up\n", 91 | "> \u001b[1;32m\u001b[0m(9)\u001b[0;36mfunc_sub\u001b[1;34m()\u001b[0m\n", 92 | "\u001b[1;32m 5 \u001b[1;33m\u001b[1;31m# define fucntion 2\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 93 | "\u001b[0m\u001b[1;32m 6 \u001b[1;33m\u001b[1;32mdef\u001b[0m \u001b[0mfunc_sub\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 94 | "\u001b[0m\u001b[1;32m 7 \u001b[1;33m \u001b[0mx\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0ma\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 95 | "\u001b[0m\u001b[1;32m 8 \u001b[1;33m \u001b[0my\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0ma\u001b[0m \u001b[1;33m-\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 96 | "\u001b[0m\u001b[1;32m----> 9 \u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc_div\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 97 | "\u001b[0m\n", 98 | "ipdb> up\n", 99 | "> \u001b[1;32m\u001b[0m(2)\u001b[0;36m\u001b[1;34m()\u001b[0m\n", 100 | "\u001b[1;32m 1 \u001b[1;33m\u001b[0mget_ipython\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrun_line_magic\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'pdb'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m''\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 101 | "\u001b[0m\u001b[1;32m----> 2 \u001b[1;33m\u001b[0mfunc_sub\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 102 | "\u001b[0m\n", 103 | "ipdb> quit\n" 104 | ] 105 | } 106 | ], 107 | "source": [ 108 | "%pdb\n", 109 | 
"func_sub(1)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [] 118 | } 119 | ], 120 | "metadata": { 121 | "kernelspec": { 122 | "display_name": "Python 3", 123 | "language": "python", 124 | "name": "python3" 125 | }, 126 | "language_info": { 127 | "codemirror_mode": { 128 | "name": "ipython", 129 | "version": 3 130 | }, 131 | "file_extension": ".py", 132 | "mimetype": "text/x-python", 133 | "name": "python", 134 | "nbconvert_exporter": "python", 135 | "pygments_lexer": "ipython3", 136 | "version": "3.7.1" 137 | } 138 | }, 139 | "nbformat": 4, 140 | "nbformat_minor": 2 141 | } 142 | -------------------------------------------------------------------------------- /Code/pretty print.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### Pretty Print dictionary" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 11, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "# Build a test dictionary\n", 17 | "my_dict = {'Student_ID': 34,'Student_name' : 'Tom', 'Student_class' : 5,\n", 18 | " 'Student_marks' : {'maths' : 92,\n", 19 | " 'science' : 95,\n", 20 | " 'social_science' : 65,\n", 21 | " 'English' : 88}\n", 22 | " }\n" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 12, 28 | "metadata": {}, 29 | "outputs": [ 30 | { 31 | "name": "stdout", 32 | "output_type": "stream", 33 | "text": [ 34 | "{'Student_ID': 34, 'Student_name': 'Tom', 'Student_class': 5, 'Student_marks': {'maths': 92, 'science': 95, 'social_science': 65, 'English': 88}}\n" 35 | ] 36 | } 37 | ], 38 | "source": [ 39 | "# with print\n", 40 | "print(my_dict)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 13, 46 | "metadata": {}, 47 | "outputs": [ 48 | { 49 | "name": "stdout", 50 | "output_type": "stream", 51 | "text": [ 52 | "{'Student_ID': 34,\n", 53 | " 'Student_class': 5,\n", 54 | " 'Student_marks': {'English': 88,\n", 55 | " 'maths': 92,\n", 56 | " 'science': 95,\n", 57 | " 'social_science': 65},\n", 58 | " 'Student_name': 'Tom'}\n" 59 | ] 60 | } 61 | ], 62 | "source": [ 63 | "# with pprint\n", 64 | "import pprint\n", 65 | "pprint.pprint(my_dict)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [] 74 | } 75 | ], 76 | "metadata": { 77 | "kernelspec": { 78 | "display_name": "Python 3", 79 | "language": "python", 80 | "name": "python3" 81 | }, 82 | "language_info": { 83 | "codemirror_mode": { 84 | "name": "ipython", 85 | "version": 3 86 | }, 87 | "file_extension": ".py", 88 | "mimetype": "text/x-python", 89 | "name": "python", 90 | "nbconvert_exporter": "python", 91 | "pygments_lexer": "ipython3", 92 | "version": "3.7.1" 93 | } 94 | }, 95 | "nbformat": 4, 96 | "nbformat_minor": 2 97 | } 98 | -------------------------------------------------------------------------------- /Code/print zenofpython.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "The Zen of Python, by Tim Peters\n", 13 | "\n", 14 | "Beautiful is better than ugly.\n", 15 | "Explicit is better than implicit.\n", 16 | "Simple is better than complex.\n", 17 | 
"Complex is better than complicated.\n", 18 | "Flat is better than nested.\n", 19 | "Sparse is better than dense.\n", 20 | "Readability counts.\n", 21 | "Special cases aren't special enough to break the rules.\n", 22 | "Although practicality beats purity.\n", 23 | "Errors should never pass silently.\n", 24 | "Unless explicitly silenced.\n", 25 | "In the face of ambiguity, refuse the temptation to guess.\n", 26 | "There should be one-- and preferably only one --obvious way to do it.\n", 27 | "Although that way may not be obvious at first unless you're Dutch.\n", 28 | "Now is better than never.\n", 29 | "Although never is often better than *right* now.\n", 30 | "If the implementation is hard to explain, it's a bad idea.\n", 31 | "If the implementation is easy to explain, it may be a good idea.\n", 32 | "Namespaces are one honking great idea -- let's do more of those!\n" 33 | ] 34 | } 35 | ], 36 | "source": [ 37 | "import this" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [] 46 | } 47 | ], 48 | "metadata": { 49 | "kernelspec": { 50 | "display_name": "Python 3", 51 | "language": "python", 52 | "name": "python3" 53 | }, 54 | "language_info": { 55 | "codemirror_mode": { 56 | "name": "ipython", 57 | "version": 3 58 | }, 59 | "file_extension": ".py", 60 | "mimetype": "text/x-python", 61 | "name": "python", 62 | "nbconvert_exporter": "python", 63 | "pygments_lexer": "ipython3", 64 | "version": "3.7.1" 65 | } 66 | }, 67 | "nbformat": 4, 68 | "nbformat_minor": 2 69 | } 70 | -------------------------------------------------------------------------------- /Code/read_csv_ParseDate.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 32, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#import libraries\n", 10 | "import pandas as pd" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 33, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "# read dataset\n", 20 | "data = pd.read_csv(\"../Data/AirQualityUCI.csv\")" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 34, 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "data": { 30 | "text/plain": [ 31 | "Date object\n", 32 | "Time object\n", 33 | "CO(GT) int64\n", 34 | "PT08.S1(CO) int64\n", 35 | "NMHC(GT) int64\n", 36 | "dtype: object" 37 | ] 38 | }, 39 | "execution_count": 34, 40 | "metadata": {}, 41 | "output_type": "execute_result" 42 | } 43 | ], 44 | "source": [ 45 | "data.dtypes" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "## Parse Dates in read_csv()" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 35, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "data = pd.read_csv(\"../Data/AirQualityUCI.csv\", parse_dates = [\"Date\"])" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 36, 67 | "metadata": {}, 68 | "outputs": [ 69 | { 70 | "data": { 71 | "text/plain": [ 72 | "Date datetime64[ns]\n", 73 | "Time object\n", 74 | "CO(GT) int64\n", 75 | "PT08.S1(CO) int64\n", 76 | "NMHC(GT) int64\n", 77 | "dtype: object" 78 | ] 79 | }, 80 | "execution_count": 36, 81 | "metadata": {}, 82 | "output_type": "execute_result" 83 | } 84 | ], 85 | "source": [ 86 | "data.dtypes" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | 
"source": [] 95 | } 96 | ], 97 | "metadata": { 98 | "kernelspec": { 99 | "display_name": "Python 3", 100 | "language": "python", 101 | "name": "python3" 102 | }, 103 | "language_info": { 104 | "codemirror_mode": { 105 | "name": "ipython", 106 | "version": 3 107 | }, 108 | "file_extension": ".py", 109 | "mimetype": "text/x-python", 110 | "name": "python", 111 | "nbconvert_exporter": "python", 112 | "pygments_lexer": "ipython3", 113 | "version": "3.7.1" 114 | } 115 | }, 116 | "nbformat": 4, 117 | "nbformat_minor": 2 118 | } 119 | -------------------------------------------------------------------------------- /Code/run python script.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Run a python script inside a notebook" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 4, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "%run add.py" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 5, 22 | "metadata": {}, 23 | "outputs": [ 24 | { 25 | "data": { 26 | "text/plain": [ 27 | "13" 28 | ] 29 | }, 30 | "execution_count": 5, 31 | "metadata": {}, 32 | "output_type": "execute_result" 33 | } 34 | ], 35 | "source": [ 36 | "sum = addition(6,7)\n", 37 | "sum" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [] 46 | } 47 | ], 48 | "metadata": { 49 | "kernelspec": { 50 | "display_name": "Python 3", 51 | "language": "python", 52 | "name": "python3" 53 | }, 54 | "language_info": { 55 | "codemirror_mode": { 56 | "name": "ipython", 57 | "version": 3 58 | }, 59 | "file_extension": ".py", 60 | "mimetype": "text/x-python", 61 | "name": "python", 62 | "nbconvert_exporter": "python", 63 | "pygments_lexer": "ipython3", 64 | "version": "3.7.1" 65 | } 66 | }, 67 | "nbformat": 4, 68 | "nbformat_minor": 2 69 | } 70 | -------------------------------------------------------------------------------- /Code/select_dtype.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 4, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "#import the dataset\n", 19 | "data = pd.read_csv('loan_train.csv')\n", 20 | "data.shape" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 35, 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "data": { 30 | "text/plain": [ 31 | "Loan_ID object\n", 32 | "Gender object\n", 33 | "Married object\n", 34 | "Dependents object\n", 35 | "Education object\n", 36 | "Self_Employed object\n", 37 | "ApplicantIncome int64\n", 38 | "CoapplicantIncome float64\n", 39 | "LoanAmount float64\n", 40 | "Loan_Amount_Term float64\n", 41 | "Credit_History float64\n", 42 | "Property_Area object\n", 43 | "Loan_Status object\n", 44 | "dtype: object" 45 | ] 46 | }, 47 | "execution_count": 35, 48 | "metadata": {}, 49 | "output_type": "execute_result" 50 | } 51 | ], 52 | "source": [ 53 | "#check data types of column\n", 54 | "data.dtypes" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 36, 60 | "metadata": {}, 61 | "outputs": [ 62 | { 63 | "data": { 64 | "text/html": [ 65 | "
\n", 66 | "\n", 79 | "\n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | "
Loan_IDGenderMarriedDependentsEducationSelf_EmployedProperty_AreaLoan_Status
0LP001002MaleNo0GraduateNoUrbanY
1LP001003MaleYes1GraduateNoRuralN
2LP001005MaleYes0GraduateYesUrbanY
3LP001006MaleYes0Not GraduateNoUrbanY
4LP001008MaleNo0GraduateNoUrbanY
\n", 151 | "
" 152 | ], 153 | "text/plain": [ 154 | " Loan_ID Gender Married Dependents Education Self_Employed \\\n", 155 | "0 LP001002 Male No 0 Graduate No \n", 156 | "1 LP001003 Male Yes 1 Graduate No \n", 157 | "2 LP001005 Male Yes 0 Graduate Yes \n", 158 | "3 LP001006 Male Yes 0 Not Graduate No \n", 159 | "4 LP001008 Male No 0 Graduate No \n", 160 | "\n", 161 | " Property_Area Loan_Status \n", 162 | "0 Urban Y \n", 163 | "1 Rural N \n", 164 | "2 Urban Y \n", 165 | "3 Urban Y \n", 166 | "4 Urban Y " 167 | ] 168 | }, 169 | "metadata": {}, 170 | "output_type": "display_data" 171 | } 172 | ], 173 | "source": [ 174 | "# Dataframe containing only categorical variable\n", 175 | "categorical_var = data.select_dtypes(\"object\").head()\n", 176 | "display(categorical_var.head())" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 37, 182 | "metadata": {}, 183 | "outputs": [ 184 | { 185 | "data": { 186 | "text/html": [ 187 | "
\n", 188 | "\n", 201 | "\n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | "
ApplicantIncomeCoapplicantIncomeLoanAmountLoan_Amount_TermCredit_History
058490.0NaN360.01.0
145831508.0128.0360.01.0
230000.066.0360.01.0
325832358.0120.0360.01.0
460000.0141.0360.01.0
\n", 255 | "
" 256 | ], 257 | "text/plain": [ 258 | " ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term \\\n", 259 | "0 5849 0.0 NaN 360.0 \n", 260 | "1 4583 1508.0 128.0 360.0 \n", 261 | "2 3000 0.0 66.0 360.0 \n", 262 | "3 2583 2358.0 120.0 360.0 \n", 263 | "4 6000 0.0 141.0 360.0 \n", 264 | "\n", 265 | " Credit_History \n", 266 | "0 1.0 \n", 267 | "1 1.0 \n", 268 | "2 1.0 \n", 269 | "3 1.0 \n", 270 | "4 1.0 " 271 | ] 272 | }, 273 | "metadata": {}, 274 | "output_type": "display_data" 275 | } 276 | ], 277 | "source": [ 278 | "# Dataframe containing only numeric variable\n", 279 | "numeric_var = data.select_dtypes(\"number\")\n", 280 | "display(numeric_var.head())" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": null, 286 | "metadata": {}, 287 | "outputs": [], 288 | "source": [] 289 | } 290 | ], 291 | "metadata": { 292 | "kernelspec": { 293 | "display_name": "Python 3", 294 | "language": "python", 295 | "name": "python3" 296 | }, 297 | "language_info": { 298 | "codemirror_mode": { 299 | "name": "ipython", 300 | "version": 3 301 | }, 302 | "file_extension": ".py", 303 | "mimetype": "text/x-python", 304 | "name": "python", 305 | "nbconvert_exporter": "python", 306 | "pygments_lexer": "ipython3", 307 | "version": "3.7.1" 308 | } 309 | }, 310 | "nbformat": 4, 311 | "nbformat_minor": 2 312 | } 313 | -------------------------------------------------------------------------------- /Code/split_pdf_pages.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from PyPDF2 import PdfFileWriter, PdfFileReader" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "FILE_NAME = '../Data/dummy_pdf.pdf'" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 3, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "name": "stdout", 28 | "output_type": "stream", 29 | "text": [ 30 | "Completed page1.pdf\n", 31 | "Completed page2.pdf\n" 32 | ] 33 | } 34 | ], 35 | "source": [ 36 | "inputpdf = PdfFileReader(open(FILE_NAME, \"rb\"))\n", 37 | " \n", 38 | "for i in range(inputpdf.numPages):\n", 39 | " output = PdfFileWriter()\n", 40 | " output.addPage(inputpdf.getPage(i))\n", 41 | " temp_filename = \"page%s.pdf\" % str(i+1)\n", 42 | " with open(temp_filename, \"wb\") as outputStream:\n", 43 | " output.write(outputStream)\n", 44 | " print('Completed', temp_filename)" 45 | ] 46 | } 47 | ], 48 | "metadata": { 49 | "kernelspec": { 50 | "display_name": "Python 3", 51 | "language": "python", 52 | "name": "python3" 53 | }, 54 | "language_info": { 55 | "codemirror_mode": { 56 | "name": "ipython", 57 | "version": 3 58 | }, 59 | "file_extension": ".py", 60 | "mimetype": "text/x-python", 61 | "name": "python", 62 | "nbconvert_exporter": "python", 63 | "pygments_lexer": "ipython3", 64 | "version": "3.7.3" 65 | } 66 | }, 67 | "nbformat": 4, 68 | "nbformat_minor": 2 69 | } 70 | -------------------------------------------------------------------------------- /Code/str.split().ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# import necessary libraries\n", 10 | "import pandas as pd" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | 
"metadata": {}, 17 | "outputs": [ 18 | { 19 | "data": { 20 | "text/html": [ 21 | "
\n", 22 | "\n", 35 | "\n", 36 | " \n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | "
name
0Elise Mccann
1Aiden Berger
2Elle Kelley
\n", 57 | "
" 58 | ], 59 | "text/plain": [ 60 | " name\n", 61 | "0 Elise Mccann\n", 62 | "1 Aiden Berger\n", 63 | "2 Elle Kelley" 64 | ] 65 | }, 66 | "execution_count": 2, 67 | "metadata": {}, 68 | "output_type": "execute_result" 69 | } 70 | ], 71 | "source": [ 72 | "# create a dataframe\n", 73 | "df = pd.DataFrame({'name':['Elise Mccann', 'Aiden Berger', 'Elle Kelley']})\n", 74 | "df" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 3, 80 | "metadata": {}, 81 | "outputs": [ 82 | { 83 | "data": { 84 | "text/html": [ 85 | "
\n", 86 | "\n", 99 | "\n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | "
namefirst_namelast_name
0Elise MccannEliseMccann
1Aiden BergerAidenBerger
2Elle KelleyElleKelley
\n", 129 | "
" 130 | ], 131 | "text/plain": [ 132 | " name first_name last_name\n", 133 | "0 Elise Mccann Elise Mccann\n", 134 | "1 Aiden Berger Aiden Berger\n", 135 | "2 Elle Kelley Elle Kelley" 136 | ] 137 | }, 138 | "execution_count": 3, 139 | "metadata": {}, 140 | "output_type": "execute_result" 141 | } 142 | ], 143 | "source": [ 144 | "# extract first name and last name\n", 145 | "df['first_name'] = df['name'].str.split(' ', expand = True)[0]\n", 146 | "df['last_name'] = df['name'].str.split(' ', expand = True)[1]\n", 147 | "\n", 148 | "df" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [] 157 | } 158 | ], 159 | "metadata": { 160 | "kernelspec": { 161 | "display_name": "Python 3", 162 | "language": "python", 163 | "name": "python3" 164 | }, 165 | "language_info": { 166 | "codemirror_mode": { 167 | "name": "ipython", 168 | "version": 3 169 | }, 170 | "file_extension": ".py", 171 | "mimetype": "text/x-python", 172 | "name": "python", 173 | "nbconvert_exporter": "python", 174 | "pygments_lexer": "ipython3", 175 | "version": "3.7.1" 176 | } 177 | }, 178 | "nbformat": 4, 179 | "nbformat_minor": 2 180 | } 181 | -------------------------------------------------------------------------------- /Code/tqdm.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#import tqdm\n", 10 | "from tqdm import tqdm_notebook\n", 11 | "k = 1" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "# nested loops \n", 21 | "k = k+1\n", 22 | "for outer_loop in tqdm_notebook(range(1,100), desc = 'outer_loop', leave = True):\n", 23 | " for inner_loop in tqdm_notebook(range(1,10000000,1), desc = 'inner_loop', leave = False):\n", 24 | " k+=100" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "outer_loop" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [] 42 | } 43 | ], 44 | "metadata": { 45 | "kernelspec": { 46 | "display_name": "Python 3", 47 | "language": "python", 48 | "name": "python3" 49 | } 50 | }, 51 | "nbformat": 4, 52 | "nbformat_minor": 2 53 | } 54 | -------------------------------------------------------------------------------- /Code/write python script.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Write contents of a cell in a file" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 2, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "name": "stdout", 17 | "output_type": "stream", 18 | "text": [ 19 | "Overwriting add.py\n" 20 | ] 21 | } 22 | ], 23 | "source": [ 24 | "%%writefile add.py\n", 25 | "def addition(x,y):\n", 26 | " return x+y" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [] 35 | } 36 | ], 37 | "metadata": { 38 | "kernelspec": { 39 | "display_name": "Python 3", 40 | "language": "python", 41 | "name": "python3" 42 | }, 43 | "language_info": { 44 | "codemirror_mode": { 45 | "name": "ipython", 46 | "version": 3 47 | }, 48 | 
"file_extension": ".py", 49 | "mimetype": "text/x-python", 50 | "name": "python", 51 | "nbconvert_exporter": "python", 52 | "pygments_lexer": "ipython3", 53 | "version": "3.7.1" 54 | } 55 | }, 56 | "nbformat": 4, 57 | "nbformat_minor": 2 58 | } 59 | -------------------------------------------------------------------------------- /Data/dummy_pdf.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kunalj101/Data-Science-Hacks/74b5339d6f84154b9b603d122cec560bbf26da6a/Data/dummy_pdf.pdf -------------------------------------------------------------------------------- /Data/dummy_pdf_page1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kunalj101/Data-Science-Hacks/74b5339d6f84154b9b603d122cec560bbf26da6a/Data/dummy_pdf_page1.pdf -------------------------------------------------------------------------------- /Data/dummy_pdf_page2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kunalj101/Data-Science-Hacks/74b5339d6f84154b9b603d122cec560bbf26da6a/Data/dummy_pdf_page2.pdf -------------------------------------------------------------------------------- /Data/muti_input_nn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kunalj101/Data-Science-Hacks/74b5339d6f84154b9b603d122cec560bbf26da6a/Data/muti_input_nn.png --------------------------------------------------------------------------------