├── plot.png ├── Data Files ├── my_fields.csv ├── data.h5 ├── pickle_data ├── my_fields.xlsx ├── excel_data.xlsx ├── none_fields.csv ├── no_cols.csv ├── sep_fields.txt ├── daydates.csv ├── fields.csv ├── fields2.csv ├── new_fields.csv └── interupt_fields.csv ├── some_data.npy ├── some_data_archieve.npz ├── array_ex.txt ├── README.md ├── Replacing Values.ipynb ├── Methods for Boolean Arrays.ipynb ├── File Operations with Numpy Arrays.ipynb ├── Using JSON Module.ipynb ├── Set Logic Methods.ipynb ├── Handling DataTypes for ndarrays.ipynb ├── Handling Pandas Index Objects.ipynb ├── Combining Data with Overlap.ipynb ├── numpy.where.ipynb ├── Axis Indexes with Duplicate Values.ipynb ├── Reading Excel Files using Pandas.ipynb ├── Pickle Serialization.ipynb ├── Data Transformation using Functions or Mapping.ipynb ├── Reading CSV File in Pieces.ipynb ├── Using the CSV Module.ipynb ├── Using HDF5 Formats.ipynb ├── Numpy Random Number Generation.ipynb ├── Basic Array Statistical Methods.ipynb ├── Dropping Entries from Axis.ipynb ├── Index Object Methods.ipynb ├── Unique Values, Value Counts and Membership.ipynb ├── Handling Data from Databases.ipynb ├── Removing Duplicates.ipynb ├── Apply Methods for DataFrames.ipynb ├── Operations of Linear Algebra.ipynb └── Renaming Axis Indexes.ipynb /plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hassan-Farid/Python-for-Data-Analysis/HEAD/plot.png -------------------------------------------------------------------------------- /Data Files/my_fields.csv: -------------------------------------------------------------------------------- 1 | Day;Date;Year 2 | Monday;02;2020 3 | Tuesday;03;2020 4 | Friday;05;2020 5 | -------------------------------------------------------------------------------- /some_data.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Hassan-Farid/Python-for-Data-Analysis/HEAD/some_data.npy -------------------------------------------------------------------------------- /Data Files/data.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hassan-Farid/Python-for-Data-Analysis/HEAD/Data Files/data.h5 -------------------------------------------------------------------------------- /Data Files/pickle_data: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hassan-Farid/Python-for-Data-Analysis/HEAD/Data Files/pickle_data -------------------------------------------------------------------------------- /some_data_archieve.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hassan-Farid/Python-for-Data-Analysis/HEAD/some_data_archieve.npz -------------------------------------------------------------------------------- /Data Files/my_fields.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hassan-Farid/Python-for-Data-Analysis/HEAD/Data Files/my_fields.xlsx -------------------------------------------------------------------------------- /Data Files/excel_data.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hassan-Farid/Python-for-Data-Analysis/HEAD/Data Files/excel_data.xlsx -------------------------------------------------------------------------------- /Data Files/none_fields.csv: -------------------------------------------------------------------------------- 1 | ID,Name,Field 2 | 1,Adam Jones,Electrical 3 | 2,Edward Elrich,- 4 | 3,Ken Adams, 5 | 4,Ross Taylor,Sportsman 6 | ,Stain Steve, -------------------------------------------------------------------------------- /Data Files/no_cols.csv: 
-------------------------------------------------------------------------------- 1 | 1,Adam Jones,Electrical 2 | 2,Edward Elrich,Mechanical 3 | 3,Stain Steve,Computer Science 4 | 4,Ken Adams,Media Science 5 | 5,Ross Taylor,Sportsman -------------------------------------------------------------------------------- /Data Files/sep_fields.txt: -------------------------------------------------------------------------------- 1 | Name|Field 2 | 1|Adam Jones|Electrical 3 | 2|Edward Elrich|Mechanical 4 | 3|Stain Steve|Computer Science 5 | 4|Ken Adams|Media Science 6 | 5|Ross Taylor|Sportsman -------------------------------------------------------------------------------- /Data Files/daydates.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | 2020-01-01,Monday 3 | 2020-01-02,Tuesday 4 | 2020-01-03,Wednesday 5 | 2020-01-04,Thursday 6 | 2020-01-05,Friday 7 | 2020-01-06,Saturday 8 | 2020-01-07,Sunday 9 | -------------------------------------------------------------------------------- /Data Files/fields.csv: -------------------------------------------------------------------------------- 1 | ID,Name,Field 2 | 1,Adam Jones,Electrical 3 | 2,Edward Elrich,Mechanical 4 | 3,Stain Steve,Computer Science 5 | 4,Ken Adams,Media Science 6 | 5,Ross Taylor,Sportsman 7 | -------------------------------------------------------------------------------- /Data Files/fields2.csv: -------------------------------------------------------------------------------- 1 | Field|ID|Name 2 | Electrical|1|Adam Jones 3 | Mechanical|2|Edward Elrich 4 | Computer Science|3|Stain Steve 5 | Media Science|4|Ken Adams 6 | Sportsman|5|Ross Taylor 7 | -------------------------------------------------------------------------------- /Data Files/new_fields.csv: -------------------------------------------------------------------------------- 1 | ID,Field,Name 2 | 1,Electrical,Adam 3 | 2,Mechanical,Jones 4 | 3,Computer Science,Edward 5 | 4,Mechanical,Elrich 6 | 
5,Electrical,Stain 7 | 6,Computer Science,Steve -------------------------------------------------------------------------------- /array_ex.txt: -------------------------------------------------------------------------------- 1 | 1.032518385200751898e-01 1.462772003512045726e+00 3.997018720913923517e-01 -1.882154894423050118e-01 2 | -8.295909744816059028e-01 -1.569539155638059125e+00 8.959982769036259898e-01 -3.877866584234971326e-01 3 | -6.653994555347828577e-01 6.800770078903267679e-02 -6.443251304063275509e-01 -6.580105130695919347e-02 4 | -1.157403436222848025e+00 -9.428450708993425522e-01 -3.516229879549682136e-01 1.617873786276278647e+00 5 | -------------------------------------------------------------------------------- /Data Files/interupt_fields.csv: -------------------------------------------------------------------------------- 1 | Hello there! 2 | This dataset contains the names and fields of various students 3 | ID,Name,Field 4 | Ooops! I mean not only students but professionals as well 5 | 1,Adam Jones,Electrical 6 | 2,Edward Elrich,Mechanical 7 | Ohh!! And i forgot to mention... oops i forgot about it 8 | 3,Stain Steve,Computer Science 9 | Ahh yes,i remember now, but first seek the last two entries info 10 | 4,Ken Adams,Media Science 11 | 5,Ross Taylor,Sportsman 12 | Well, how are you gonna get your data nowwwww -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python-for-Data-Analysis 2 | A number of scripts describing use of various data analysis tools in python and their implementation on different case scenarios 3 | 4 | 5 | The sequence for following the scripts instruction wise is as follows: 6 | 7 | 1. Getting started with ndarrays 8 | 2. Handling DataTypes for ndarrays 9 | 3. Basic Operations on Arrays 10 | 4. Universal Functions 11 | 5. Simple example using Numpy vectorization 12 | 6. numpy.where 13 | 7. 
Basic Array Statistical Methods 14 | 8. Methods for Boolean Arrays 15 | 9. Numpy Sorting 16 | 10. Set Logic Methods 17 | 11. File Operations with Numpy Arrays 18 | 12. Operations of Linear Algebra 19 | 13. Numpy Random Number Generation 20 | 14. Implementation Random Walks (Case Scenario Implementation) 21 | 15. Getting Started with Pandas Series 22 | 16. Getting Started with Pandas DataFrames 23 | 17. Handling Pandas Index Objects 24 | 18. Index Object Methods 25 | 19. Reindexing 26 | 20. Dropping Entries from Axis 27 | 21. Indexing, Selecting and Filtering Operations 28 | 22. Arithmetic and Data Alignment 29 | 23. Apply Methods for DataFrames 30 | 24. Sorting and Ranking 31 | 25. Axis Indexes with Duplicate Values 32 | 26. Computing Descriptive Statistics 33 | 27. Unique Values, Value Counts and Membership 34 | 28. Handling Missing Data Operations 35 | 29. Hierarchical Indexing 36 | 30. Reordering and Sorting Levels 37 | 31. Applying Summary Statistics to Levels 38 | 32. Using DataFrame Columns as a Hierarchical Form 39 | 33. Reading Data from CSV Files 40 | 34. Reading CSV Files in Pieces 41 | 35. Writing Data to CSV Files 42 | 36. Using the CSV Module 43 | 37. Using JSON Module 44 | 38. Pickle Serialization 45 | 39. Using HDF5 Formats 46 | 40. Reading Excel Files using Pandas 47 | 41. Handling Data from Databases 48 | 42. Database Style DataFrame Merges 49 | 43. Merging Data using Index 50 | 44. Concatenating Along an Axis 51 | 45. Combining Data with Overlap 52 | 46. Pivoting Long to Wide Format 53 | 47. Removing Duplicates 54 | 48. Data Transformation using Functions or Mapping 55 | 49. Replacing Values 56 | 50. 
Renaming Axes Indexes 57 | -------------------------------------------------------------------------------- /Replacing Values.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#We now look at a more general method of replacement in pandas\n", 10 | "import pandas as pd\n", 11 | "from pandas import DataFrame, Series\n", 12 | "import numpy as np" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 6, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "#Consider a Series of values\n", 22 | "data = Series([1.,-234,2.,-234,1000,-124,-999,456,-999,56,234])" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 7, 28 | "metadata": {}, 29 | "outputs": [ 30 | { 31 | "data": { 32 | "text/plain": [ 33 | "0 1.0\n", 34 | "1 -234.0\n", 35 | "2 2.0\n", 36 | "3 -234.0\n", 37 | "4 1000.0\n", 38 | "5 -124.0\n", 39 | "6 -999.0\n", 40 | "7 456.0\n", 41 | "8 -999.0\n", 42 | "9 56.0\n", 43 | "10 234.0\n", 44 | "dtype: float64" 45 | ] 46 | }, 47 | "execution_count": 7, 48 | "metadata": {}, 49 | "output_type": "execute_result" 50 | } 51 | ], 52 | "source": [ 53 | "data" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 8, 59 | "metadata": {}, 60 | "outputs": [ 61 | { 62 | "data": { 63 | "text/plain": [ 64 | "0 1.0\n", 65 | "1 -234.0\n", 66 | "2 2.0\n", 67 | "3 -234.0\n", 68 | "4 1000.0\n", 69 | "5 -124.0\n", 70 | "6 NaN\n", 71 | "7 456.0\n", 72 | "8 NaN\n", 73 | "9 56.0\n", 74 | "10 234.0\n", 75 | "dtype: float64" 76 | ] 77 | }, 78 | "execution_count": 8, 79 | "metadata": {}, 80 | "output_type": "execute_result" 81 | } 82 | ], 83 | "source": [ 84 | "#The replace method allows you to change values easily\n", 85 | "#Suppsoe -999 be sentimental values for missing data\n", 86 | "\n", 87 | "data.replace(-999, np.nan) #Replacing all -999 values with 
NaN values" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 11, 93 | "metadata": {}, 94 | "outputs": [ 95 | { 96 | "data": { 97 | "text/plain": [ 98 | "0 1.0\n", 99 | "1 NaN\n", 100 | "2 2.0\n", 101 | "3 NaN\n", 102 | "4 1000.0\n", 103 | "5 -124.0\n", 104 | "6 NaN\n", 105 | "7 456.0\n", 106 | "8 NaN\n", 107 | "9 56.0\n", 108 | "10 234.0\n", 109 | "dtype: float64" 110 | ] 111 | }, 112 | "execution_count": 11, 113 | "metadata": {}, 114 | "output_type": "execute_result" 115 | } 116 | ], 117 | "source": [ 118 | "#We can also perform it on more than one items at a time by passing the items to replace as a list with a list of their replacement values\n", 119 | "\n", 120 | "data.replace([-999,-234], np.nan)" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 12, 126 | "metadata": {}, 127 | "outputs": [ 128 | { 129 | "data": { 130 | "text/plain": [ 131 | "0 1.0\n", 132 | "1 0.0\n", 133 | "2 2.0\n", 134 | "3 0.0\n", 135 | "4 1000.0\n", 136 | "5 -124.0\n", 137 | "6 NaN\n", 138 | "7 456.0\n", 139 | "8 NaN\n", 140 | "9 56.0\n", 141 | "10 234.0\n", 142 | "dtype: float64" 143 | ] 144 | }, 145 | "execution_count": 12, 146 | "metadata": {}, 147 | "output_type": "execute_result" 148 | } 149 | ], 150 | "source": [ 151 | "data.replace([-999,-234],[np.nan,0])" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 15, 157 | "metadata": {}, 158 | "outputs": [ 159 | { 160 | "data": { 161 | "text/plain": [ 162 | "0 1.0\n", 163 | "1 0.0\n", 164 | "2 2.0\n", 165 | "3 0.0\n", 166 | "4 1000.0\n", 167 | "5 -124.0\n", 168 | "6 NaN\n", 169 | "7 456.0\n", 170 | "8 NaN\n", 171 | "9 56.0\n", 172 | "10 234.0\n", 173 | "dtype: float64" 174 | ] 175 | }, 176 | "execution_count": 15, 177 | "metadata": {}, 178 | "output_type": "execute_result" 179 | } 180 | ], 181 | "source": [ 182 | "#We can also perform the operations using dicts instead of lists\n", 183 | "data.replace({-999: np.nan, -234:0})" 184 | ] 185 | }, 186 | { 187 | 
"cell_type": "code", 188 | "execution_count": null, 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [] 192 | } 193 | ], 194 | "metadata": { 195 | "kernelspec": { 196 | "display_name": "Python 3", 197 | "language": "python", 198 | "name": "python3" 199 | }, 200 | "language_info": { 201 | "codemirror_mode": { 202 | "name": "ipython", 203 | "version": 3 204 | }, 205 | "file_extension": ".py", 206 | "mimetype": "text/x-python", 207 | "name": "python", 208 | "nbconvert_exporter": "python", 209 | "pygments_lexer": "ipython3", 210 | "version": "3.8.2" 211 | } 212 | }, 213 | "nbformat": 4, 214 | "nbformat_minor": 4 215 | } 216 | -------------------------------------------------------------------------------- /Methods for Boolean Arrays.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "data": { 10 | "text/plain": [ 11 | "array([False, True, True, False, True, False, False])" 12 | ] 13 | }, 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "output_type": "execute_result" 17 | } 18 | ], 19 | "source": [ 20 | "#Numpy can be used to carry out methods for boolean arrays\n", 21 | "\n", 22 | "import numpy as np\n", 23 | "from numpy.random import randn\n", 24 | "\n", 25 | "bool_arr = np.array([False, True, True, False, True, False, False])\n", 26 | "bool_arr" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 3, 32 | "metadata": {}, 33 | "outputs": [ 34 | { 35 | "data": { 36 | "text/plain": [ 37 | "array([-0.82970925, -0.48842782, 0.86957697, 1.37041254, -1.4876376 ,\n", 38 | " 0.92170847, -0.98813075, 0.67024641, -0.06906026, -0.62078202,\n", 39 | " -0.07863047, 0.89943169, -1.75947612, -1.28893664, 0.20412679,\n", 40 | " 0.39540743, -1.68352065, 0.82482254, 0.59171459, -0.89408004,\n", 41 | " -0.45913128, 0.25794878, -1.08751663, 0.23117771, -1.34423815,\n", 42 | " -0.02232406, 
0.88415439, 0.51197638, -0.15096856, -0.00670537,\n", 43 | " 0.85985364, -0.00472732, 0.44990037, -1.08608628, 0.71948998,\n", 44 | " -0.84585393, 1.24785555, -0.20212936, 0.68847375, 0.7670229 ,\n", 45 | " 0.95111833, -1.59459513, -1.09352728, -0.64458559, -0.3634872 ,\n", 46 | " -1.3276456 , 0.57444793, 0.76870495, -0.39937309, -0.29209341,\n", 47 | " -1.2194886 , -0.17203274, 0.75005586, -0.03389137, 1.61555002,\n", 48 | " -0.77622018, 0.65691334, 0.43114165, -1.3669775 , 0.91468553,\n", 49 | " -0.04366873, 1.81260005, 1.02728964, -0.31610149, -0.82800718,\n", 50 | " -1.35359534, -0.75335943, -0.59118949, 1.65746694, -2.02029146,\n", 51 | " 1.46574871, -1.14418908, -0.01979856, 0.08159557, -0.79708698,\n", 52 | " 0.74049367, 0.92326584, -0.01123274, -0.05593027, 1.54018937,\n", 53 | " 0.81886412, -0.62970331, -0.62979455, -0.09733021, 0.29788235,\n", 54 | " 1.26489678, -0.33919618, -0.53359832, -0.07910365, -0.34341075,\n", 55 | " -0.69232626, -0.09005864, -1.4931534 , -1.42775201, 0.4742894 ,\n", 56 | " 0.10223136, 0.5759892 , -1.15037282, 0.81923134, 1.00097885])" 57 | ] 58 | }, 59 | "execution_count": 3, 60 | "metadata": {}, 61 | "output_type": "execute_result" 62 | } 63 | ], 64 | "source": [ 65 | "#Calculating number of positive values for a randomly generated array\n", 66 | "#Summation value function: sum\n", 67 | "\n", 68 | "data = randn(100)\n", 69 | "data" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 4, 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "data": { 79 | "text/plain": [ 80 | "43" 81 | ] 82 | }, 83 | "execution_count": 4, 84 | "metadata": {}, 85 | "output_type": "execute_result" 86 | } 87 | ], 88 | "source": [ 89 | "(data > 0).sum()" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 6, 95 | "metadata": {}, 96 | "outputs": [ 97 | { 98 | "data": { 99 | "text/plain": [ 100 | "True" 101 | ] 102 | }, 103 | "execution_count": 6, 104 | "metadata": {}, 105 | "output_type": "execute_result" 106 | } 107 
| ], 108 | "source": [ 109 | "#Any Truth Value Function: any\n", 110 | "bool_arr.any()" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 7, 116 | "metadata": {}, 117 | "outputs": [ 118 | { 119 | "data": { 120 | "text/plain": [ 121 | "False" 122 | ] 123 | }, 124 | "execution_count": 7, 125 | "metadata": {}, 126 | "output_type": "execute_result" 127 | } 128 | ], 129 | "source": [ 130 | "#All Truth Values function: all\n", 131 | "bool_arr.all()" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 8, 137 | "metadata": {}, 138 | "outputs": [ 139 | { 140 | "data": { 141 | "text/plain": [ 142 | "True" 143 | ] 144 | }, 145 | "execution_count": 8, 146 | "metadata": {}, 147 | "output_type": "execute_result" 148 | } 149 | ], 150 | "source": [ 151 | "#These functions are applicable for non-boolean values where 0 is False while all other values are True\n", 152 | "data.any()" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 9, 158 | "metadata": {}, 159 | "outputs": [ 160 | { 161 | "data": { 162 | "text/plain": [ 163 | "True" 164 | ] 165 | }, 166 | "execution_count": 9, 167 | "metadata": {}, 168 | "output_type": "execute_result" 169 | } 170 | ], 171 | "source": [ 172 | "data.all()" 173 | ] 174 | } 175 | ], 176 | "metadata": { 177 | "kernelspec": { 178 | "display_name": "Python 3", 179 | "language": "python", 180 | "name": "python3" 181 | }, 182 | "language_info": { 183 | "codemirror_mode": { 184 | "name": "ipython", 185 | "version": 3 186 | }, 187 | "file_extension": ".py", 188 | "mimetype": "text/x-python", 189 | "name": "python", 190 | "nbconvert_exporter": "python", 191 | "pygments_lexer": "ipython3", 192 | "version": "3.7.4" 193 | } 194 | }, 195 | "nbformat": 4, 196 | "nbformat_minor": 2 197 | } 198 | -------------------------------------------------------------------------------- /File Operations with Numpy Arrays.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "data": { 10 | "text/plain": [ 11 | "array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])" 12 | ] 13 | }, 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "output_type": "execute_result" 17 | } 18 | ], 19 | "source": [ 20 | "#We can also perform file input/output operations using the Numpy arrays\n", 21 | "#Numpy allows to save and load array data on disk in text or binary format\n", 22 | "\n", 23 | "import numpy as np\n", 24 | "\n", 25 | "data = np.arange(10)\n", 26 | "data" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 2, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "#Storing Array Data on Disk as Binary Format\n", 36 | "\n", 37 | "#Saving Data Function: save\n", 38 | "np.save('some_data',data) #Saves data in .npy extension by default" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 3, 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "data": { 48 | "text/plain": [ 49 | "array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])" 50 | ] 51 | }, 52 | "execution_count": 3, 53 | "metadata": {}, 54 | "output_type": "execute_result" 55 | } 56 | ], 57 | "source": [ 58 | "#Loading Data Function: load\n", 59 | "np.load('some_data.npy')" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 4, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "#Saving Data in .Zip File Function: savez\n", 69 | "np.savez('some_data_archieve.npz', a = data, b = data)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 5, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "#Loading Data from .Zip File Function: load (returned object is a Dict)\n", 79 | "archieve_data = np.load('some_data_archieve.npz')" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 6, 85 | 
"metadata": {}, 86 | "outputs": [ 87 | { 88 | "data": { 89 | "text/plain": [ 90 | "" 91 | ] 92 | }, 93 | "execution_count": 6, 94 | "metadata": {}, 95 | "output_type": "execute_result" 96 | } 97 | ], 98 | "source": [ 99 | "archieve_data" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 7, 105 | "metadata": {}, 106 | "outputs": [ 107 | { 108 | "data": { 109 | "text/plain": [ 110 | "array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])" 111 | ] 112 | }, 113 | "execution_count": 7, 114 | "metadata": {}, 115 | "output_type": "execute_result" 116 | } 117 | ], 118 | "source": [ 119 | "#We can extract the data from this Dict by calling the key values\n", 120 | "archieve_data['b']" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 8, 126 | "metadata": {}, 127 | "outputs": [ 128 | { 129 | "data": { 130 | "text/plain": [ 131 | "array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])" 132 | ] 133 | }, 134 | "execution_count": 8, 135 | "metadata": {}, 136 | "output_type": "execute_result" 137 | } 138 | ], 139 | "source": [ 140 | "archieve_data['a']" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 10, 146 | "metadata": {}, 147 | "outputs": [ 148 | { 149 | "data": { 150 | "text/plain": [ 151 | "array([[ 0.10325184, 1.462772 , 0.39970187, -0.18821549],\n", 152 | " [-0.82959097, -1.56953916, 0.89599828, -0.38778666],\n", 153 | " [-0.66539946, 0.0680077 , -0.64432513, -0.06580105],\n", 154 | " [-1.15740344, -0.94284507, -0.35162299, 1.61787379]])" 155 | ] 156 | }, 157 | "execution_count": 10, 158 | "metadata": {}, 159 | "output_type": "execute_result" 160 | } 161 | ], 162 | "source": [ 163 | "#Saving and Loading Text Files\n", 164 | "from numpy.random import randn\n", 165 | "\n", 166 | "data2 = randn(4,4)\n", 167 | "data2" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 13, 173 | "metadata": {}, 174 | "outputs": [], 175 | "source": [ 176 | "#Saving Data into a Text File: savetxt\n", 177 | 
"np.savetxt('array_ex.txt', data2)" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": 15, 183 | "metadata": {}, 184 | "outputs": [ 185 | { 186 | "data": { 187 | "text/plain": [ 188 | "array([[ 0.10325184, 1.462772 , 0.39970187, -0.18821549],\n", 189 | " [-0.82959097, -1.56953916, 0.89599828, -0.38778666],\n", 190 | " [-0.66539946, 0.0680077 , -0.64432513, -0.06580105],\n", 191 | " [-1.15740344, -0.94284507, -0.35162299, 1.61787379]])" 192 | ] 193 | }, 194 | "execution_count": 15, 195 | "metadata": {}, 196 | "output_type": "execute_result" 197 | } 198 | ], 199 | "source": [ 200 | "#Loading Data from a Text File: loadtxt\n", 201 | "np.loadtxt('array_ex.txt')" 202 | ] 203 | } 204 | ], 205 | "metadata": { 206 | "kernelspec": { 207 | "display_name": "Python 3", 208 | "language": "python", 209 | "name": "python3" 210 | }, 211 | "language_info": { 212 | "codemirror_mode": { 213 | "name": "ipython", 214 | "version": 3 215 | }, 216 | "file_extension": ".py", 217 | "mimetype": "text/x-python", 218 | "name": "python", 219 | "nbconvert_exporter": "python", 220 | "pygments_lexer": "ipython3", 221 | "version": "3.7.4" 222 | } 223 | }, 224 | "nbformat": 4, 225 | "nbformat_minor": 2 226 | } 227 | -------------------------------------------------------------------------------- /Using JSON Module.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 36, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import json\n", 10 | "from pandas import DataFrame" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 27, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "#JSON stands for Java Script Object Notation\n", 20 | "#JSON is the standard format for sending data by HTTP requests between web browsers and other applications\n", 21 | "#We now look at some functionalities of the built-in python json module\n", 22 | 
"\n", 23 | "#First creating a json object\n", 24 | "\n", 25 | "obj = \"\"\"\n", 26 | "{\n", 27 | " \"name\":\"Adam Jones\",\n", 28 | " \"age\": 25,\n", 29 | " \"places_lived\":[\"Pakistan\",\"UAE\",\"USA\"],\n", 30 | " \"fields_exp\": [{\"company\":\"Folio3\",\"duration(years)\":2,\"job\":\"Tester\"},\n", 31 | " {\"company\":\"Siemens\",\"duration(years)\":7,\"job\":\"Software Eng\"}]}\n", 32 | "\"\"\"" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 28, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "#The above is a json object whose format largely resembles that of python script dictionary\n", 42 | "#Since json is a text format, thus we have to create its object in a multi-string, otherwise we will get an error that json.loads accepts only str object\n", 43 | "\n", 44 | "#Now to load this object we can use the json.loads function\n", 45 | "json_reader = json.loads(obj)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 29, 51 | "metadata": {}, 52 | "outputs": [ 53 | { 54 | "data": { 55 | "text/plain": [ 56 | "{'name': 'Adam Jones',\n", 57 | " 'age': 25,\n", 58 | " 'places_lived': ['Pakistan', 'UAE', 'USA'],\n", 59 | " 'fields_exp': [{'company': 'Folio3', 'duration(years)': 2, 'job': 'Tester'},\n", 60 | " {'company': 'Siemens', 'duration(years)': 7, 'job': 'Software Eng'}]}" 61 | ] 62 | }, 63 | "execution_count": 29, 64 | "metadata": {}, 65 | "output_type": "execute_result" 66 | } 67 | ], 68 | "source": [ 69 | "json_reader #To display the data in json object as a python object" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 30, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "#We can convert the python object into a json object by using the json.dumps method\n", 79 | "obj = json.dumps(json_reader)" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 31, 85 | "metadata": {}, 86 | "outputs": [ 87 | { 88 | "data": { 89 | "text/plain": [ 90 | 
"'{\"name\": \"Adam Jones\", \"age\": 25, \"places_lived\": [\"Pakistan\", \"UAE\", \"USA\"], \"fields_exp\": [{\"company\": \"Folio3\", \"duration(years)\": 2, \"job\": \"Tester\"}, {\"company\": \"Siemens\", \"duration(years)\": 7, \"job\": \"Software Eng\"}]}'" 91 | ] 92 | }, 93 | "execution_count": 31, 94 | "metadata": {}, 95 | "output_type": "execute_result" 96 | } 97 | ], 98 | "source": [ 99 | "obj #Displaying the json object" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 32, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "#We can pass the json data directly to a pandas DataFrame or pass it after converting it into a python object\n", 109 | "\n", 110 | "json_df = DataFrame(json_reader['fields_exp'], columns=['company','job','duration(years)'])" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 33, 116 | "metadata": {}, 117 | "outputs": [ 118 | { 119 | "data": { 120 | "text/html": [ 121 | "
\n", 122 | "\n", 135 | "\n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | "
companyjobduration(years)
0Folio3Tester2
1SiemensSoftware Eng7
\n", 159 | "
" 160 | ], 161 | "text/plain": [ 162 | " company job duration(years)\n", 163 | "0 Folio3 Tester 2\n", 164 | "1 Siemens Software Eng 7" 165 | ] 166 | }, 167 | "execution_count": 33, 168 | "metadata": {}, 169 | "output_type": "execute_result" 170 | } 171 | ], 172 | "source": [ 173 | "json_df" 174 | ] 175 | } 176 | ], 177 | "metadata": { 178 | "kernelspec": { 179 | "display_name": "Python 3", 180 | "language": "python", 181 | "name": "python3" 182 | }, 183 | "language_info": { 184 | "codemirror_mode": { 185 | "name": "ipython", 186 | "version": 3 187 | }, 188 | "file_extension": ".py", 189 | "mimetype": "text/x-python", 190 | "name": "python", 191 | "nbconvert_exporter": "python", 192 | "pygments_lexer": "ipython3", 193 | "version": "3.8.2" 194 | } 195 | }, 196 | "nbformat": 4, 197 | "nbformat_minor": 4 198 | } 199 | -------------------------------------------------------------------------------- /Set Logic Methods.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "data": { 10 | "text/plain": [ 11 | "array(['A', 'B', 'C', 'B', 'E', 'A', 'C', 'C', 'B', 'A'], dtype=' Signed/Unsigned Integer(8-bits)\n", 15 | "#int16/uint16 => Signed/Unsigned Integer(16-bits)\n", 16 | "#int32/uint32 => Signed/Unsigned Integer(32-bits)\n", 17 | "#int64/uint64 => Signed/Unsigned Integer(64-bits)\n", 18 | "#float 16/32/64/128 => Floating Point(16/32/64/128 bits)\n", 19 | "#complex 64/128/256 => Complex Number(64/128/256 bits)\n", 20 | "#bool => Boolean number(1 bit i.e. 
either True or False)\n", 21 | "#object => Python object type(Bits based on the initializing in its class)\n", 22 | "#string_ => Fixed length string type(1 byte per character)\n", 23 | "#unicode_ => Fixed length unicode type(Number of bytes depend upon platform)\n" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 6, 29 | "metadata": {}, 30 | "outputs": [ 31 | { 32 | "data": { 33 | "text/plain": [ 34 | "dtype('int32')" 35 | ] 36 | }, 37 | "execution_count": 6, 38 | "metadata": {}, 39 | "output_type": "execute_result" 40 | } 41 | ], 42 | "source": [ 43 | "#ndarrays can be type casted from one dtype to another using astype()\n", 44 | "\n", 45 | "int_arr1 = np.array([1,2,3,4,5])\n", 46 | "int_arr1.dtype" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 11, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "data": { 56 | "text/plain": [ 57 | "array([1., 2., 3., 4., 5.])" 58 | ] 59 | }, 60 | "execution_count": 11, 61 | "metadata": {}, 62 | "output_type": "execute_result" 63 | } 64 | ], 65 | "source": [ 66 | "float_arr1 = int_arr1.astype(np.float64) #typecasted into float64\n", 67 | "float_arr1" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 12, 73 | "metadata": {}, 74 | "outputs": [ 75 | { 76 | "data": { 77 | "text/plain": [ 78 | "dtype('float64')" 79 | ] 80 | }, 81 | "execution_count": 12, 82 | "metadata": {}, 83 | "output_type": "execute_result" 84 | } 85 | ], 86 | "source": [ 87 | "float_arr2 = np.array([2.3,4.3,5.3,1.5,6.64,6.4])\n", 88 | "float_arr2.dtype" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 13, 94 | "metadata": {}, 95 | "outputs": [ 96 | { 97 | "data": { 98 | "text/plain": [ 99 | "array([2, 4, 5, 1, 6, 6])" 100 | ] 101 | }, 102 | "execution_count": 13, 103 | "metadata": {}, 104 | "output_type": "execute_result" 105 | } 106 | ], 107 | "source": [ 108 | "int_arr2 = float_arr2.astype(np.int32) #typecasted into int32\n", 109 | "int_arr2" 110 | ] 111 | }, 112 | { 
113 | "cell_type": "code", 114 | "execution_count": 14, 115 | "metadata": {}, 116 | "outputs": [ 117 | { 118 | "data": { 119 | "text/plain": [ 120 | "array(['1.23', '-3.4', '4.234'], dtype='\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;31m#Index Types are immmutable i.e. cannot be altered by user, so that they can be easily shared among different data structures\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mindex\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m2\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m'e'\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", 90 | "\u001b[1;32mc:\\users\\mr. ghori\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages\\pandas\\core\\indexes\\base.py\u001b[0m in \u001b[0;36m__setitem__\u001b[1;34m(self, key, value)\u001b[0m\n\u001b[0;32m 4258\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4259\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m__setitem__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 4260\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Index does not support mutable operations\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4261\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4262\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m__getitem__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 91 | "\u001b[1;31mTypeError\u001b[0m: Index does not support mutable operations" 92 | ] 93 | } 94 | ], 95 | 
"source": [ 96 | "#Index Types are immmutable i.e. cannot be altered by user, so that they can be easily shared among different data structures\n", 97 | "index[2] = 'e'" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 8, 103 | "metadata": {}, 104 | "outputs": [ 105 | { 106 | "data": { 107 | "text/plain": [ 108 | "0 0.0\n", 109 | "1 3.4\n", 110 | "2 -2.0\n", 111 | "dtype: float64" 112 | ] 113 | }, 114 | "execution_count": 8, 115 | "metadata": {}, 116 | "output_type": "execute_result" 117 | } 118 | ], 119 | "source": [ 120 | "index = pd.Index(np.arange(3))\n", 121 | "obj2 = Series(np.array([0,3.4,-2]), index = index)\n", 122 | "obj2" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 11, 128 | "metadata": {}, 129 | "outputs": [ 130 | { 131 | "data": { 132 | "text/plain": [ 133 | "Int64Index([0, 1, 2], dtype='int64')" 134 | ] 135 | }, 136 | "execution_count": 11, 137 | "metadata": {}, 138 | "output_type": "execute_result" 139 | } 140 | ], 141 | "source": [ 142 | "#In case of numpy array data as index object is of type Int64Index\n", 143 | "obj2.index" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 12, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "#Other types of index objects that can be obtained are:\n", 153 | "\n", 154 | "#MultiIndex => In case of hierarchical objects\n", 155 | "#DatetimeIndex => Stores nanoseconds tiemstamps(datetime64 dtype)\n", 156 | "#PeriodIndex => Specialized Index for Period data(timespans)" 157 | ] 158 | } 159 | ], 160 | "metadata": { 161 | "kernelspec": { 162 | "display_name": "Python 3", 163 | "language": "python", 164 | "name": "python3" 165 | }, 166 | "language_info": { 167 | "codemirror_mode": { 168 | "name": "ipython", 169 | "version": 3 170 | }, 171 | "file_extension": ".py", 172 | "mimetype": "text/x-python", 173 | "name": "python", 174 | "nbconvert_exporter": "python", 175 | "pygments_lexer": "ipython3", 176 | "version": "3.7.4" 
177 | } 178 | }, 179 | "nbformat": 4, 180 | "nbformat_minor": 2 181 | } 182 | -------------------------------------------------------------------------------- /Combining Data with Overlap.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#Now we consider a special case in which the two datasets to combine have full or partial same indexes\n", 10 | "\n", 11 | "import pandas as pd\n", 12 | "from pandas import DataFrame, Series\n", 13 | "import numpy as np" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 5, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "#Creating two Series objects with same indexes bu different values\n", 23 | "\n", 24 | "a = Series([np.nan, 2.5, np.nan, 3.4, 6.7, np.nan], index=list('fedbca'))\n", 25 | "b = Series(np.arange(len(a), dtype=np.float64), index=a.index)\n", 26 | "b[-1] = np.nan" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 6, 32 | "metadata": {}, 33 | "outputs": [ 34 | { 35 | "data": { 36 | "text/plain": [ 37 | "f NaN\n", 38 | "e 2.5\n", 39 | "d NaN\n", 40 | "b 3.4\n", 41 | "c 6.7\n", 42 | "a NaN\n", 43 | "dtype: float64" 44 | ] 45 | }, 46 | "execution_count": 6, 47 | "metadata": {}, 48 | "output_type": "execute_result" 49 | } 50 | ], 51 | "source": [ 52 | "a" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 7, 58 | "metadata": {}, 59 | "outputs": [ 60 | { 61 | "data": { 62 | "text/plain": [ 63 | "f 0.0\n", 64 | "e 1.0\n", 65 | "d 2.0\n", 66 | "b 3.0\n", 67 | "c 4.0\n", 68 | "a NaN\n", 69 | "dtype: float64" 70 | ] 71 | }, 72 | "execution_count": 7, 73 | "metadata": {}, 74 | "output_type": "execute_result" 75 | } 76 | ], 77 | "source": [ 78 | "b" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 9, 84 | "metadata": {}, 85 | "outputs": [ 86 | { 87 | 
"data": { 88 | "text/plain": [ 89 | "array([0. , 2.5, 2. , 3.4, 6.7, nan])" 90 | ] 91 | }, 92 | "execution_count": 9, 93 | "metadata": {}, 94 | "output_type": "execute_result" 95 | } 96 | ], 97 | "source": [ 98 | "#We can use the numpy where to pass a condition to combine the datasets as\n", 99 | "np.where(pd.isnull(a), b, a)" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 11, 105 | "metadata": {}, 106 | "outputs": [ 107 | { 108 | "data": { 109 | "text/plain": [ 110 | "a NaN\n", 111 | "b 3.0\n", 112 | "c 6.7\n", 113 | "d 2.0\n", 114 | "e 1.0\n", 115 | "f 0.0\n", 116 | "dtype: float64" 117 | ] 118 | }, 119 | "execution_count": 11, 120 | "metadata": {}, 121 | "output_type": "execute_result" 122 | } 123 | ], 124 | "source": [ 125 | "#The above expression would return a ndarray with all values of a and values of b where a values were NaN\n", 126 | "\n", 127 | "#A similar function for pandas Series is the combine_first\n", 128 | "b[:-2].combine_first(a[2:])" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 15, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "#The combine_first appears to perform the same task column by column, thus we can use it for dataframes as well\n", 138 | "df1 = DataFrame({'a':[1, np.nan, 23, 4.5], 'b':[2.3,1.4, np.nan, 32.4], 'c':np.arange(6,10)})\n", 139 | "df2 = DataFrame({'a':[np.nan, 2.4, np.nan, 5.6], 'b':[np.nan,1.4,3.4,np.nan], 'c':np.arange(7,11)})" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 16, 145 | "metadata": {}, 146 | "outputs": [ 147 | { 148 | "data": { 149 | "text/html": [ 150 | "
\n", 151 | "\n", 164 | "\n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | "
abc
01.02.36
12.41.47
223.03.48
34.532.49
\n", 200 | "
" 201 | ], 202 | "text/plain": [ 203 | " a b c\n", 204 | "0 1.0 2.3 6\n", 205 | "1 2.4 1.4 7\n", 206 | "2 23.0 3.4 8\n", 207 | "3 4.5 32.4 9" 208 | ] 209 | }, 210 | "execution_count": 16, 211 | "metadata": {}, 212 | "output_type": "execute_result" 213 | } 214 | ], 215 | "source": [ 216 | "df1.combine_first(df2)" 217 | ] 218 | } 219 | ], 220 | "metadata": { 221 | "kernelspec": { 222 | "display_name": "Python 3", 223 | "language": "python", 224 | "name": "python3" 225 | }, 226 | "language_info": { 227 | "codemirror_mode": { 228 | "name": "ipython", 229 | "version": 3 230 | }, 231 | "file_extension": ".py", 232 | "mimetype": "text/x-python", 233 | "name": "python", 234 | "nbconvert_exporter": "python", 235 | "pygments_lexer": "ipython3", 236 | "version": "3.8.2" 237 | } 238 | }, 239 | "nbformat": 4, 240 | "nbformat_minor": 4 241 | } 242 | -------------------------------------------------------------------------------- /numpy.where.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "\n", 11 | "#numpy.where is vectorized form of python ternary expresssion (if else statement)\n", 12 | "\n", 13 | "xarr = np.array([1.,2.,3.,4.,5.])\n", 14 | "yarr = np.array([1.4,2.5,3.6,4.7,6.8])\n", 15 | "conditions = np.array([True, False, False, True, False])" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 3, 21 | "metadata": {}, 22 | "outputs": [ 23 | { 24 | "data": { 25 | "text/plain": [ 26 | "[1.0, 2.5, 3.6, 4.0, 6.8]" 27 | ] 28 | }, 29 | "execution_count": 3, 30 | "metadata": {}, 31 | "output_type": "execute_result" 32 | } 33 | ], 34 | "source": [ 35 | "#Suppose we want to take value from xarr if condition is True and from yarr if condition is False\n", 36 | "\n", 37 | "#In case of pure Python, the task can be performed via list comprehension as:\n", 38 | 
"result = [(x if c else y) for x,y,c in zip(xarr,yarr,conditions)]\n", 39 | "result" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 4, 45 | "metadata": {}, 46 | "outputs": [ 47 | { 48 | "data": { 49 | "text/plain": [ 50 | "array([1. , 2.5, 3.6, 4. , 6.8])" 51 | ] 52 | }, 53 | "execution_count": 4, 54 | "metadata": {}, 55 | "output_type": "execute_result" 56 | } 57 | ], 58 | "source": [ 59 | "#In case of numpy, we can use where() to perform the above task as:\n", 60 | "result = np.where(conditions, xarr, yarr)\n", 61 | "result" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 6, 67 | "metadata": {}, 68 | "outputs": [ 69 | { 70 | "data": { 71 | "text/plain": [ 72 | "array([[ 1.32969813, -1.3431948 , -1.63296197, -0.31929917],\n", 73 | " [-0.63439893, 0.088797 , 0.04996672, 0.39516985],\n", 74 | " [ 0.00767993, -0.93328636, 0.42585378, 0.03790392],\n", 75 | " [ 0.67080532, 0.7480199 , 0.26772152, 0.74426192]])" 76 | ] 77 | }, 78 | "execution_count": 6, 79 | "metadata": {}, 80 | "output_type": "execute_result" 81 | } 82 | ], 83 | "source": [ 84 | "#Using np.where to change positive and negative values\n", 85 | "\n", 86 | "from numpy.random import randn\n", 87 | "\n", 88 | "data = randn(4,4)\n", 89 | "data" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 7, 95 | "metadata": {}, 96 | "outputs": [ 97 | { 98 | "data": { 99 | "text/plain": [ 100 | "array([[1, 0, 0, 0],\n", 101 | " [0, 1, 1, 1],\n", 102 | " [1, 0, 1, 1],\n", 103 | " [1, 1, 1, 1]])" 104 | ] 105 | }, 106 | "execution_count": 7, 107 | "metadata": {}, 108 | "output_type": "execute_result" 109 | } 110 | ], 111 | "source": [ 112 | "#Suppose we want to replace all negative values to 0 and positive to 1 then:\n", 113 | "\n", 114 | "data = np.where(data < 0, 0, 1)\n", 115 | "data" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 29, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "#Nested where 
expressions\n", 125 | "#In case of two or more conditions, np.where can be nested to perform desired task\n", 126 | "\n", 127 | "#Suppose we want values 0,1,2,3 for each of the following different conditions satisfying the value of x, then:\n", 128 | "#conitions = [false, false], [false, true], [true, false], [true, true]\n", 129 | "\n", 130 | "x = [-2,-1,0,1]\n", 131 | "y = [-2,0,-1,1]" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 37, 137 | "metadata": {}, 138 | "outputs": [ 139 | { 140 | "data": { 141 | "text/plain": [ 142 | "[0, 1, 2, 3]" 143 | ] 144 | }, 145 | "execution_count": 37, 146 | "metadata": {}, 147 | "output_type": "execute_result" 148 | } 149 | ], 150 | "source": [ 151 | "#Using pure Python\n", 152 | "result_1 = []\n", 153 | "for i in range(4):\n", 154 | " if (x[i] < 0) and (y[i] < 0):\n", 155 | " result_1.append(0)\n", 156 | " elif (x[i] < 0) and (y[i] >= 0):\n", 157 | " result_1.append(1)\n", 158 | " elif (x[i] >= 0) and (y[i] < 0):\n", 159 | " result_1.append(2)\n", 160 | " else:\n", 161 | " result_1.append(3)\n", 162 | " \n", 163 | "result_1" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 38, 169 | "metadata": {}, 170 | "outputs": [ 171 | { 172 | "data": { 173 | "text/plain": [ 174 | "array([0., 1., 2., 3.])" 175 | ] 176 | }, 177 | "execution_count": 38, 178 | "metadata": {}, 179 | "output_type": "execute_result" 180 | } 181 | ], 182 | "source": [ 183 | "#Using np.where\n", 184 | "result_2 = np.empty(4)\n", 185 | "for j in range(4):\n", 186 | " result_2[j] = np.where((x[j] < 0) and (y[j] < 0), 0,\n", 187 | " np.where((x[j] < 0) and (y[j] >= 0), 1,\n", 188 | " np.where((x[j] >= 0) and (y[j] < 0), 2, 3)))\n", 189 | "result_2" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 41, 195 | "metadata": {}, 196 | "outputs": [ 197 | { 198 | "data": { 199 | "text/plain": [ 200 | "array([0., 1., 2., 3.])" 201 | ] 202 | }, 203 | "execution_count": 41, 204 | "metadata": 
{}, 205 | "output_type": "execute_result" 206 | } 207 | ], 208 | "source": [ 209 | "#In some cases we can apply np.where furthur to remove the for loop\n", 210 | "#Although for this approach, we can use the 0/1 values of conditional logic to our advantage and can evaluate the values based on simple Boolean Expression\n", 211 | "\n", 212 | "result_3 = np.empty(4)\n", 213 | "for k in range(1, 4):\n", 214 | " result_3[k] = 1 * ((x[k] < 0) and (y[k] >= 0)) + 2 * ((x[k] >= 0) and (y[k] < 0)) + 3 * ((x[k] >= 0) and (y[k] >= 0))\n", 215 | "result_3 " 216 | ] 217 | } 218 | ], 219 | "metadata": { 220 | "kernelspec": { 221 | "display_name": "Python 3", 222 | "language": "python", 223 | "name": "python3" 224 | }, 225 | "language_info": { 226 | "codemirror_mode": { 227 | "name": "ipython", 228 | "version": 3 229 | }, 230 | "file_extension": ".py", 231 | "mimetype": "text/x-python", 232 | "name": "python", 233 | "nbconvert_exporter": "python", 234 | "pygments_lexer": "ipython3", 235 | "version": "3.7.4" 236 | } 237 | }, 238 | "nbformat": 4, 239 | "nbformat_minor": 2 240 | } 241 | -------------------------------------------------------------------------------- /Axis Indexes with Duplicate Values.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#Now we will see how to deal with indexes in which labels are duplicate\n", 10 | "\n", 11 | "import pandas as pd\n", 12 | "import numpy as np\n", 13 | "from pandas import Series,DataFrame\n" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 4, 19 | "metadata": {}, 20 | "outputs": [ 21 | { 22 | "data": { 23 | "text/plain": [ 24 | "a 0\n", 25 | "b 1\n", 26 | "a 2\n", 27 | "c 3\n", 28 | "d 4\n", 29 | "a 5\n", 30 | "c 6\n", 31 | "dtype: int32" 32 | ] 33 | }, 34 | "execution_count": 4, 35 | "metadata": {}, 36 | "output_type": "execute_result" 37 | } 
38 | ], 39 | "source": [ 40 | "series_obj = Series(np.arange(7), index=['a','b','a','c','d','a','c'])\n", 41 | "series_obj" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 7, 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "data": { 51 | "text/plain": [ 52 | "False" 53 | ] 54 | }, 55 | "execution_count": 7, 56 | "metadata": {}, 57 | "output_type": "execute_result" 58 | } 59 | ], 60 | "source": [ 61 | "#We can find whether a certain object's indexes are unique or not as:\n", 62 | "series_obj.index.is_unique" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 8, 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "data": { 72 | "text/plain": [ 73 | "a 0\n", 74 | "a 2\n", 75 | "a 5\n", 76 | "dtype: int32" 77 | ] 78 | }, 79 | "execution_count": 8, 80 | "metadata": {}, 81 | "output_type": "execute_result" 82 | } 83 | ], 84 | "source": [ 85 | "#In case of Data selection, a certain index returns all the values stored at that index\n", 86 | "series_obj['a']" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 9, 92 | "metadata": {}, 93 | "outputs": [ 94 | { 95 | "data": { 96 | "text/html": [ 97 | "
\n", 98 | "\n", 111 | "\n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | "
012
a-1.333040-0.410786-0.465574
b1.308799-1.6230320.257251
a1.036575-1.997923-0.711075
c0.8050561.8590863.042280
\n", 147 | "
" 148 | ], 149 | "text/plain": [ 150 | " 0 1 2\n", 151 | "a -1.333040 -0.410786 -0.465574\n", 152 | "b 1.308799 -1.623032 0.257251\n", 153 | "a 1.036575 -1.997923 -0.711075\n", 154 | "c 0.805056 1.859086 3.042280" 155 | ] 156 | }, 157 | "execution_count": 9, 158 | "metadata": {}, 159 | "output_type": "execute_result" 160 | } 161 | ], 162 | "source": [ 163 | "#Same goes in case of DataFrame\n", 164 | "frame_obj = DataFrame(np.random.randn(4,3), index=['a','b','a','c'])\n", 165 | "frame_obj" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 10, 171 | "metadata": {}, 172 | "outputs": [ 173 | { 174 | "data": { 175 | "text/html": [ 176 | "
\n", 177 | "\n", 190 | "\n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | "
012
a-1.333040-0.410786-0.465574
a1.036575-1.997923-0.711075
\n", 214 | "
" 215 | ], 216 | "text/plain": [ 217 | " 0 1 2\n", 218 | "a -1.333040 -0.410786 -0.465574\n", 219 | "a 1.036575 -1.997923 -0.711075" 220 | ] 221 | }, 222 | "execution_count": 10, 223 | "metadata": {}, 224 | "output_type": "execute_result" 225 | } 226 | ], 227 | "source": [ 228 | "frame_obj.loc['a']" 229 | ] 230 | } 231 | ], 232 | "metadata": { 233 | "kernelspec": { 234 | "display_name": "Python 3", 235 | "language": "python", 236 | "name": "python3" 237 | }, 238 | "language_info": { 239 | "codemirror_mode": { 240 | "name": "ipython", 241 | "version": 3 242 | }, 243 | "file_extension": ".py", 244 | "mimetype": "text/x-python", 245 | "name": "python", 246 | "nbconvert_exporter": "python", 247 | "pygments_lexer": "ipython3", 248 | "version": "3.7.4" 249 | } 250 | }, 251 | "nbformat": 4, 252 | "nbformat_minor": 2 253 | } 254 | -------------------------------------------------------------------------------- /Reading Excel Files using Pandas.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#Sometimes the data is stored in an excel file\n", 10 | "#We can extract data from excel files using pandas\n", 11 | "\n", 12 | "import pandas as pd\n", 13 | "from pandas import DataFrame" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 8, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "#Like HDF5 to use this class of the pandas we should have dependencies installed\n", 23 | "#In case of excel files, pandas uses the xlrd and openpyxl packages\n", 24 | "\n", 25 | "xls_file = pd.ExcelFile('Data Files\\my_fields.xlsx')" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 11, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "#We can extract the data stored in the excel file using the parse method\n", 35 | "xls_tables = 
xls_file.parse('my_fields')" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 12, 41 | "metadata": {}, 42 | "outputs": [ 43 | { 44 | "data": { 45 | "text/html": [ 46 | "
\n", 47 | "\n", 60 | "\n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | "
DateDayYear
0Monday22020
1Tuesday32020
2Friday52020
\n", 90 | "
" 91 | ], 92 | "text/plain": [ 93 | " Date Day Year\n", 94 | "0 Monday 2 2020\n", 95 | "1 Tuesday 3 2020\n", 96 | "2 Friday 5 2020" 97 | ] 98 | }, 99 | "execution_count": 12, 100 | "metadata": {}, 101 | "output_type": "execute_result" 102 | } 103 | ], 104 | "source": [ 105 | "xls_tables" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 13, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "#We can also create excel files using the dataframe data\n", 115 | "data = pd.read_csv(r'Data Files/fields.csv')" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 14, 121 | "metadata": {}, 122 | "outputs": [ 123 | { 124 | "data": { 125 | "text/html": [ 126 | "
\n", 127 | "\n", 140 | "\n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | "
IDNameField
01Adam JonesElectrical
12Edward ElrichMechanical
23Stain SteveComputer Science
34Ken AdamsMedia Science
45Ross TaylorSportsman
\n", 182 | "
" 183 | ], 184 | "text/plain": [ 185 | " ID Name Field\n", 186 | "0 1 Adam Jones Electrical\n", 187 | "1 2 Edward Elrich Mechanical\n", 188 | "2 3 Stain Steve Computer Science\n", 189 | "3 4 Ken Adams Media Science\n", 190 | "4 5 Ross Taylor Sportsman" 191 | ] 192 | }, 193 | "execution_count": 14, 194 | "metadata": {}, 195 | "output_type": "execute_result" 196 | } 197 | ], 198 | "source": [ 199 | "data" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 17, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "#Using the Excel Writer and to_excel methods\n", 209 | "writer = pd.ExcelWriter('Data Files\\excel_data.xlsx') #Creating file to save data in\n", 210 | "data.to_excel(writer, 'Sheet1') #Writing data from dataframe to excel file\n", 211 | "writer.save() #Saving data stored in the excel file" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": null, 217 | "metadata": {}, 218 | "outputs": [], 219 | "source": [] 220 | } 221 | ], 222 | "metadata": { 223 | "kernelspec": { 224 | "display_name": "Python 3", 225 | "language": "python", 226 | "name": "python3" 227 | }, 228 | "language_info": { 229 | "codemirror_mode": { 230 | "name": "ipython", 231 | "version": 3 232 | }, 233 | "file_extension": ".py", 234 | "mimetype": "text/x-python", 235 | "name": "python", 236 | "nbconvert_exporter": "python", 237 | "pygments_lexer": "ipython3", 238 | "version": "3.8.2" 239 | } 240 | }, 241 | "nbformat": 4, 242 | "nbformat_minor": 4 243 | } 244 | -------------------------------------------------------------------------------- /Pickle Serialization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#Pickling or Pickle Serialization is the process of converting a python object into a byte stream or binary format\n", 10 | "#We can then use the 
inverse operation of deserialization to get ack the original object\n", 11 | "\n", 12 | "import pandas as pd\n", 13 | "from pandas import DataFrame" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 3, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "#Reading a sample file\n", 23 | "data = pd.read_csv(r'Data Files\\fields.csv')" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 4, 29 | "metadata": {}, 30 | "outputs": [ 31 | { 32 | "data": { 33 | "text/html": [ 34 | "
\n", 35 | "\n", 48 | "\n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | "
IDNameField
01Adam JonesElectrical
12Edward ElrichMechanical
23Stain SteveComputer Science
34Ken AdamsMedia Science
45Ross TaylorSportsman
\n", 90 | "
" 91 | ], 92 | "text/plain": [ 93 | " ID Name Field\n", 94 | "0 1 Adam Jones Electrical\n", 95 | "1 2 Edward Elrich Mechanical\n", 96 | "2 3 Stain Steve Computer Science\n", 97 | "3 4 Ken Adams Media Science\n", 98 | "4 5 Ross Taylor Sportsman" 99 | ] 100 | }, 101 | "execution_count": 4, 102 | "metadata": {}, 103 | "output_type": "execute_result" 104 | } 105 | ], 106 | "source": [ 107 | "data" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 6, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "#We can save data in pickle format by using the pandas to_pickle method\n", 117 | "#It automatically saves the data from a dataframe into a binary pickle format using serialization\n", 118 | "data.to_pickle(r'Data Files\\pickle_data')" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 7, 124 | "metadata": {}, 125 | "outputs": [ 126 | { 127 | "data": { 128 | "text/html": [ 129 | "
\n", 130 | "\n", 143 | "\n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | "
IDNameField
01Adam JonesElectrical
12Edward ElrichMechanical
23Stain SteveComputer Science
34Ken AdamsMedia Science
45Ross TaylorSportsman
\n", 185 | "
" 186 | ], 187 | "text/plain": [ 188 | " ID Name Field\n", 189 | "0 1 Adam Jones Electrical\n", 190 | "1 2 Edward Elrich Mechanical\n", 191 | "2 3 Stain Steve Computer Science\n", 192 | "3 4 Ken Adams Media Science\n", 193 | "4 5 Ross Taylor Sportsman" 194 | ] 195 | }, 196 | "execution_count": 7, 197 | "metadata": {}, 198 | "output_type": "execute_result" 199 | } 200 | ], 201 | "source": [ 202 | "#Using the read_pickle method we can also read the pickle file by applying deserialization\n", 203 | "pd.read_pickle(r'Data Files\\pickle_data')" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "metadata": {}, 210 | "outputs": [], 211 | "source": [] 212 | } 213 | ], 214 | "metadata": { 215 | "kernelspec": { 216 | "display_name": "Python 3", 217 | "language": "python", 218 | "name": "python3" 219 | }, 220 | "language_info": { 221 | "codemirror_mode": { 222 | "name": "ipython", 223 | "version": 3 224 | }, 225 | "file_extension": ".py", 226 | "mimetype": "text/x-python", 227 | "name": "python", 228 | "nbconvert_exporter": "python", 229 | "pygments_lexer": "ipython3", 230 | "version": "3.8.2" 231 | } 232 | }, 233 | "nbformat": 4, 234 | "nbformat_minor": 4 235 | } 236 | -------------------------------------------------------------------------------- /Data Transformation using Functions or Mapping.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#Suppose we want to transform our data based on the values of an array, series or dataframe\n", 10 | "import pandas as pd\n", 11 | "from pandas import DataFrame, Series\n", 12 | "import numpy as np" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "#Consider a dataset containing data about various types of meats\n", 22 | "data = 
DataFrame({'food':['mutton','beef','pastraml','beef','nova lox','beef'], 'ounces':[3.4,2.3,5.4,4.5,2.5,6.1]})" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 3, 28 | "metadata": {}, 29 | "outputs": [ 30 | { 31 | "data": { 32 | "text/html": [ 33 | "
\n", 34 | "\n", 47 | "\n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | "
foodounces
0mutton3.4
1beef2.3
2pastraml5.4
3beef4.5
4nova lox2.5
5beef6.1
\n", 88 | "
" 89 | ], 90 | "text/plain": [ 91 | " food ounces\n", 92 | "0 mutton 3.4\n", 93 | "1 beef 2.3\n", 94 | "2 pastraml 5.4\n", 95 | "3 beef 4.5\n", 96 | "4 nova lox 2.5\n", 97 | "5 beef 6.1" 98 | ] 99 | }, 100 | "execution_count": 3, 101 | "metadata": {}, 102 | "output_type": "execute_result" 103 | } 104 | ], 105 | "source": [ 106 | "data" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 4, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "#Suppose we wanted to add a column to the dataframe idnicating the animal from which the meat came\n", 116 | "#Create a map/dictionary to indicate each item\n", 117 | "\n", 118 | "meat_to_animal = {'mutton':'goat', 'beef':'cow', 'pastraml':'cow', 'nova lox':'salmon'}" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 5, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "#The map method allows to map a dictionary like object and can create a new value according to the item in the food column\n", 128 | "data['animal'] = data['food'].map(meat_to_animal)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 6, 134 | "metadata": {}, 135 | "outputs": [ 136 | { 137 | "data": { 138 | "text/html": [ 139 | "
\n", 140 | "\n", 153 | "\n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | "
foodouncesanimal
0mutton3.4goat
1beef2.3cow
2pastraml5.4cow
3beef4.5cow
4nova lox2.5salmon
5beef6.1cow
\n", 201 | "
" 202 | ], 203 | "text/plain": [ 204 | " food ounces animal\n", 205 | "0 mutton 3.4 goat\n", 206 | "1 beef 2.3 cow\n", 207 | "2 pastraml 5.4 cow\n", 208 | "3 beef 4.5 cow\n", 209 | "4 nova lox 2.5 salmon\n", 210 | "5 beef 6.1 cow" 211 | ] 212 | }, 213 | "execution_count": 6, 214 | "metadata": {}, 215 | "output_type": "execute_result" 216 | } 217 | ], 218 | "source": [ 219 | "data" 220 | ] 221 | } 222 | ], 223 | "metadata": { 224 | "kernelspec": { 225 | "display_name": "Python 3", 226 | "language": "python", 227 | "name": "python3" 228 | }, 229 | "language_info": { 230 | "codemirror_mode": { 231 | "name": "ipython", 232 | "version": 3 233 | }, 234 | "file_extension": ".py", 235 | "mimetype": "text/x-python", 236 | "name": "python", 237 | "nbconvert_exporter": "python", 238 | "pygments_lexer": "ipython3", 239 | "version": "3.8.2" 240 | } 241 | }, 242 | "nbformat": 4, 243 | "nbformat_minor": 4 244 | } 245 | -------------------------------------------------------------------------------- /Reading CSV File in Pieces.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | "from pandas import DataFrame, Series" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "data_df = pd.read_csv('Data Files/fields.csv')" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 3, 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "data": { 30 | "text/html": [ 31 | "
\n", 32 | "\n", 45 | "\n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | "
IDNameField
01Adam JonesElectrical
12Edward ElrichMechanical
23Stain SteveComputer Science
34Ken AdamsMedia Science
45Ross TaylorSportsman
\n", 87 | "
" 88 | ], 89 | "text/plain": [ 90 | " ID Name Field\n", 91 | "0 1 Adam Jones Electrical\n", 92 | "1 2 Edward Elrich Mechanical\n", 93 | "2 3 Stain Steve Computer Science\n", 94 | "3 4 Ken Adams Media Science\n", 95 | "4 5 Ross Taylor Sportsman" 96 | ] 97 | }, 98 | "execution_count": 3, 99 | "metadata": {}, 100 | "output_type": "execute_result" 101 | } 102 | ], 103 | "source": [ 104 | "data_df" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 4, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "#Suppose we have to read only 2 rows from the file, we can specify that to the nrows argument\n", 114 | "data_df = pd.read_csv('Data Files/fields.csv', nrows=2)" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 5, 120 | "metadata": {}, 121 | "outputs": [ 122 | { 123 | "data": { 124 | "text/html": [ 125 | "
\n", 126 | "\n", 139 | "\n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | "
IDNameField
01Adam JonesElectrical
12Edward ElrichMechanical
\n", 163 | "
" 164 | ], 165 | "text/plain": [ 166 | " ID Name Field\n", 167 | "0 1 Adam Jones Electrical\n", 168 | "1 2 Edward Elrich Mechanical" 169 | ] 170 | }, 171 | "execution_count": 5, 172 | "metadata": {}, 173 | "output_type": "execute_result" 174 | } 175 | ], 176 | "source": [ 177 | "data_df" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": 46, 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [ 186 | "#We can also read a csv in pieces, which is often a case for big data files.\n", 187 | "#Suppose we want to read the data in chunks of 2 i.e. the csv is seperated into chunks containing 2 rows each\n", 188 | "#We can do this by passing the value of our chunksize to the chunksize argument\n", 189 | "df_chunks = pd.read_csv('Data Files/fields.csv', chunksize=2)" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 47, 195 | "metadata": {}, 196 | "outputs": [ 197 | { 198 | "data": { 199 | "text/plain": [ 200 | "" 201 | ] 202 | }, 203 | "execution_count": 47, 204 | "metadata": {}, 205 | "output_type": "execute_result" 206 | } 207 | ], 208 | "source": [ 209 | "df_chunks" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 48, 215 | "metadata": {}, 216 | "outputs": [ 217 | { 218 | "name": "stdout", 219 | "output_type": "stream", 220 | "text": [ 221 | "0 Adam Jones\n", 222 | "1 Edward Elrich\n", 223 | "Name: Name, dtype: object\n", 224 | "2 Stain Steve\n", 225 | "3 Ken Adams\n", 226 | "Name: Name, dtype: object\n", 227 | "4 Ross Taylor\n", 228 | "Name: Name, dtype: object\n" 229 | ] 230 | } 231 | ], 232 | "source": [ 233 | "#Using the returned text parser object we can iterate over the given csv file according to the chunk size as shown below\n", 234 | "#If displayed it will show three different series each havingn chunks of the data passed through the csv\n", 235 | "\n", 236 | "for chunks in df_chunks:\n", 237 | " print(chunks['Name'])" 238 | ] 239 | } 240 | ], 241 | "metadata": { 242 | 
"kernelspec": { 243 | "display_name": "Python 3", 244 | "language": "python", 245 | "name": "python3" 246 | }, 247 | "language_info": { 248 | "codemirror_mode": { 249 | "name": "ipython", 250 | "version": 3 251 | }, 252 | "file_extension": ".py", 253 | "mimetype": "text/x-python", 254 | "name": "python", 255 | "nbconvert_exporter": "python", 256 | "pygments_lexer": "ipython3", 257 | "version": "3.8.2" 258 | } 259 | }, 260 | "nbformat": 4, 261 | "nbformat_minor": 2 262 | } 263 | -------------------------------------------------------------------------------- /Using the CSV Module.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#Sometimes we have to manually correct the info in the delimited files rather than using automation process\n", 10 | "#For that we will use the Python built-in csv module\n", 11 | "import csv" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 12, 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "name": "stdout", 21 | "output_type": "stream", 22 | "text": [ 23 | "['ID', 'Name', 'Field']\n", 24 | "['1', 'Adam Jones', 'Electrical']\n", 25 | "['2', 'Edward Elrich', 'Mechanical']\n", 26 | "['3', 'Stain Steve', 'Computer Science']\n", 27 | "['4', 'Ken Adams', 'Media Science']\n", 28 | "['5', 'Ross Taylor', 'Sportsman']\n" 29 | ] 30 | } 31 | ], 32 | "source": [ 33 | "#Reading files using the csv module\n", 34 | "with open(r'Data Files\\fields.csv', 'r') as csv_file:\n", 35 | " csv_reader = csv.reader(csv_file)\n", 36 | " \n", 37 | " #Displaying the content of the csv file as a list\n", 38 | " for line in csv_reader:\n", 39 | " print(line)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 14, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "#We can manually seperate the headers from the given file using basic list 
indexing\n", 49 | "#Opening the file for reading\n", 50 | "csv_file = open(r'Data Files\\fields.csv')" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 17, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "#Reading the contents of the file to a list containing header and contents as elements\n", 60 | "reader = list(csv.reader(csv_file))" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 18, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "#Seperating the header and content from list using basic assignment\n", 70 | "header, content = reader[0], reader[1:]" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 19, 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "name": "stdout", 80 | "output_type": "stream", 81 | "text": [ 82 | "['ID', 'Name', 'Field']\n" 83 | ] 84 | } 85 | ], 86 | "source": [ 87 | "print(header)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 20, 93 | "metadata": {}, 94 | "outputs": [ 95 | { 96 | "name": "stdout", 97 | "output_type": "stream", 98 | "text": [ 99 | "[['1', 'Adam Jones', 'Electrical'], ['2', 'Edward Elrich', 'Mechanical'], ['3', 'Stain Steve', 'Computer Science'], ['4', 'Ken Adams', 'Media Science'], ['5', 'Ross Taylor', 'Sportsman']]\n" 100 | ] 101 | } 102 | ], 103 | "source": [ 104 | "print(content)" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 21, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "#Using this data we can create other data structures for suppose a dictionary\n", 114 | "reader_dict = {h:c for h,c in zip(header, zip(*content))} #Creates a dictionary with key values as headers and ordered pairs of the content values" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 22, 120 | "metadata": {}, 121 | "outputs": [ 122 | { 123 | "data": { 124 | "text/plain": [ 125 | "{'ID': ('1', '2', '3', '4', '5'),\n", 126 | " 'Name': ('Adam 
Jones',\n", 127 | " 'Edward Elrich',\n", 128 | " 'Stain Steve',\n", 129 | " 'Ken Adams',\n", 130 | " 'Ross Taylor'),\n", 131 | " 'Field': ('Electrical',\n", 132 | " 'Mechanical',\n", 133 | " 'Computer Science',\n", 134 | " 'Media Science',\n", 135 | " 'Sportsman')}" 136 | ] 137 | }, 138 | "execution_count": 22, 139 | "metadata": {}, 140 | "output_type": "execute_result" 141 | } 142 | ], 143 | "source": [ 144 | "reader_dict" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 26, 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [ 153 | "#Another great use of csv module is that we can create our own dialect of a csv file by passing arguments to it\n", 154 | "#For a csv.Dialect class we have:\n", 155 | "#delimiter: value seperator (by default: ',')\n", 156 | "#lineterminator: line seperator (by default: '\\n')\n", 157 | "#quotechar: special character seperator (by default: '\"')\n", 158 | "#quoting: csv.QUOTE_ALL (quotes every character) or csv.QUOTE_MINIMAL (quotes only special characters)(by default), csv.QUOTE_NONNUMERIC (quotes only non-numeric characters), csv.QUOTE_NON (quotes no characters)\n", 159 | "#skipintialspace: Ignore white space after each delimiter (by default: False)\n", 160 | "#doublequote: Double quotes inside a field\n", 161 | "#escapechar: String to escape delimiter for QUOTE_NON (by default: disabled)" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 60, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "#Lets create a simple csv.Dialect class and use it for our storage\n", 171 | "class my_dialect(csv.Dialect): #creating subclass from csv.Dialect\n", 172 | " lineterminator = '\\n'\n", 173 | " delimiter = \";\"\n", 174 | " quotechar = '\"'\n", 175 | " quoting = csv.QUOTE_MINIMAL" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 62, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "csv_file = open(r'Data 
Files\\fields.csv')\n", 185 | "csv_reader = csv.reader(csv_file, delimiter='|')" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 63, 191 | "metadata": {}, 192 | "outputs": [ 193 | { 194 | "name": "stdout", 195 | "output_type": "stream", 196 | "text": [ 197 | "<_csv.reader object at 0x000002058D709B20>\n" 198 | ] 199 | } 200 | ], 201 | "source": [ 202 | "print(csv_reader)" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 65, 208 | "metadata": {}, 209 | "outputs": [], 210 | "source": [ 211 | "#We can also write files manually by using the csv.writer function in our dialect we created\n", 212 | "with open('Data Files/my_fields.csv', 'w') as csv_file:\n", 213 | " writer = csv.writer(csv_file, dialect=my_dialect)\n", 214 | " writer.writerow(('Day','Date','Year'))\n", 215 | " writer.writerow(('Monday','02','2020'))\n", 216 | " writer.writerow(('Tuesday','03','2020'))\n", 217 | " writer.writerow(('Friday','05','2020'))\n", 218 | " " 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": 66, 224 | "metadata": {}, 225 | "outputs": [], 226 | "source": [ 227 | "csv_file = open(r'Data Files/my_fields.csv')" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": 67, 233 | "metadata": {}, 234 | "outputs": [], 235 | "source": [ 236 | "csv_reader = csv.reader(csv_file)" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": 68, 242 | "metadata": {}, 243 | "outputs": [ 244 | { 245 | "name": "stdout", 246 | "output_type": "stream", 247 | "text": [ 248 | "['Day;Date;Year']\n", 249 | "['Monday;02;2020']\n", 250 | "['Tuesday;03;2020']\n", 251 | "['Friday;05;2020']\n" 252 | ] 253 | } 254 | ], 255 | "source": [ 256 | "for line in csv_reader:\n", 257 | " print(line)" 258 | ] 259 | } 260 | ], 261 | "metadata": { 262 | "kernelspec": { 263 | "display_name": "Python 3", 264 | "language": "python", 265 | "name": "python3" 266 | }, 267 | "language_info": { 268 | 
"codemirror_mode": { 269 | "name": "ipython", 270 | "version": 3 271 | }, 272 | "file_extension": ".py", 273 | "mimetype": "text/x-python", 274 | "name": "python", 275 | "nbconvert_exporter": "python", 276 | "pygments_lexer": "ipython3", 277 | "version": "3.8.2" 278 | } 279 | }, 280 | "nbformat": 4, 281 | "nbformat_minor": 4 282 | } 283 | -------------------------------------------------------------------------------- /Using HDF5 Formats.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#Another binary format yet for large data purposes is the Heirarchical Data Format HDF\n", 10 | "#Pandas supports the HDF5 format with the use of pytables interface\n", 11 | "\n", 12 | "import pandas as pd\n", 13 | "from pandas import DataFrame" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 4, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "#Creating a hdf5 format file using pandas (Dependency tables mst be installed in order to use it: pip install tables)\n", 23 | "hdf_file = pd.HDFStore('data.h5')" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 5, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "data = pd.read_csv('Data Files/fields.csv')" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 6, 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "data": { 42 | "text/html": [ 43 | "
\n", 44 | "\n", 57 | "\n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | "
IDNameField
01Adam JonesElectrical
12Edward ElrichMechanical
23Stain SteveComputer Science
34Ken AdamsMedia Science
45Ross TaylorSportsman
\n", 99 | "
" 100 | ], 101 | "text/plain": [ 102 | " ID Name Field\n", 103 | "0 1 Adam Jones Electrical\n", 104 | "1 2 Edward Elrich Mechanical\n", 105 | "2 3 Stain Steve Computer Science\n", 106 | "3 4 Ken Adams Media Science\n", 107 | "4 5 Ross Taylor Sportsman" 108 | ] 109 | }, 110 | "execution_count": 6, 111 | "metadata": {}, 112 | "output_type": "execute_result" 113 | } 114 | ], 115 | "source": [ 116 | "data" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 7, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "#HDF5 storage works that of a dictionary, we pass keys and store whole dataframe or big data into it\n", 126 | "hdf_file['obj1'] = data" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 8, 132 | "metadata": {}, 133 | "outputs": [], 134 | "source": [ 135 | "hdf_file['obj2'] = data['Name'] #Storing the names as second object" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 10, 141 | "metadata": {}, 142 | "outputs": [ 143 | { 144 | "data": { 145 | "text/plain": [ 146 | "\n", 147 | "File path: data.h5" 148 | ] 149 | }, 150 | "execution_count": 10, 151 | "metadata": {}, 152 | "output_type": "execute_result" 153 | } 154 | ], 155 | "source": [ 156 | "hdf_file" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 11, 162 | "metadata": {}, 163 | "outputs": [ 164 | { 165 | "data": { 166 | "text/html": [ 167 | "
\n", 168 | "\n", 181 | "\n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | "
IDNameField
01Adam JonesElectrical
12Edward ElrichMechanical
23Stain SteveComputer Science
34Ken AdamsMedia Science
45Ross TaylorSportsman
\n", 223 | "
" 224 | ], 225 | "text/plain": [ 226 | " ID Name Field\n", 227 | "0 1 Adam Jones Electrical\n", 228 | "1 2 Edward Elrich Mechanical\n", 229 | "2 3 Stain Steve Computer Science\n", 230 | "3 4 Ken Adams Media Science\n", 231 | "4 5 Ross Taylor Sportsman" 232 | ] 233 | }, 234 | "execution_count": 11, 235 | "metadata": {}, 236 | "output_type": "execute_result" 237 | } 238 | ], 239 | "source": [ 240 | "#To retieve the data simply pass the key like we do in case of a dictionary\n", 241 | "hdf_file['obj1']" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 12, 247 | "metadata": {}, 248 | "outputs": [ 249 | { 250 | "data": { 251 | "text/plain": [ 252 | "0 Adam Jones\n", 253 | "1 Edward Elrich\n", 254 | "2 Stain Steve\n", 255 | "3 Ken Adams\n", 256 | "4 Ross Taylor\n", 257 | "Name: Name, dtype: object" 258 | ] 259 | }, 260 | "execution_count": 12, 261 | "metadata": {}, 262 | "output_type": "execute_result" 263 | } 264 | ], 265 | "source": [ 266 | "hdf_file['obj2']" 267 | ] 268 | } 269 | ], 270 | "metadata": { 271 | "kernelspec": { 272 | "display_name": "Python 3", 273 | "language": "python", 274 | "name": "python3" 275 | }, 276 | "language_info": { 277 | "codemirror_mode": { 278 | "name": "ipython", 279 | "version": 3 280 | }, 281 | "file_extension": ".py", 282 | "mimetype": "text/x-python", 283 | "name": "python", 284 | "nbconvert_exporter": "python", 285 | "pygments_lexer": "ipython3", 286 | "version": "3.8.2" 287 | } 288 | }, 289 | "nbformat": 4, 290 | "nbformat_minor": 4 291 | } 292 | -------------------------------------------------------------------------------- /Numpy Random Number Generation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#Numpy can be used to generate random values for a ceratin array elements\n", 10 | "#This generates sample values for arrays 
from many kinds of probability distributions\n", 11 | "\n", 12 | "import numpy as np\n", 13 | "import numpy.random\n" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 2, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "#Seeds random number generator(gives initial value to random generator to create all random values): seed\n", 23 | "np.random.seed(1)" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 13, 29 | "metadata": {}, 30 | "outputs": [ 31 | { 32 | "data": { 33 | "text/plain": [ 34 | "array([0.51815255, 0.86502025, 0.82914691, 0.82960336, 0.27304997,\n", 35 | " 0.0592432 , 0.67052804, 0.59306552, 0.6716541 , 0.41178788])" 36 | ] 37 | }, 38 | "execution_count": 13, 39 | "metadata": {}, 40 | "output_type": "execute_result" 41 | } 42 | ], 43 | "source": [ 44 | "#Random sample from Uniform Distribution: rand\n", 45 | "X = np.random.rand(10)\n", 46 | "X" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 16, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "data": { 56 | "text/plain": [ 57 | "8" 58 | ] 59 | }, 60 | "execution_count": 16, 61 | "metadata": {}, 62 | "output_type": "execute_result" 63 | } 64 | ], 65 | "source": [ 66 | "#Random integer sample from given low to high range: randint\n", 67 | "X2 = np.random.randint(1,11)\n", 68 | "X2" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 31, 74 | "metadata": {}, 75 | "outputs": [ 76 | { 77 | "data": { 78 | "text/plain": [ 79 | "array([-0.93786433, -1.3856563 , -0.33888054, -0.17925544, -0.24094727,\n", 80 | " -0.24769856, -0.16851994, 0.57370922, -0.95677318, -0.65152255])" 81 | ] 82 | }, 83 | "execution_count": 31, 84 | "metadata": {}, 85 | "output_type": "execute_result" 86 | } 87 | ], 88 | "source": [ 89 | "#Random sample from Normal Distribution with mean = 0 and Std = 1: randn\n", 90 | "X3 = np.random.randn(10)\n", 91 | "X3" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 32, 
97 | "metadata": {}, 98 | "outputs": [ 99 | { 100 | "data": { 101 | "text/plain": [ 102 | "array([3, 2, 2, 1, 0, 2, 2, 1, 1, 0, 0, 1, 0, 2, 1, 0, 2, 0, 0, 0, 0, 1,\n", 103 | " 0, 0, 3, 1, 2, 1, 0, 1, 1, 2, 0, 1, 1, 1, 2, 1, 2, 0, 0, 2, 0, 0,\n", 104 | " 0, 1, 2, 1, 1, 0, 1, 0, 1, 0, 1, 3, 0, 1, 0, 1, 2, 1, 0, 1, 2, 0,\n", 105 | " 0, 3, 1, 0, 1, 0, 3, 0, 3, 0, 0, 2, 0, 2, 1, 0, 1, 0, 0, 0, 1, 1,\n", 106 | " 2, 2, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0])" 107 | ] 108 | }, 109 | "execution_count": 32, 110 | "metadata": {}, 111 | "output_type": "execute_result" 112 | } 113 | ], 114 | "source": [ 115 | "#Random sample from Binomial Distribution: binomial\n", 116 | "X4 = np.random.binomial(10,0.10,100)\n", 117 | "X4" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 33, 123 | "metadata": {}, 124 | "outputs": [ 125 | { 126 | "data": { 127 | "text/plain": [ 128 | "10.176001143769438" 129 | ] 130 | }, 131 | "execution_count": 33, 132 | "metadata": {}, 133 | "output_type": "execute_result" 134 | } 135 | ], 136 | "source": [ 137 | "#Random sample from Gaussian Distribution: normal\n", 138 | "X5 = np.random.normal(10)\n", 139 | "X5" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 36, 145 | "metadata": {}, 146 | "outputs": [ 147 | { 148 | "data": { 149 | "text/plain": [ 150 | "0.35084536799172106" 151 | ] 152 | }, 153 | "execution_count": 36, 154 | "metadata": {}, 155 | "output_type": "execute_result" 156 | } 157 | ], 158 | "source": [ 159 | "#Random sample from Beta Distribution: beta\n", 160 | "X6 = np.random.beta(10,10)\n", 161 | "X6" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 37, 167 | "metadata": {}, 168 | "outputs": [ 169 | { 170 | "data": { 171 | "text/plain": [ 172 | "7.616863289989834" 173 | ] 174 | }, 175 | "execution_count": 37, 176 | "metadata": {}, 177 | "output_type": "execute_result" 178 | } 179 | ], 180 | "source": [ 181 | "#Random sample from Chi-square Distribution: chisquare\n", 
182 | "X7 = np.random.chisquare(10)\n", 183 | "X7" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 39, 189 | "metadata": {}, 190 | "outputs": [ 191 | { 192 | "data": { 193 | "text/plain": [ 194 | "6.390743513572102" 195 | ] 196 | }, 197 | "execution_count": 39, 198 | "metadata": {}, 199 | "output_type": "execute_result" 200 | } 201 | ], 202 | "source": [ 203 | "#Random sample from Gamma Distribution: gamma\n", 204 | "X8 = np.random.gamma(10)\n", 205 | "X8" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 40, 211 | "metadata": {}, 212 | "outputs": [ 213 | { 214 | "data": { 215 | "text/plain": [ 216 | "9.910002476795643" 217 | ] 218 | }, 219 | "execution_count": 40, 220 | "metadata": {}, 221 | "output_type": "execute_result" 222 | } 223 | ], 224 | "source": [ 225 | "#Random sample from uniform [0,1) Distribution: uniform\n", 226 | "X9 = np.random.uniform(10)\n", 227 | "X9" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": 45, 233 | "metadata": {}, 234 | "outputs": [ 235 | { 236 | "data": { 237 | "text/plain": [ 238 | "array([[ 0.3743189 , -1.84955116, -0.27810508, 0.31784693],\n", 239 | " [-0.35876103, -0.04193944, 1.25794105, 0.62834995],\n", 240 | " [-1.00292632, 0.78797796, -1.05711954, 0.54215727],\n", 241 | " [ 0.91940949, 0.05073418, 1.20135336, -0.08601853]])" 242 | ] 243 | }, 244 | "execution_count": 45, 245 | "metadata": {}, 246 | "output_type": "execute_result" 247 | } 248 | ], 249 | "source": [ 250 | "#In most functions stated above, we can use the size argument to tell the size of sample we want to generate\n", 251 | "X10 = np.random.normal(size = (4,4))\n", 252 | "X10" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": 52, 258 | "metadata": {}, 259 | "outputs": [ 260 | { 261 | "data": { 262 | "text/plain": [ 263 | "array([[1.96225376, 0.77559961],\n", 264 | " [0.09694872, 2.57011088],\n", 265 | " [2.84800908, 1.04277678],\n", 266 | " [2.43705663, 
1.76068542],\n", 267 | " [6.98429186, 0.72446192]])" 268 | ] 269 | }, 270 | "execution_count": 52, 271 | "metadata": {}, 272 | "output_type": "execute_result" 273 | } 274 | ], 275 | "source": [ 276 | "X11 = np.random.gamma(shape = 1 , scale = 2, size = (5,2))\n", 277 | "X11" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": 60, 283 | "metadata": {}, 284 | "outputs": [ 285 | { 286 | "data": { 287 | "text/plain": [ 288 | "array([[2.84800908, 1.04277678],\n", 289 | " [0.09694872, 2.57011088],\n", 290 | " [1.96225376, 0.77559961],\n", 291 | " [2.43705663, 1.76068542],\n", 292 | " [6.98429186, 0.72446192]])" 293 | ] 294 | }, 295 | "execution_count": 60, 296 | "metadata": {}, 297 | "output_type": "execute_result" 298 | } 299 | ], 300 | "source": [ 301 | "#Random Permutation function: permutation\n", 302 | "X12 = np.random.permutation(X11)\n", 303 | "X12" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": 61, 309 | "metadata": {}, 310 | "outputs": [ 311 | { 312 | "data": { 313 | "text/plain": [ 314 | "array([[2.43705663, 1.76068542],\n", 315 | " [1.96225376, 0.77559961],\n", 316 | " [2.84800908, 1.04277678],\n", 317 | " [6.98429186, 0.72446192],\n", 318 | " [0.09694872, 2.57011088]])" 319 | ] 320 | }, 321 | "execution_count": 61, 322 | "metadata": {}, 323 | "output_type": "execute_result" 324 | } 325 | ], 326 | "source": [ 327 | "#Random Shuffling function: shuffle\n", 328 | "np.random.shuffle(X11)\n", 329 | "X11" 330 | ] 331 | } 332 | ], 333 | "metadata": { 334 | "kernelspec": { 335 | "display_name": "Python 3", 336 | "language": "python", 337 | "name": "python3" 338 | }, 339 | "language_info": { 340 | "codemirror_mode": { 341 | "name": "ipython", 342 | "version": 3 343 | }, 344 | "file_extension": ".py", 345 | "mimetype": "text/x-python", 346 | "name": "python", 347 | "nbconvert_exporter": "python", 348 | "pygments_lexer": "ipython3", 349 | "version": "3.7.4" 350 | } 351 | }, 352 | "nbformat": 4, 353 | 
"nbformat_minor": 2 354 | } 355 | -------------------------------------------------------------------------------- /Basic Array Statistical Methods.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "data": { 10 | "text/plain": [ 11 | "array([[-0.97872554, -0.46356244, 1.3922046 , -0.96398794],\n", 12 | " [-0.50689651, 0.26098338, -0.65482834, 1.0093453 ],\n", 13 | " [-0.44440787, -2.3193851 , 0.7040289 , 0.08837219],\n", 14 | " [-1.22212494, 0.65296398, -0.03782775, 0.75681772],\n", 15 | " [-1.31233681, 1.2051613 , 0.01207815, 0.56413249]])" 16 | ] 17 | }, 18 | "execution_count": 2, 19 | "metadata": {}, 20 | "output_type": "execute_result" 21 | } 22 | ], 23 | "source": [ 24 | "#We can use statstical methods in numpy on arrays for data processing purposes\n", 25 | "\n", 26 | "#Some of these are mentioned as under:\n", 27 | "\n", 28 | "import numpy as np\n", 29 | "from numpy.random import randn\n", 30 | "\n", 31 | "data = randn(5,4)\n", 32 | "data" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 3, 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "data": { 42 | "text/plain": [ 43 | "-0.11289976128875331" 44 | ] 45 | }, 46 | "execution_count": 3, 47 | "metadata": {}, 48 | "output_type": "execute_result" 49 | } 50 | ], 51 | "source": [ 52 | "#Mean value function: mean\n", 53 | "np.mean(data)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 4, 59 | "metadata": {}, 60 | "outputs": [ 61 | { 62 | "data": { 63 | "text/plain": [ 64 | "array([-0.25351783, 0.02715096, -0.49284797, 0.03745725, 0.11725878])" 65 | ] 66 | }, 67 | "execution_count": 4, 68 | "metadata": {}, 69 | "output_type": "execute_result" 70 | } 71 | ], 72 | "source": [ 73 | "#Additional axis argument can be given to mean, which finds means along that particular axis\n", 74 | "np.mean(data, axis = 1)" 
75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 5, 80 | "metadata": {}, 81 | "outputs": [ 82 | { 83 | "data": { 84 | "text/plain": [ 85 | "-2.2579952257750664" 86 | ] 87 | }, 88 | "execution_count": 5, 89 | "metadata": {}, 90 | "output_type": "execute_result" 91 | } 92 | ], 93 | "source": [ 94 | "#Summation value function: sum\n", 95 | "np.sum(data)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 6, 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "data": { 105 | "text/plain": [ 106 | "array([-4.46449165, -0.66383889, 1.41565555, 1.45467976])" 107 | ] 108 | }, 109 | "execution_count": 6, 110 | "metadata": {}, 111 | "output_type": "execute_result" 112 | } 113 | ], 114 | "source": [ 115 | "#Additional axis argument can be given to sum, which finds sum along that particular axis\n", 116 | "np.sum(data, 0)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 7, 122 | "metadata": {}, 123 | "outputs": [ 124 | { 125 | "data": { 126 | "text/plain": [ 127 | "0.9387680172117109" 128 | ] 129 | }, 130 | "execution_count": 7, 131 | "metadata": {}, 132 | "output_type": "execute_result" 133 | } 134 | ], 135 | "source": [ 136 | "#Standard Deviation function: std\n", 137 | "np.std(data)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 13, 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "data": { 147 | "text/plain": [ 148 | "array([0.97252443, 0.66514769, 1.1301455 , 0.78875585, 0.92709688])" 149 | ] 150 | }, 151 | "execution_count": 13, 152 | "metadata": {}, 153 | "output_type": "execute_result" 154 | } 155 | ], 156 | "source": [ 157 | "#Additional argument allows to compute standard deviation along a certain dimension\n", 158 | "np.std(data, 1)" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 14, 164 | "metadata": {}, 165 | "outputs": [ 166 | { 167 | "data": { 168 | "text/plain": [ 169 | "0.8812853901396072" 170 | ] 171 | }, 172 | 
"execution_count": 14, 173 | "metadata": {}, 174 | "output_type": "execute_result" 175 | } 176 | ], 177 | "source": [ 178 | "#Variance function: var\n", 179 | "np.var(data)" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 15, 185 | "metadata": {}, 186 | "outputs": [ 187 | { 188 | "data": { 189 | "text/plain": [ 190 | "array([0.12836524, 1.4906378 , 0.49269024, 0.48473204])" 191 | ] 192 | }, 193 | "execution_count": 15, 194 | "metadata": {}, 195 | "output_type": "execute_result" 196 | } 197 | ], 198 | "source": [ 199 | "#Additional argument allows to compute variance along a particular axis\n", 200 | "np.var(data, 0)" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": 16, 206 | "metadata": {}, 207 | "outputs": [ 208 | { 209 | "data": { 210 | "text/plain": [ 211 | "1.3922046018160443" 212 | ] 213 | }, 214 | "execution_count": 16, 215 | "metadata": {}, 216 | "output_type": "execute_result" 217 | } 218 | ], 219 | "source": [ 220 | "#Maximum value function: max\n", 221 | "np.max(data)" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 17, 227 | "metadata": {}, 228 | "outputs": [ 229 | { 230 | "data": { 231 | "text/plain": [ 232 | "-2.3193851000495096" 233 | ] 234 | }, 235 | "execution_count": 17, 236 | "metadata": {}, 237 | "output_type": "execute_result" 238 | } 239 | ], 240 | "source": [ 241 | "#Minimum value function: min\n", 242 | "np.min(data)" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": 18, 248 | "metadata": {}, 249 | "outputs": [ 250 | { 251 | "data": { 252 | "text/plain": [ 253 | "2" 254 | ] 255 | }, 256 | "execution_count": 18, 257 | "metadata": {}, 258 | "output_type": "execute_result" 259 | } 260 | ], 261 | "source": [ 262 | "#Maximum Index Value Function: argmax\n", 263 | "np.argmax(data)" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 19, 269 | "metadata": {}, 270 | "outputs": [ 271 | { 272 | "data": { 273 | 
"text/plain": [ 274 | "9" 275 | ] 276 | }, 277 | "execution_count": 19, 278 | "metadata": {}, 279 | "output_type": "execute_result" 280 | } 281 | ], 282 | "source": [ 283 | "#Minimum Index Value Function: argmin\n", 284 | "np.argmin(data)" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": 39, 290 | "metadata": {}, 291 | "outputs": [ 292 | { 293 | "data": { 294 | "text/plain": [ 295 | "array([[-0.08561185, -0.09479014, 1.26101276],\n", 296 | " [ 0.09860458, -0.87188753, -0.7315243 ],\n", 297 | " [ 0.24212062, 1.11690374, 1.12849184],\n", 298 | " [-0.06623721, 1.71512039, -0.6974301 ]])" 299 | ] 300 | }, 301 | "execution_count": 39, 302 | "metadata": {}, 303 | "output_type": "execute_result" 304 | } 305 | ], 306 | "source": [ 307 | "#Cumulative Sum function: cumsum (Starting from 0)\n", 308 | "data2 = randn(4,3)\n", 309 | "data2" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": 40, 315 | "metadata": {}, 316 | "outputs": [ 317 | { 318 | "data": { 319 | "text/plain": [ 320 | "array([-0.08561185, -0.18040199, 1.08061077, 1.17921535, 0.30732782,\n", 321 | " -0.42419649, -0.18207587, 0.93482787, 2.06331971, 1.9970825 ,\n", 322 | " 3.71220289, 3.01477279])" 323 | ] 324 | }, 325 | "execution_count": 40, 326 | "metadata": {}, 327 | "output_type": "execute_result" 328 | } 329 | ], 330 | "source": [ 331 | "np.cumsum(data2)" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": 41, 337 | "metadata": {}, 338 | "outputs": [ 339 | { 340 | "data": { 341 | "text/plain": [ 342 | "array([-8.56118494e-02, 8.11515930e-03, 1.02333194e-02, 1.00905218e-03,\n", 343 | " -8.79780019e-04, 6.43580464e-04, 1.55824101e-04, 1.74040521e-04,\n", 344 | " 1.96403307e-04, -1.30092069e-05, -2.23123560e-05, 1.55613088e-05])" 345 | ] 346 | }, 347 | "execution_count": 41, 348 | "metadata": {}, 349 | "output_type": "execute_result" 350 | } 351 | ], 352 | "source": [ 353 | "#Cumulative product: cumprod (Starting from 1)\n", 354 | 
"np.cumprod(data2)" 355 | ] 356 | } 357 | ], 358 | "metadata": { 359 | "kernelspec": { 360 | "display_name": "Python 3", 361 | "language": "python", 362 | "name": "python3" 363 | }, 364 | "language_info": { 365 | "codemirror_mode": { 366 | "name": "ipython", 367 | "version": 3 368 | }, 369 | "file_extension": ".py", 370 | "mimetype": "text/x-python", 371 | "name": "python", 372 | "nbconvert_exporter": "python", 373 | "pygments_lexer": "ipython3", 374 | "version": "3.7.4" 375 | } 376 | }, 377 | "nbformat": 4, 378 | "nbformat_minor": 2 379 | } 380 | -------------------------------------------------------------------------------- /Dropping Entries from Axis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 5, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "data": { 10 | "text/plain": [ 11 | "a 0.0\n", 12 | "b 1.0\n", 13 | "c 2.0\n", 14 | "d 3.0\n", 15 | "e 4.0\n", 16 | "dtype: float64" 17 | ] 18 | }, 19 | "execution_count": 5, 20 | "metadata": {}, 21 | "output_type": "execute_result" 22 | } 23 | ], 24 | "source": [ 25 | "#Now we view another functionalities i.e. 
dropping an entry into an array which does not have those entries through an axis\n", 26 | "\n", 27 | "import pandas as pd\n", 28 | "import numpy as np\n", 29 | "from pandas import Series, DataFrame\n", 30 | "\n", 31 | "series_obj = Series(np.arange(5.), index = ['a','b','c','d','e'])\n", 32 | "series_obj" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 7, 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "data": { 42 | "text/plain": [ 43 | "a 0.0\n", 44 | "b 1.0\n", 45 | "d 3.0\n", 46 | "e 4.0\n", 47 | "dtype: float64" 48 | ] 49 | }, 50 | "execution_count": 7, 51 | "metadata": {}, 52 | "output_type": "execute_result" 53 | } 54 | ], 55 | "source": [ 56 | "#Values can be dropped using the drop method\n", 57 | "series_obj.drop('c')" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 8, 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "data": { 67 | "text/plain": [ 68 | "b 1.0\n", 69 | "e 4.0\n", 70 | "dtype: float64" 71 | ] 72 | }, 73 | "execution_count": 8, 74 | "metadata": {}, 75 | "output_type": "execute_result" 76 | } 77 | ], 78 | "source": [ 79 | "#The values to be dropped can also be a list of values\n", 80 | "series_obj.drop(['a','c','d'])" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 13, 86 | "metadata": {}, 87 | "outputs": [ 88 | { 89 | "data": { 90 | "text/html": [ 91 | "
\n", 92 | "\n", 105 | "\n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | "
JulyDecember
1998NaN19.0
200112.0NaN
20033.021.0
\n", 131 | "
" 132 | ], 133 | "text/plain": [ 134 | " July December\n", 135 | "1998 NaN 19.0\n", 136 | "2001 12.0 NaN\n", 137 | "2003 3.0 21.0" 138 | ] 139 | }, 140 | "execution_count": 13, 141 | "metadata": {}, 142 | "output_type": "execute_result" 143 | } 144 | ], 145 | "source": [ 146 | "#In case of DataFrame, values canbe dropped from any axis given the axis number\n", 147 | "frame_data = {'July': Series([12,3], index = [2001,2003]), 'December': Series([19,21], index = [1998,2003])}\n", 148 | "frame_obj = DataFrame(frame_data)\n", 149 | "frame_obj" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 16, 155 | "metadata": {}, 156 | "outputs": [ 157 | { 158 | "data": { 159 | "text/html": [ 160 | "
\n", 161 | "\n", 174 | "\n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | "
JulyDecember
1998NaN19.0
20033.021.0
\n", 195 | "
" 196 | ], 197 | "text/plain": [ 198 | " July December\n", 199 | "1998 NaN 19.0\n", 200 | "2003 3.0 21.0" 201 | ] 202 | }, 203 | "execution_count": 16, 204 | "metadata": {}, 205 | "output_type": "execute_result" 206 | } 207 | ], 208 | "source": [ 209 | "frame_obj.drop([2001])" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 17, 215 | "metadata": {}, 216 | "outputs": [ 217 | { 218 | "data": { 219 | "text/html": [ 220 | "
\n", 221 | "\n", 234 | "\n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | "
JulyDecember
200112.0NaN
\n", 250 | "
" 251 | ], 252 | "text/plain": [ 253 | " July December\n", 254 | "2001 12.0 NaN" 255 | ] 256 | }, 257 | "execution_count": 17, 258 | "metadata": {}, 259 | "output_type": "execute_result" 260 | } 261 | ], 262 | "source": [ 263 | "frame_obj.drop([1998,2003])" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 18, 269 | "metadata": {}, 270 | "outputs": [ 271 | { 272 | "data": { 273 | "text/html": [ 274 | "
\n", 275 | "\n", 288 | "\n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | "
December
199819.0
2001NaN
200321.0
\n", 310 | "
" 311 | ], 312 | "text/plain": [ 313 | " December\n", 314 | "1998 19.0\n", 315 | "2001 NaN\n", 316 | "2003 21.0" 317 | ] 318 | }, 319 | "execution_count": 18, 320 | "metadata": {}, 321 | "output_type": "execute_result" 322 | } 323 | ], 324 | "source": [ 325 | "frame_obj.drop(['July'],axis=1)" 326 | ] 327 | } 328 | ], 329 | "metadata": { 330 | "kernelspec": { 331 | "display_name": "Python 3", 332 | "language": "python", 333 | "name": "python3" 334 | }, 335 | "language_info": { 336 | "codemirror_mode": { 337 | "name": "ipython", 338 | "version": 3 339 | }, 340 | "file_extension": ".py", 341 | "mimetype": "text/x-python", 342 | "name": "python", 343 | "nbconvert_exporter": "python", 344 | "pygments_lexer": "ipython3", 345 | "version": "3.7.4" 346 | } 347 | }, 348 | "nbformat": 4, 349 | "nbformat_minor": 2 350 | } 351 | -------------------------------------------------------------------------------- /Index Object Methods.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 11, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "data": { 10 | "text/html": [ 11 | "
\n", 12 | "\n", 25 | "\n", 26 | " \n", 27 | " \n", 28 | " \n", 29 | " \n", 30 | " \n", 31 | " \n", 32 | " \n", 33 | " \n", 34 | " \n", 35 | " \n", 36 | " \n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | "
values
00.472986
1-0.681426
20.242439
3-1.700736
\n", 51 | "
" 52 | ], 53 | "text/plain": [ 54 | " values\n", 55 | "0 0.472986\n", 56 | "1 -0.681426\n", 57 | "2 0.242439\n", 58 | "3 -1.700736" 59 | ] 60 | }, 61 | "execution_count": 11, 62 | "metadata": {}, 63 | "output_type": "execute_result" 64 | } 65 | ], 66 | "source": [ 67 | "#There are a certain methods which can be applied on the index objects listed below:\n", 68 | "\n", 69 | "import numpy as np\n", 70 | "import pandas as pd\n", 71 | "from pandas import Series, DataFrame\n", 72 | "\n", 73 | "np.random.seed(12)\n", 74 | "frame = DataFrame(np.random.randn(4), columns = ['values'], index = pd.Index(np.arange(4)))\n", 75 | "frame" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 13, 81 | "metadata": {}, 82 | "outputs": [ 83 | { 84 | "data": { 85 | "text/plain": [ 86 | "Int64Index([0, 1, 2, 3], dtype='int64')" 87 | ] 88 | }, 89 | "execution_count": 13, 90 | "metadata": {}, 91 | "output_type": "execute_result" 92 | } 93 | ], 94 | "source": [ 95 | "index_obj1 = frame.index\n", 96 | "index_obj1" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 14, 102 | "metadata": {}, 103 | "outputs": [ 104 | { 105 | "data": { 106 | "text/plain": [ 107 | "2 2\n", 108 | "3 3\n", 109 | "4 4\n", 110 | "5 5\n", 111 | "dtype: int64" 112 | ] 113 | }, 114 | "execution_count": 14, 115 | "metadata": {}, 116 | "output_type": "execute_result" 117 | } 118 | ], 119 | "source": [ 120 | "series = Series([2,3,4,5], index = np.array([2,3,4,5]))\n", 121 | "series" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 16, 127 | "metadata": {}, 128 | "outputs": [ 129 | { 130 | "data": { 131 | "text/plain": [ 132 | "Int64Index([2, 3, 4, 5], dtype='int64')" 133 | ] 134 | }, 135 | "execution_count": 16, 136 | "metadata": {}, 137 | "output_type": "execute_result" 138 | } 139 | ], 140 | "source": [ 141 | "index_obj2 = series.index\n", 142 | "index_obj2" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 21, 148 | 
"metadata": {}, 149 | "outputs": [ 150 | { 151 | "data": { 152 | "text/plain": [ 153 | "Int64Index([0, 1, 2, 3, 2, 3, 4, 5], dtype='int64')" 154 | ] 155 | }, 156 | "execution_count": 21, 157 | "metadata": {}, 158 | "output_type": "execute_result" 159 | } 160 | ], 161 | "source": [ 162 | "#Concatenate two indexes\n", 163 | "pd.Index.append(index_obj1, index_obj2)" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 27, 169 | "metadata": {}, 170 | "outputs": [ 171 | { 172 | "data": { 173 | "text/plain": [ 174 | "Int64Index([0, 1], dtype='int64')" 175 | ] 176 | }, 177 | "execution_count": 27, 178 | "metadata": {}, 179 | "output_type": "execute_result" 180 | } 181 | ], 182 | "source": [ 183 | "#Set Difference of two indexes\n", 184 | "pd.Index.difference(index_obj1, index_obj2)" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 28, 190 | "metadata": {}, 191 | "outputs": [ 192 | { 193 | "data": { 194 | "text/plain": [ 195 | "Int64Index([2, 3], dtype='int64')" 196 | ] 197 | }, 198 | "execution_count": 28, 199 | "metadata": {}, 200 | "output_type": "execute_result" 201 | } 202 | ], 203 | "source": [ 204 | "#Intersection of two indexes\n", 205 | "pd.Index.intersection(index_obj1, index_obj2)" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 29, 211 | "metadata": {}, 212 | "outputs": [ 213 | { 214 | "data": { 215 | "text/plain": [ 216 | "Int64Index([0, 1, 2, 3, 4, 5], dtype='int64')" 217 | ] 218 | }, 219 | "execution_count": 29, 220 | "metadata": {}, 221 | "output_type": "execute_result" 222 | } 223 | ], 224 | "source": [ 225 | "#Union of two indexes\n", 226 | "pd.Index.union(index_obj1, index_obj2)" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": 30, 232 | "metadata": {}, 233 | "outputs": [ 234 | { 235 | "data": { 236 | "text/plain": [ 237 | "array([False, False, True, True])" 238 | ] 239 | }, 240 | "execution_count": 30, 241 | "metadata": {}, 242 | "output_type": 
"execute_result" 243 | } 244 | ], 245 | "source": [ 246 | "#Comparision if both have same indexes\n", 247 | "pd.Index.isin(index_obj1, index_obj2)" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": 36, 253 | "metadata": {}, 254 | "outputs": [ 255 | { 256 | "data": { 257 | "text/plain": [ 258 | "Int64Index([0, 2, 3], dtype='int64')" 259 | ] 260 | }, 261 | "execution_count": 36, 262 | "metadata": {}, 263 | "output_type": "execute_result" 264 | } 265 | ], 266 | "source": [ 267 | "#Delete a certain index position\n", 268 | "pd.Index.delete(index_obj1, 1)" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": 37, 274 | "metadata": {}, 275 | "outputs": [ 276 | { 277 | "data": { 278 | "text/plain": [ 279 | "Int64Index([0, 3], dtype='int64')" 280 | ] 281 | }, 282 | "execution_count": 37, 283 | "metadata": {}, 284 | "output_type": "execute_result" 285 | } 286 | ], 287 | "source": [ 288 | "#Delete a certain index by its index value\n", 289 | "pd.Index.drop(index_obj1, [1,2])" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": 39, 295 | "metadata": {}, 296 | "outputs": [ 297 | { 298 | "data": { 299 | "text/plain": [ 300 | "Index([0, 1, 'New', 2, 3], dtype='object')" 301 | ] 302 | }, 303 | "execution_count": 39, 304 | "metadata": {}, 305 | "output_type": "execute_result" 306 | } 307 | ], 308 | "source": [ 309 | "#Insert a new index\n", 310 | "pd.Index.insert(index_obj1, 2, 'New')" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": 46, 316 | "metadata": {}, 317 | "outputs": [ 318 | { 319 | "data": { 320 | "text/plain": [ 321 | "True" 322 | ] 323 | }, 324 | "execution_count": 46, 325 | "metadata": {}, 326 | "output_type": "execute_result" 327 | } 328 | ], 329 | "source": [ 330 | "#Checks for increasing monotonic sequence\n", 331 | "index_obj1.is_monotonic_increasing" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": 47, 337 | "metadata": {}, 338 | 
"outputs": [ 339 | { 340 | "data": { 341 | "text/plain": [ 342 | "True" 343 | ] 344 | }, 345 | "execution_count": 47, 346 | "metadata": {}, 347 | "output_type": "execute_result" 348 | } 349 | ], 350 | "source": [ 351 | "#Checks if index has no duplicate values\n", 352 | "index_obj1.is_unique" 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": 49, 358 | "metadata": {}, 359 | "outputs": [ 360 | { 361 | "data": { 362 | "text/plain": [ 363 | "Int64Index([0, 1, 2, 3, 4, 5], dtype='int64')" 364 | ] 365 | }, 366 | "execution_count": 49, 367 | "metadata": {}, 368 | "output_type": "execute_result" 369 | } 370 | ], 371 | "source": [ 372 | "#Computes array of unique values in index only\n", 373 | "pd.Index.unique(pd.Index.append(index_obj1,index_obj2))" 374 | ] 375 | } 376 | ], 377 | "metadata": { 378 | "kernelspec": { 379 | "display_name": "Python 3", 380 | "language": "python", 381 | "name": "python3" 382 | }, 383 | "language_info": { 384 | "codemirror_mode": { 385 | "name": "ipython", 386 | "version": 3 387 | }, 388 | "file_extension": ".py", 389 | "mimetype": "text/x-python", 390 | "name": "python", 391 | "nbconvert_exporter": "python", 392 | "pygments_lexer": "ipython3", 393 | "version": "3.7.4" 394 | } 395 | }, 396 | "nbformat": 4, 397 | "nbformat_minor": 2 398 | } 399 | -------------------------------------------------------------------------------- /Unique Values, Value Counts and Membership.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#We now look into more operations on Sequence of values\n", 10 | "import pandas as pd\n", 11 | "import numpy as np\n", 12 | "from pandas import Series,DataFrame" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [ 20 | { 21 | "data": { 22 | "text/plain": [ 23 | "0 a\n", 24 
| "1 b\n", 25 | "2 d\n", 26 | "3 e\n", 27 | "4 c\n", 28 | "5 d\n", 29 | "6 a\n", 30 | "7 f\n", 31 | "8 e\n", 32 | "9 g\n", 33 | "10 a\n", 34 | "11 d\n", 35 | "12 e\n", 36 | "dtype: object" 37 | ] 38 | }, 39 | "execution_count": 2, 40 | "metadata": {}, 41 | "output_type": "execute_result" 42 | } 43 | ], 44 | "source": [ 45 | "series_obj = Series(list('abdecdafegade'))\n", 46 | "series_obj" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 24, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "data": { 56 | "text/html": [ 57 | "
\n", 58 | "\n", 71 | "\n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | "
Data1Data2Data3
0015
1136
2257
3378
\n", 107 | "
" 108 | ], 109 | "text/plain": [ 110 | " Data1 Data2 Data3\n", 111 | "0 0 1 5\n", 112 | "1 1 3 6\n", 113 | "2 2 5 7\n", 114 | "3 3 7 8" 115 | ] 116 | }, 117 | "execution_count": 24, 118 | "metadata": {}, 119 | "output_type": "execute_result" 120 | } 121 | ], 122 | "source": [ 123 | "frame_obj = DataFrame({'Data1':np.arange(0,4), 'Data2':np.array([1,3,5,7]), 'Data3':np.arange(5,9)})\n", 124 | "frame_obj" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 5, 130 | "metadata": {}, 131 | "outputs": [ 132 | { 133 | "data": { 134 | "text/plain": [ 135 | "array(['a', 'b', 'd', 'e', 'c', 'f', 'g'], dtype=object)" 136 | ] 137 | }, 138 | "execution_count": 5, 139 | "metadata": {}, 140 | "output_type": "execute_result" 141 | } 142 | ], 143 | "source": [ 144 | "#Unique value method: unique (For Series)\n", 145 | "series_obj.unique() #Gives a list of unique values in Series object" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 20, 151 | "metadata": {}, 152 | "outputs": [ 153 | { 154 | "data": { 155 | "text/plain": [ 156 | "d 3\n", 157 | "e 3\n", 158 | "a 3\n", 159 | "f 1\n", 160 | "b 1\n", 161 | "g 1\n", 162 | "c 1\n", 163 | "dtype: int64" 164 | ] 165 | }, 166 | "execution_count": 20, 167 | "metadata": {}, 168 | "output_type": "execute_result" 169 | } 170 | ], 171 | "source": [ 172 | "#Value count method: value_counts (For Series)\n", 173 | "series_obj.value_counts() #Gives a Series containing the value frequency of each item" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 21, 179 | "metadata": {}, 180 | "outputs": [ 181 | { 182 | "data": { 183 | "text/plain": [ 184 | "a 3\n", 185 | "e 3\n", 186 | "d 3\n", 187 | "c 1\n", 188 | "g 1\n", 189 | "b 1\n", 190 | "f 1\n", 191 | "dtype: int64" 192 | ] 193 | }, 194 | "execution_count": 21, 195 | "metadata": {}, 196 | "output_type": "execute_result" 197 | } 198 | ], 199 | "source": [ 200 | "#This method is also available for pandas library i.e.\n", 201 
| "pd.value_counts(series_obj, sort=False) " 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 22, 207 | "metadata": {}, 208 | "outputs": [ 209 | { 210 | "data": { 211 | "text/plain": [ 212 | "0 True\n", 213 | "1 False\n", 214 | "2 True\n", 215 | "3 False\n", 216 | "4 False\n", 217 | "5 True\n", 218 | "6 True\n", 219 | "7 False\n", 220 | "8 False\n", 221 | "9 True\n", 222 | "10 True\n", 223 | "11 True\n", 224 | "12 False\n", 225 | "dtype: bool" 226 | ] 227 | }, 228 | "execution_count": 22, 229 | "metadata": {}, 230 | "output_type": "execute_result" 231 | } 232 | ], 233 | "source": [ 234 | "#Comparison method: isin (Returns a Boolean value)\n", 235 | "series_obj.isin(list('agd')) #Returns True for elements in provided list, returns False for other elements" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 25, 241 | "metadata": {}, 242 | "outputs": [ 243 | { 244 | "data": { 245 | "text/html": [ 246 | "
\n", 247 | "\n", 260 | "\n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | "
Data1Data2Data3
01.00.00.0
11.01.00.0
21.00.00.0
31.01.00.0
50.01.01.0
60.00.01.0
70.01.01.0
80.00.01.0
\n", 320 | "
" 321 | ], 322 | "text/plain": [ 323 | " Data1 Data2 Data3\n", 324 | "0 1.0 0.0 0.0\n", 325 | "1 1.0 1.0 0.0\n", 326 | "2 1.0 0.0 0.0\n", 327 | "3 1.0 1.0 0.0\n", 328 | "5 0.0 1.0 1.0\n", 329 | "6 0.0 0.0 1.0\n", 330 | "7 0.0 1.0 1.0\n", 331 | "8 0.0 0.0 1.0" 332 | ] 333 | }, 334 | "execution_count": 25, 335 | "metadata": {}, 336 | "output_type": "execute_result" 337 | } 338 | ], 339 | "source": [ 340 | "#Applying value_counts method on DataFrame\n", 341 | "#We can use the apply method to apply the value_counts method on the DataFrame object to get frequency values of items in the columns, this may help in creating various charts like Histogram, etc.\n", 342 | "\n", 343 | "hist_res = frame_obj.apply(pd.value_counts).fillna(0) #Calculates frequency value and puts 0 for all NaN values\n", 344 | "hist_res" 345 | ] 346 | } 347 | ], 348 | "metadata": { 349 | "kernelspec": { 350 | "display_name": "Python 3", 351 | "language": "python", 352 | "name": "python3" 353 | }, 354 | "language_info": { 355 | "codemirror_mode": { 356 | "name": "ipython", 357 | "version": 3 358 | }, 359 | "file_extension": ".py", 360 | "mimetype": "text/x-python", 361 | "name": "python", 362 | "nbconvert_exporter": "python", 363 | "pygments_lexer": "ipython3", 364 | "version": "3.7.4" 365 | } 366 | }, 367 | "nbformat": 4, 368 | "nbformat_minor": 2 369 | } 370 | -------------------------------------------------------------------------------- /Handling Data from Databases.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#We can also handle data from daabases using pandas\n", 10 | "#For this case, we will be using sqllite as our database\n", 11 | "\n", 12 | "import sqlite3 as sqlite\n", 13 | "import pandas as pd\n", 14 | "import numpy as np\n", 15 | "from pandas import DataFrame" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 
20 | "execution_count": 2, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "#Creating a query \n", 25 | "\n", 26 | "query = \"\"\"\n", 27 | " CREATE TABLE test\n", 28 | " (a VARCHAR(20), b VARCHAR(20),\n", 29 | " c REAL, d INTEGER\n", 30 | " );\n", 31 | "\"\"\"" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 3, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "#Connecting to the database with memory i.e. the database will reside in RAM instead of on disk\n", 41 | "con = sqlite.connect(':memory:')" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 4, 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "data": { 51 | "text/plain": [ 52 | "" 53 | ] 54 | }, 55 | "execution_count": 4, 56 | "metadata": {}, 57 | "output_type": "execute_result" 58 | } 59 | ], 60 | "source": [ 61 | "#Executing the query on the database\n", 62 | "#First it creates a cursor object which would be used throughout the database\n", 63 | "#After that the cursor executes the SQL statement passed as query to the database\n", 64 | "con.execute(query)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 5, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "#Commiting to the database connection so that the changes can be seen by other db connections\n", 74 | "con.commit()" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 6, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "#Creating data to insert into the database\n", 84 | "data = [('Andrew','Garfield',12.5,21),\n", 85 | " ('John','Phillips',13.7,32),\n", 86 | " ('Dough','Don',41.6,78)]" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 7, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "#Creating insert statement to be executed by the database\n", 96 | "stmnt = \"INSERT INTO test VALUES(?,?,?,?)\"" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 
8, 102 | "metadata": {}, 103 | "outputs": [ 104 | { 105 | "data": { 106 | "text/plain": [ 107 | "" 108 | ] 109 | }, 110 | "execution_count": 8, 111 | "metadata": {}, 112 | "output_type": "execute_result" 113 | } 114 | ], 115 | "source": [ 116 | "#Executing the statement for all entries in data\n", 117 | "con.executemany(stmnt, data)" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 9, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "con.commit()" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 10, 132 | "metadata": {}, 133 | "outputs": [], 134 | "source": [ 135 | "#Executing the SELECT query\n", 136 | "#In case of python SQLdrivers, the data is usually in a list of tuples\n", 137 | "\n", 138 | "#The cursor will execute the select all query on the test table\n", 139 | "cursor = con.execute('select * from test') " 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 11, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "rows = cursor.fetchall() #Fetches all datas in the rows of a table and returns as a list of tuples" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 12, 154 | "metadata": {}, 155 | "outputs": [ 156 | { 157 | "data": { 158 | "text/plain": [ 159 | "[('Andrew', 'Garfield', 12.5, 21),\n", 160 | " ('John', 'Phillips', 13.7, 32),\n", 161 | " ('Dough', 'Don', 41.6, 78)]" 162 | ] 163 | }, 164 | "execution_count": 12, 165 | "metadata": {}, 166 | "output_type": "execute_result" 167 | } 168 | ], 169 | "source": [ 170 | "rows #Displays the data in the table" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 13, 176 | "metadata": {}, 177 | "outputs": [ 178 | { 179 | "data": { 180 | "text/plain": [ 181 | "(('a', None, None, None, None, None, None),\n", 182 | " ('b', None, None, None, None, None, None),\n", 183 | " ('c', None, None, None, None, None, None),\n", 184 | " ('d', None, None, None, 
None, None, None))" 185 | ] 186 | }, 187 | "execution_count": 13, 188 | "metadata": {}, 189 | "output_type": "execute_result" 190 | } 191 | ], 192 | "source": [ 193 | "cursor.description #Describes the column values for a result" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 14, 199 | "metadata": {}, 200 | "outputs": [], 201 | "source": [ 202 | "#Storing rows as rows in dataframe and the first element of each tuple in description list as indexes\n", 203 | "db_df = DataFrame(rows, columns = list(zip(*cursor.description))[0])" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 15, 209 | "metadata": {}, 210 | "outputs": [ 211 | { 212 | "data": { 213 | "text/html": [ 214 | "
\n", 215 | "\n", 228 | "\n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | "
abcd
0AndrewGarfield12.521
1JohnPhillips13.732
2DoughDon41.678
\n", 262 | "
" 263 | ], 264 | "text/plain": [ 265 | " a b c d\n", 266 | "0 Andrew Garfield 12.5 21\n", 267 | "1 John Phillips 13.7 32\n", 268 | "2 Dough Don 41.6 78" 269 | ] 270 | }, 271 | "execution_count": 15, 272 | "metadata": {}, 273 | "output_type": "execute_result" 274 | } 275 | ], 276 | "source": [ 277 | "db_df" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": 16, 283 | "metadata": {}, 284 | "outputs": [ 285 | { 286 | "data": { 287 | "text/html": [ 288 | "
\n", 289 | "\n", 302 | "\n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | "
abcd
0AndrewGarfield12.521
1JohnPhillips13.732
2DoughDon41.678
\n", 336 | "
" 337 | ], 338 | "text/plain": [ 339 | " a b c d\n", 340 | "0 Andrew Garfield 12.5 21\n", 341 | "1 John Phillips 13.7 32\n", 342 | "2 Dough Don 41.6 78" 343 | ] 344 | }, 345 | "execution_count": 16, 346 | "metadata": {}, 347 | "output_type": "execute_result" 348 | } 349 | ], 350 | "source": [ 351 | "#Another method of creating dataframe using the database data is by simply using read_sql method from pandas sql module\n", 352 | "\n", 353 | "import pandas.io.sql as sql\n", 354 | "\n", 355 | "sql.read_sql('select * from test', con) #Reads SQL query into a dataframe" 356 | ] 357 | } 358 | ], 359 | "metadata": { 360 | "kernelspec": { 361 | "display_name": "Python 3", 362 | "language": "python", 363 | "name": "python3" 364 | }, 365 | "language_info": { 366 | "codemirror_mode": { 367 | "name": "ipython", 368 | "version": 3 369 | }, 370 | "file_extension": ".py", 371 | "mimetype": "text/x-python", 372 | "name": "python", 373 | "nbconvert_exporter": "python", 374 | "pygments_lexer": "ipython3", 375 | "version": "3.8.2" 376 | } 377 | }, 378 | "nbformat": 4, 379 | "nbformat_minor": 4 380 | } 381 | -------------------------------------------------------------------------------- /Removing Duplicates.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#We now look at a method to perform data cleaning i.e. 
removing duplicate values from a dataframe\n", 10 | "\n", 11 | "import pandas as pd\n", 12 | "from pandas import DataFrame\n", 13 | "import numpy as np" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 4, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "#Consider a dataframe with duplicate values\n", 23 | "data = DataFrame({'A': ['one'] * 4 + ['two']*3, 'B':[1,1,2,3,3,4,4]})" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 5, 29 | "metadata": {}, 30 | "outputs": [ 31 | { 32 | "data": { 33 | "text/html": [ 34 | "
\n", 35 | "\n", 48 | "\n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | "
AB
0one1
1one1
2one2
3one3
4two3
5two4
6two4
\n", 94 | "
" 95 | ], 96 | "text/plain": [ 97 | " A B\n", 98 | "0 one 1\n", 99 | "1 one 1\n", 100 | "2 one 2\n", 101 | "3 one 3\n", 102 | "4 two 3\n", 103 | "5 two 4\n", 104 | "6 two 4" 105 | ] 106 | }, 107 | "execution_count": 5, 108 | "metadata": {}, 109 | "output_type": "execute_result" 110 | } 111 | ], 112 | "source": [ 113 | "data" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 6, 119 | "metadata": {}, 120 | "outputs": [ 121 | { 122 | "data": { 123 | "text/plain": [ 124 | "0 False\n", 125 | "1 True\n", 126 | "2 False\n", 127 | "3 False\n", 128 | "4 False\n", 129 | "5 False\n", 130 | "6 True\n", 131 | "dtype: bool" 132 | ] 133 | }, 134 | "execution_count": 6, 135 | "metadata": {}, 136 | "output_type": "execute_result" 137 | } 138 | ], 139 | "source": [ 140 | "#DataFrame has a method duplicated which returns a boolean series of whether the item is a duplicate or not\n", 141 | "data.duplicated()" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 7, 147 | "metadata": {}, 148 | "outputs": [ 149 | { 150 | "data": { 151 | "text/html": [ 152 | "
\n", 153 | "\n", 166 | "\n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | "
AB
0one1
2one2
3one3
4two3
5two4
\n", 202 | "
" 203 | ], 204 | "text/plain": [ 205 | " A B\n", 206 | "0 one 1\n", 207 | "2 one 2\n", 208 | "3 one 3\n", 209 | "4 two 3\n", 210 | "5 two 4" 211 | ] 212 | }, 213 | "execution_count": 7, 214 | "metadata": {}, 215 | "output_type": "execute_result" 216 | } 217 | ], 218 | "source": [ 219 | "#Using drop_duplicates we can remove these duplicates from the Dataframe\n", 220 | "data.drop_duplicates()" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": 8, 226 | "metadata": {}, 227 | "outputs": [ 228 | { 229 | "data": { 230 | "text/html": [ 231 | "
\n", 232 | "\n", 245 | "\n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | "
AB
0one1
4two3
\n", 266 | "
" 267 | ], 268 | "text/plain": [ 269 | " A B\n", 270 | "0 one 1\n", 271 | "4 two 3" 272 | ] 273 | }, 274 | "execution_count": 8, 275 | "metadata": {}, 276 | "output_type": "execute_result" 277 | } 278 | ], 279 | "source": [ 280 | "#Suppose we want to drop duplicates based on a particular column, then we can pass that column as a list to the method\n", 281 | "data.drop_duplicates(['A'])" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": 12, 287 | "metadata": {}, 288 | "outputs": [ 289 | { 290 | "data": { 291 | "text/html": [ 292 | "
\n", 293 | "\n", 306 | "\n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | "
AB
1one1
2one2
3one3
4two3
6two4
\n", 342 | "
" 343 | ], 344 | "text/plain": [ 345 | " A B\n", 346 | "1 one 1\n", 347 | "2 one 2\n", 348 | "3 one 3\n", 349 | "4 two 3\n", 350 | "6 two 4" 351 | ] 352 | }, 353 | "execution_count": 12, 354 | "metadata": {}, 355 | "output_type": "execute_result" 356 | } 357 | ], 358 | "source": [ 359 | "#In case of dropping, if we want the last combination of the duplicate values rathar than the first combination which is by default then we can give keep argument last value\n", 360 | "\n", 361 | "data.drop_duplicates(keep='last')" 362 | ] 363 | } 364 | ], 365 | "metadata": { 366 | "kernelspec": { 367 | "display_name": "Python 3", 368 | "language": "python", 369 | "name": "python3" 370 | }, 371 | "language_info": { 372 | "codemirror_mode": { 373 | "name": "ipython", 374 | "version": 3 375 | }, 376 | "file_extension": ".py", 377 | "mimetype": "text/x-python", 378 | "name": "python", 379 | "nbconvert_exporter": "python", 380 | "pygments_lexer": "ipython3", 381 | "version": "3.8.2" 382 | } 383 | }, 384 | "nbformat": 4, 385 | "nbformat_minor": 4 386 | } 387 | -------------------------------------------------------------------------------- /Apply Methods for DataFrames.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "data": { 10 | "text/html": [ 11 | "
\n", 12 | "\n", 25 | "\n", 26 | " \n", 27 | " \n", 28 | " \n", 29 | " \n", 30 | " \n", 31 | " \n", 32 | " \n", 33 | " \n", 34 | " \n", 35 | " \n", 36 | " \n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | "
bde
First1.8503280.000154-2.013637
Second0.318101-1.497292-0.198700
Third-0.5078360.8414052.139488
Fourth-0.6285870.9412621.083527
\n", 61 | "
" 62 | ], 63 | "text/plain": [ 64 | " b d e\n", 65 | "First 1.850328 0.000154 -2.013637\n", 66 | "Second 0.318101 -1.497292 -0.198700\n", 67 | "Third -0.507836 0.841405 2.139488\n", 68 | "Fourth -0.628587 0.941262 1.083527" 69 | ] 70 | }, 71 | "execution_count": 2, 72 | "metadata": {}, 73 | "output_type": "execute_result" 74 | } 75 | ], 76 | "source": [ 77 | "#Now we will deal with the apply methods for DataFrames\n", 78 | "\n", 79 | "import pandas as pd\n", 80 | "import numpy as np\n", 81 | "from pandas import Series, DataFrame\n", 82 | "\n", 83 | "frame_obj = DataFrame(np.random.randn(4,3), columns=list('bde'), index=['First','Second','Third','Fourth'])\n", 84 | "frame_obj" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 5, 90 | "metadata": {}, 91 | "outputs": [ 92 | { 93 | "data": { 94 | "text/plain": [ 95 | "b 2.478915\n", 96 | "d 2.438554\n", 97 | "e 4.153125\n", 98 | "dtype: float64" 99 | ] 100 | }, 101 | "execution_count": 5, 102 | "metadata": {}, 103 | "output_type": "execute_result" 104 | } 105 | ], 106 | "source": [ 107 | "#We can perform a particular function on a DataFrame's data using the Apply method \n", 108 | "\n", 109 | "f = lambda x: x.max() - x.min() #A simple function to calculate max-min value for a set of values\n", 110 | "\n", 111 | "#Now applying the function on the DataFrame\n", 112 | "frame_obj.apply(f) #Applies function to columns" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 6, 118 | "metadata": {}, 119 | "outputs": [ 120 | { 121 | "data": { 122 | "text/plain": [ 123 | "First 3.863965\n", 124 | "Second 1.815393\n", 125 | "Third 2.647324\n", 126 | "Fourth 1.712114\n", 127 | "dtype: float64" 128 | ] 129 | }, 130 | "execution_count": 6, 131 | "metadata": {}, 132 | "output_type": "execute_result" 133 | } 134 | ], 135 | "source": [ 136 | "#Now by defualt the axis value for this apply method is 0, we can alter it to apply the function to other axes\n", 137 | "frame_obj.apply(f, axis = 1) 
#Applies function to rows" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 8, 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "data": { 147 | "text/html": [ 148 | "
\n", 149 | "\n", 162 | "\n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | "
bde
min-0.628587-1.497292-2.013637
max1.8503280.9412622.139488
\n", 186 | "
" 187 | ], 188 | "text/plain": [ 189 | " b d e\n", 190 | "min -0.628587 -1.497292 -2.013637\n", 191 | "max 1.850328 0.941262 2.139488" 192 | ] 193 | }, 194 | "execution_count": 8, 195 | "metadata": {}, 196 | "output_type": "execute_result" 197 | } 198 | ], 199 | "source": [ 200 | "#The function that apply uses doesnt necessarily have to be a scalar value, we can return structures through it as well\n", 201 | "\n", 202 | "def f(x):\n", 203 | " return Series([min(x),max(x)], index=['min','max']) #returns a series containing max and min elements of x\n", 204 | "\n", 205 | "#Applying the function to DataFrame object\n", 206 | "frame_obj.apply(f) #Computes the max and min values and returns it as a Series as per the function" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 9, 212 | "metadata": {}, 213 | "outputs": [ 214 | { 215 | "data": { 216 | "text/html": [ 217 | "
\n", 218 | "\n", 231 | "\n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | "
minmax
First-2.0136371.850328
Second-1.4972920.318101
Third-0.5078362.139488
Fourth-0.6285871.083527
\n", 262 | "
" 263 | ], 264 | "text/plain": [ 265 | " min max\n", 266 | "First -2.013637 1.850328\n", 267 | "Second -1.497292 0.318101\n", 268 | "Third -0.507836 2.139488\n", 269 | "Fourth -0.628587 1.083527" 270 | ] 271 | }, 272 | "execution_count": 9, 273 | "metadata": {}, 274 | "output_type": "execute_result" 275 | } 276 | ], 277 | "source": [ 278 | "#In case of row wise\n", 279 | "frame_obj.apply(f, axis = 1)" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": 10, 285 | "metadata": {}, 286 | "outputs": [ 287 | { 288 | "data": { 289 | "text/html": [ 290 | "
\n", 291 | "\n", 304 | "\n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | "
bde
First1.8503280.000154-2.013637
Second0.318101-1.497292-0.198700
Third-0.5078360.8414052.139488
Fourth-0.6285870.9412621.083527
\n", 340 | "
" 341 | ], 342 | "text/plain": [ 343 | " b d e\n", 344 | "First 1.850328 0.000154 -2.013637\n", 345 | "Second 0.318101 -1.497292 -0.198700\n", 346 | "Third -0.507836 0.841405 2.139488\n", 347 | "Fourth -0.628587 0.941262 1.083527" 348 | ] 349 | }, 350 | "execution_count": 10, 351 | "metadata": {}, 352 | "output_type": "execute_result" 353 | } 354 | ], 355 | "source": [ 356 | "#In case of applying an element wise function, we have\n", 357 | "def format(x): #Returns the formatted string of the items in x\n", 358 | " return '%2f'%x\n", 359 | " \n", 360 | "frame_obj.applymap(format)" 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": 13, 366 | "metadata": {}, 367 | "outputs": [ 368 | { 369 | "data": { 370 | "text/plain": [ 371 | "First -2.013637\n", 372 | "Second -0.198700\n", 373 | "Third 2.139488\n", 374 | "Fourth 1.083527\n", 375 | "Name: e, dtype: object" 376 | ] 377 | }, 378 | "execution_count": 13, 379 | "metadata": {}, 380 | "output_type": "execute_result" 381 | } 382 | ], 383 | "source": [ 384 | "#In case we want to apply the function to only the 'e' column of the DataFrame\n", 385 | "frame_obj['e'].map(format)" 386 | ] 387 | } 388 | ], 389 | "metadata": { 390 | "kernelspec": { 391 | "display_name": "Python 3", 392 | "language": "python", 393 | "name": "python3" 394 | }, 395 | "language_info": { 396 | "codemirror_mode": { 397 | "name": "ipython", 398 | "version": 3 399 | }, 400 | "file_extension": ".py", 401 | "mimetype": "text/x-python", 402 | "name": "python", 403 | "nbconvert_exporter": "python", 404 | "pygments_lexer": "ipython3", 405 | "version": "3.7.4" 406 | } 407 | }, 408 | "nbformat": 4, 409 | "nbformat_minor": 2 410 | } 411 | -------------------------------------------------------------------------------- /Operations of Linear Algebra.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | 
"outputs": [], 8 | "source": [ 9 | "#Numpy Arrays can evaluate operations of Linear Algebra\n", 10 | "import numpy as np\n", 11 | "from numpy.random import randn\n", 12 | "\n", 13 | "A = np.array([[1,2,3],[4,5,6]]) #2x2 matrix\n", 14 | "B = np.array([[1,2],[3,4],[5,6]]) #3x2 matrix" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": {}, 21 | "outputs": [ 22 | { 23 | "data": { 24 | "text/plain": [ 25 | "array([[1, 2, 3],\n", 26 | " [4, 5, 6]])" 27 | ] 28 | }, 29 | "execution_count": 2, 30 | "metadata": {}, 31 | "output_type": "execute_result" 32 | } 33 | ], 34 | "source": [ 35 | "A" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "metadata": {}, 42 | "outputs": [ 43 | { 44 | "data": { 45 | "text/plain": [ 46 | "array([[1, 2],\n", 47 | " [3, 4],\n", 48 | " [5, 6]])" 49 | ] 50 | }, 51 | "execution_count": 3, 52 | "metadata": {}, 53 | "output_type": "execute_result" 54 | } 55 | ], 56 | "source": [ 57 | "B" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 5, 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "data": { 67 | "text/plain": [ 68 | "array([[22, 28],\n", 69 | " [49, 64]])" 70 | ] 71 | }, 72 | "execution_count": 5, 73 | "metadata": {}, 74 | "output_type": "execute_result" 75 | } 76 | ], 77 | "source": [ 78 | "#Dot product function: dot\n", 79 | "np.dot(A,B)" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 7, 85 | "metadata": {}, 86 | "outputs": [ 87 | { 88 | "data": { 89 | "text/plain": [ 90 | "array([ 6., 15.])" 91 | ] 92 | }, 93 | "execution_count": 7, 94 | "metadata": {}, 95 | "output_type": "execute_result" 96 | } 97 | ], 98 | "source": [ 99 | "#Using suitable matrix, we can get 1D result from dot product of a 2D and 1D array\n", 100 | "np.dot(A, np.ones(3))" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 11, 106 | "metadata": {}, 107 | "outputs": [ 108 | { 109 | "data": { 110 | "text/plain": [ 111 | 
"array([[-0.92330513, -0.27545177, 1.01606084, 0.47656796, -0.25881727],\n", 112 | " [ 0.92064529, -1.16781398, -3.13578792, -0.40787525, 1.56836767],\n", 113 | " [ 0.16504961, 1.64581198, 0.78009535, -1.51762351, -1.34317362],\n", 114 | " [-0.75060235, -0.21383346, -0.27263813, -0.68501248, -0.85525025],\n", 115 | " [-1.69681292, 0.46144554, -2.47395717, 0.87598453, 1.66015274]])" 116 | ] 117 | }, 118 | "execution_count": 11, 119 | "metadata": {}, 120 | "output_type": "execute_result" 121 | } 122 | ], 123 | "source": [ 124 | "#Numpy gives the linalg module to perform linear algebra functionalities\n", 125 | "\n", 126 | "import numpy.linalg\n", 127 | "\n", 128 | "X = randn(5, 5)\n", 129 | "X" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 13, 135 | "metadata": {}, 136 | "outputs": [ 137 | { 138 | "data": { 139 | "text/plain": [ 140 | "array([-0.92330513, -1.16781398, 0.78009535, -0.68501248, 1.66015274])" 141 | ] 142 | }, 143 | "execution_count": 13, 144 | "metadata": {}, 145 | "output_type": "execute_result" 146 | } 147 | ], 148 | "source": [ 149 | "#Diagonal Values Function: diag\n", 150 | "np.diag(X)" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 14, 156 | "metadata": {}, 157 | "outputs": [ 158 | { 159 | "data": { 160 | "text/plain": [ 161 | "-0.3358834923328249" 162 | ] 163 | }, 164 | "execution_count": 14, 165 | "metadata": {}, 166 | "output_type": "execute_result" 167 | } 168 | ], 169 | "source": [ 170 | "#Trace Value Function: trace\n", 171 | "np.trace(X)" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 15, 177 | "metadata": {}, 178 | "outputs": [ 179 | { 180 | "data": { 181 | "text/plain": [ 182 | "-7.284272432422328" 183 | ] 184 | }, 185 | "execution_count": 15, 186 | "metadata": {}, 187 | "output_type": "execute_result" 188 | } 189 | ], 190 | "source": [ 191 | "#Determinant Value Function: det\n", 192 | "np.linalg.det(X)" 193 | ] 194 | }, 195 | { 196 | "cell_type": 
"code", 197 | "execution_count": 16, 198 | "metadata": {}, 199 | "outputs": [ 200 | { 201 | "data": { 202 | "text/plain": [ 203 | "(array([ 2.16223833+0.j , -1.30624168+1.75899067j,\n", 204 | " -1.30624168-1.75899067j, 0.89686451+0.j ,\n", 205 | " -0.78250298+0.j ]),\n", 206 | " array([[ 0.16685568+0.j , -0.23207051+0.18212845j,\n", 207 | " -0.23207051-0.18212845j, 0.14041108+0.j ,\n", 208 | " -0.35315952+0.j ],\n", 209 | " [-0.43898452+0.j , 0.67822071+0.j ,\n", 210 | " 0.67822071-0.j , -0.27583369+0.j ,\n", 211 | " -0.53361388+0.j ],\n", 212 | " [ 0.06005357+0.j , -0.14210975-0.4837585j ,\n", 213 | " -0.14210975+0.4837585j , -0.23787721+0.j ,\n", 214 | " 0.20106415+0.j ],\n", 215 | " [ 0.23797086+0.j , 0.11225518-0.16507914j,\n", 216 | " 0.11225518+0.16507914j, 0.44685659+0.j ,\n", 217 | " -0.6774018 +0.j ],\n", 218 | " [-0.84806557+0.j , -0.1785741 -0.3564136j ,\n", 219 | " -0.1785741 +0.3564136j , -0.80494355+0.j ,\n", 220 | " 0.30205063+0.j ]]))" 221 | ] 222 | }, 223 | "execution_count": 16, 224 | "metadata": {}, 225 | "output_type": "execute_result" 226 | } 227 | ], 228 | "source": [ 229 | "#Eigen Value Function: eig\n", 230 | "np.linalg.eig(X)" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": 17, 236 | "metadata": {}, 237 | "outputs": [ 238 | { 239 | "data": { 240 | "text/plain": [ 241 | "array([[-1.14204732, -0.32262037, -0.37128838, -0.00564796, -0.17656773],\n", 242 | " [-0.71073733, -0.39354616, 0.11311952, -0.17575424, 0.2619634 ],\n", 243 | " [ 1.55309694, 0.52906657, 0.58561973, -0.74901444, -0.16974848],\n", 244 | " [-2.41421948, -1.29958748, -1.28384275, 0.70079412, 0.1736707 ],\n", 245 | " [ 2.61857842, 1.25378842, 1.1391838 , -1.44287746, 0.00447696]])" 246 | ] 247 | }, 248 | "execution_count": 17, 249 | "metadata": {}, 250 | "output_type": "execute_result" 251 | } 252 | ], 253 | "source": [ 254 | "#Inverse Value Function: inv\n", 255 | "np.linalg.inv(X)" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | 
"execution_count": 18, 261 | "metadata": {}, 262 | "outputs": [ 263 | { 264 | "data": { 265 | "text/plain": [ 266 | "array([[-1.14204732, -0.32262037, -0.37128838, -0.00564796, -0.17656773],\n", 267 | " [-0.71073733, -0.39354616, 0.11311952, -0.17575424, 0.2619634 ],\n", 268 | " [ 1.55309694, 0.52906657, 0.58561973, -0.74901444, -0.16974848],\n", 269 | " [-2.41421948, -1.29958748, -1.28384275, 0.70079412, 0.1736707 ],\n", 270 | " [ 2.61857842, 1.25378842, 1.1391838 , -1.44287746, 0.00447696]])" 271 | ] 272 | }, 273 | "execution_count": 18, 274 | "metadata": {}, 275 | "output_type": "execute_result" 276 | } 277 | ], 278 | "source": [ 279 | "#Pseudo Inverse Value Function (Moore-Penrose pseudo-inverse): pinv\n", 280 | "np.linalg.pinv(X)" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": 19, 286 | "metadata": {}, 287 | "outputs": [ 288 | { 289 | "data": { 290 | "text/plain": [ 291 | "(array([[-0.40607308, -0.23817498, 0.41949282, -0.14740517, -0.76202023],\n", 292 | " [ 0.40490327, -0.47132063, -0.63571499, -0.27686549, -0.36485909],\n", 293 | " [ 0.07258944, 0.82710805, -0.20271468, -0.39953961, -0.33150853],\n", 294 | " [-0.33011775, -0.18866395, 0.04695207, -0.82275165, 0.419885 ],\n", 295 | " [-0.74626472, 0.03778527, -0.61367295, 0.25507851, -0.00130282]]),\n", 296 | " array([[ 2.27374129, -0.51529989, 0.3105706 , -0.89641647, -0.31394444],\n", 297 | " [ 0. , 2.03506339, 1.83914228, -1.01416869, -1.56442575],\n", 298 | " [ 0. , 0. , 3.76696051, 0.19712304, -1.89227242],\n", 299 | " [ 0. , 0. , 0. , 1.43606868, 1.26770303],\n", 300 | " [ 0. , 0. , 0. , 0. 
, -0.29100531]]))" 301 | ] 302 | }, 303 | "execution_count": 19, 304 | "metadata": {}, 305 | "output_type": "execute_result" 306 | } 307 | ], 308 | "source": [ 309 | "#QR Decomposition Function: qr\n", 310 | "np.linalg.qr(X)" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": 20, 316 | "metadata": {}, 317 | "outputs": [ 318 | { 319 | "data": { 320 | "text/plain": [ 321 | "(array([[ 1.53279987e-01, 4.29782060e-01, 1.93259855e-01,\n", 322 | " -3.46098645e-01, -7.96654871e-01],\n", 323 | " [-6.92175955e-01, -5.91557891e-01, 7.26728861e-03,\n", 324 | " -1.75243654e-01, -3.74417624e-01],\n", 325 | " [ 3.83564000e-01, -2.89961580e-01, -7.81855475e-01,\n", 326 | " 1.83370734e-01, -3.51963251e-01],\n", 327 | " [ 4.98775688e-02, 4.76164759e-04, -3.26819100e-01,\n", 328 | " -8.89012542e-01, 3.16793274e-01],\n", 329 | " [-5.89736025e-01, 6.17469593e-01, -4.94458398e-01,\n", 330 | " 1.59803830e-01, 3.02712963e-02]]),\n", 331 | " array([5.05640143, 2.60015677, 2.23832488, 1.27342239, 0.19437907]),\n", 332 | " array([[ 0.04900083, 0.22043131, 0.80509004, -0.15376595, -0.52649313],\n", 333 | " [-0.78356076, 0.14616422, 0.20681983, 0.54870627, 0.14427594],\n", 334 | " [ 0.35004857, -0.67317684, 0.3913755 , 0.47644416, 0.21005981],\n", 335 | " [ 0.45909384, 0.67975922, 0.14759219, 0.29622587, 0.46650512],\n", 336 | " [ 0.22434416, 0.12168154, -0.36619305, 0.60044039, -0.6635038 ]]))" 337 | ] 338 | }, 339 | "execution_count": 20, 340 | "metadata": {}, 341 | "output_type": "execute_result" 342 | } 343 | ], 344 | "source": [ 345 | "#SVD Decomposition Function: svd\n", 346 | "np.linalg.svd(X)" 347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "execution_count": 22, 352 | "metadata": {}, 353 | "outputs": [ 354 | { 355 | "data": { 356 | "text/plain": [ 357 | "array([[-1.14204732, -0.32262037, -0.37128838, -0.00564796, -0.17656773],\n", 358 | " [-0.71073733, -0.39354616, 0.11311952, -0.17575424, 0.2619634 ],\n", 359 | " [ 1.55309694, 0.52906657, 
0.58561973, -0.74901444, -0.16974848],\n", 360 | "       [-2.41421948, -1.29958748, -1.28384275,  0.70079412,  0.1736707 ],\n", 361 | "       [ 2.61857842,  1.25378842,  1.1391838 , -1.44287746,  0.00447696]])" 362 | ] 363 | }, 364 | "execution_count": 22, 365 | "metadata": {}, 366 | "output_type": "execute_result" 367 | } 368 | ], 369 | "source": [ 370 | "#Linear System Solution Value Function (x for Ax = b): solve\n", 371 | "np.linalg.solve(X, np.eye(5))" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": 25, 377 | "metadata": {}, 378 | "outputs": [ 379 | { 380 | "data": { 381 | "text/plain": [ 382 | "array([-2.01817175, -0.9049548 ,  1.74902032, -4.12318489,  3.57315015])" 383 | ] 384 | }, 385 | "execution_count": 25, 386 | "metadata": {}, 387 | "output_type": "execute_result" 388 | } 389 | ], 390 | "source": [ 391 | "#Least Squares Solution Value Function (b for y = Xb): lstsq\n", 392 | "np.linalg.lstsq(X, np.ones(5), rcond=None)[0]" 393 | ] 394 | } 395 | ], 396 | "metadata": { 397 | "kernelspec": { 398 | "display_name": "Python 3", 399 | "language": "python", 400 | "name": "python3" 401 | }, 402 | "language_info": { 403 | "codemirror_mode": { 404 | "name": "ipython", 405 | "version": 3 406 | }, 407 | "file_extension": ".py", 408 | "mimetype": "text/x-python", 409 | "name": "python", 410 | "nbconvert_exporter": "python", 411 | "pygments_lexer": "ipython3", 412 | "version": "3.7.4" 413 | } 414 | }, 415 | "nbformat": 4, 416 | "nbformat_minor": 2 417 | } 418 | -------------------------------------------------------------------------------- /Renaming Axis Indexes.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#Like values, we can also modify the index labels using mapping\n", 10 | "import pandas as pd\n", 11 | "from pandas import DataFrame, Series\n", 12 | "import numpy as np" 13 | ] 14 | }, 15 | 
{ 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "#Consider a dataframe\n", 22 | "\n", 23 | "data = DataFrame(np.arange(12).reshape(3,4), index=list('abc'), columns=['one','two','three','four'])" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 3, 29 | "metadata": {}, 30 | "outputs": [ 31 | { 32 | "data": { 33 | "text/html": [ 34 | "
\n", 35 | "\n", 48 | "\n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | "
onetwothreefour
a0123
b4567
c891011
\n", 82 | "
" 83 | ], 84 | "text/plain": [ 85 | "   one  two  three  four\n", 86 | "a    0    1      2     3\n", 87 | "b    4    5      6     7\n", 88 | "c    8    9     10    11" 89 | ] 90 | }, 91 | "execution_count": 3, 92 | "metadata": {}, 93 | "output_type": "execute_result" 94 | } 95 | ], 96 | "source": [ 97 | "data" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 4, 103 | "metadata": {}, 104 | "outputs": [ 105 | { 106 | "data": { 107 | "text/plain": [ 108 | "Index(['A', 'B', 'C'], dtype='object')" 109 | ] 110 | }, 111 | "execution_count": 4, 112 | "metadata": {}, 113 | "output_type": "execute_result" 114 | } 115 | ], 116 | "source": [ 117 | "#Using the index map method we can assign new indexes to different values\n", 118 | "#Suppose we want to capitalize the index names\n", 119 | "\n", 120 | "data.index.map(str.upper)" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 5, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "#Now assigning it to the index, we get\n", 130 | "data.index = data.index.map(str.upper)" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 6, 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "data": { 140 | "text/html": [ 141 | "<div>
\n", 142 | "\n", 155 | "\n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | "
onetwothreefour
A0123
B4567
C891011
\n", 189 | "
" 190 | ], 191 | "text/plain": [ 192 | " one two three four\n", 193 | "A 0 1 2 3\n", 194 | "B 4 5 6 7\n", 195 | "C 8 9 10 11" 196 | ] 197 | }, 198 | "execution_count": 6, 199 | "metadata": {}, 200 | "output_type": "execute_result" 201 | } 202 | ], 203 | "source": [ 204 | "data" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": 7, 210 | "metadata": {}, 211 | "outputs": [ 212 | { 213 | "data": { 214 | "text/html": [ 215 | "
\n", 216 | "\n", 229 | "\n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | "
ONETWOTHREEFOUR
A0123
B4567
C891011
\n", 263 | "
" 264 | ], 265 | "text/plain": [ 266 | " ONE TWO THREE FOUR\n", 267 | "A 0 1 2 3\n", 268 | "B 4 5 6 7\n", 269 | "C 8 9 10 11" 270 | ] 271 | }, 272 | "execution_count": 7, 273 | "metadata": {}, 274 | "output_type": "execute_result" 275 | } 276 | ], 277 | "source": [ 278 | "#We can use the rename method to apply changes to the dataframe without actually modifying the actual one\n", 279 | "\n", 280 | "data.rename(index=str.title, columns=str.upper) #Capitalizes the columns" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": 9, 286 | "metadata": {}, 287 | "outputs": [ 288 | { 289 | "data": { 290 | "text/html": [ 291 | "
\n", 292 | "\n", 305 | "\n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | "
onetwofivefour
A0123
D4567
C891011
\n", 339 | "
" 340 | ], 341 | "text/plain": [ 342 | " one two five four\n", 343 | "A 0 1 2 3\n", 344 | "D 4 5 6 7\n", 345 | "C 8 9 10 11" 346 | ] 347 | }, 348 | "execution_count": 9, 349 | "metadata": {}, 350 | "output_type": "execute_result" 351 | } 352 | ], 353 | "source": [ 354 | "#We can also use rename to modify the name of an existing label with some other one\n", 355 | "\n", 356 | "data.rename(index={'B':'D'}, columns={'three':'five'})" 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": 10, 362 | "metadata": {}, 363 | "outputs": [], 364 | "source": [ 365 | "#We can modify our actual value using rename by passing inplace=True\n", 366 | "\n", 367 | "_ = data.rename(index={'B':'D'}, columns={'three':'five'}, inplace=True)" 368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": 11, 373 | "metadata": {}, 374 | "outputs": [ 375 | { 376 | "data": { 377 | "text/html": [ 378 | "
\n", 379 | "\n", 392 | "\n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | "
onetwofivefour
A0123
D4567
C891011
\n", 426 | "
" 427 | ], 428 | "text/plain": [ 429 | " one two five four\n", 430 | "A 0 1 2 3\n", 431 | "D 4 5 6 7\n", 432 | "C 8 9 10 11" 433 | ] 434 | }, 435 | "execution_count": 11, 436 | "metadata": {}, 437 | "output_type": "execute_result" 438 | } 439 | ], 440 | "source": [ 441 | "data" 442 | ] 443 | } 444 | ], 445 | "metadata": { 446 | "kernelspec": { 447 | "display_name": "Python 3", 448 | "language": "python", 449 | "name": "python3" 450 | }, 451 | "language_info": { 452 | "codemirror_mode": { 453 | "name": "ipython", 454 | "version": 3 455 | }, 456 | "file_extension": ".py", 457 | "mimetype": "text/x-python", 458 | "name": "python", 459 | "nbconvert_exporter": "python", 460 | "pygments_lexer": "ipython3", 461 | "version": "3.8.2" 462 | } 463 | }, 464 | "nbformat": 4, 465 | "nbformat_minor": 4 466 | } 467 | --------------------------------------------------------------------------------