├── .flake8 ├── .github └── ISSUE_TEMPLATE │ ├── config.yml │ └── issue_template.md ├── .gitignore ├── .travis.yml ├── README.md ├── datasets ├── ds1.jpg ├── ds2.jpg ├── mycsv.csv ├── mycsv_few_columns.csv ├── myexcel.xls ├── season.csv ├── titanic.xls ├── travel.csv ├── weather-dataset.zip ├── weather.csv └── weather_and_house.xls ├── pandas1.png ├── pandas_part1.ipynb ├── pandas_part10.ipynb ├── pandas_part2.ipynb ├── pandas_part3.ipynb ├── pandas_part4.ipynb ├── pandas_part5.ipynb ├── pandas_part6.ipynb ├── pandas_part7.ipynb ├── pandas_part8.ipynb └── pandas_part9.ipynb /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 100 3 | ignore = E121,E123,E126,E221,E222,E225,E226,E242,E701,E702,E704,E731,W503,F405,F841 4 | exclude = tests 5 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | contact_links: 3 | - name: GitHub Community Support 4 | url: https://github.community/ 5 | about: Please ask and answer questions here. 6 | - name: GitHub Security Bug Bounty 7 | url: https://bounty.github.com/ 8 | about: Please report security vulnerabilities here. 9 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/issue_template.md: -------------------------------------------------------------------------------- 1 | 2 | ### Issue type 3 | Put an 'x' (without quotes) in the boxes below that you want to check. 4 | - [ ] Code improvements 5 | - [ ] I want to add files 6 | - [ ] Suggestions 7 | 8 | ##### Briefly explain what you have selected. 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: 2 | - python 3 | 4 | python: 5 | - "3.6" 6 | 7 | install: 8 | - pip install pandas 9 | - pip install numpy 10 | - pip install flake8 11 | - pip install ipython 12 | - pip install matplotlib 13 | - pip install ipywidgets 14 | script: 15 | - python script.py 16 | 17 | after_success: 18 | - flake8 --max-line-length 100 --ignore=E121,E123,E126,E221,E222,E225,E226,E242,E701,E702,E704,E731,W503 . 19 | 20 | notifications: 21 | email: false 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![pandas for data analysis](pandas1.png) 2 |
**Fig. 1** 3 | 4 | Welcome to the tutorial Data Analysis with pandas. In this tutorial I have covered the main topics of pandas and tried to explain each concept in as few words as possible in Jupyter notebooks, so that you can observe what every pandas method does from the ground up.
5 |
6 |
First of all, let's understand "what is data analysis, and why should we use pandas for it?". 7 | 8 | ### What is data analysis ? 9 | Suppose you are working in a company that generates a lot of customer data every day, and you are assigned the task of extracting some useful information out of it by a certain deadline. What will you do? With very limited time you cannot extract the information just by looking at the dataset, because the data is huge. So you ask a colleague for help, and he says: just read about pandas for data analysis. You study pandas and find that it makes your life much easier than staring at the dataset and hunting for useful information by hand. 10 | ### What does pandas actually do ? 11 | The official documentation says-
12 | pandas is a Python package providing fast, flexible, and expressive data structures designed to make working with “relational” or “labeled” data both easy and intuitive. It aims to be the fundamental high-level building block for doing practical, real world data analysis in Python. Additionally, it has the broader goal of becoming the most powerful and flexible open source data analysis / manipulation tool available in any language. It is already well on its way toward this goal. 13 |
14 |
15 | pandas is well suited for many different kinds of data: 16 | * Tabular data with heterogeneously-typed columns, as in an SQL table or Excel spreadsheet. 17 | * Ordered and unordered (not necessarily fixed-frequency) time series data. 18 | * Arbitrary matrix data (homogeneously typed or heterogeneous) with row and column labels. 19 | * Any other form of observational / statistical data sets. The data actually need not be labeled at all to be placed into a pandas data structure. 20 | 21 | ### Dependencies- 22 | ``` 23 | pip install pandas 24 | pip install numpy 25 | ``` 26 | 27 | ### Table of contents - 28 | * [Introduction to pandas](https://github.com/dshahid380/Data-analysis-with-pandas/blob/master/pandas_part1.ipynb) 29 | * [Dataframe Object](https://github.com/dshahid380/Data-analysis-with-pandas/blob/master/pandas_part2.ipynb) 30 | * [Reading, Writing CSV and EXCEL file](https://github.com/dshahid380/Data-analysis-with-pandas/blob/master/pandas_part3.ipynb) 31 | * [Handling Missing Data part-1](https://github.com/dshahid380/Data-analysis-with-pandas/blob/master/pandas_part4.ipynb) 32 | * [Handling Missing Data part-2](https://github.com/dshahid380/Data-analysis-with-pandas/blob/master/pandas_part5.ipynb) 33 | * [Groupby : Split, Combine and Apply](https://github.com/dshahid380/Data-analysis-with-pandas/blob/master/pandas_part6.ipynb) 34 | * [Concat Dataframes](https://github.com/dshahid380/Data-analysis-with-pandas/blob/master/pandas_part7.ipynb) 35 | * [Merging Dataframes](https://github.com/dshahid380/Data-analysis-with-pandas/blob/master/pandas_part8.ipynb) 36 | * [Pivot and Pivot table](https://github.com/dshahid380/Data-analysis-with-pandas/blob/master/pandas_part9.ipynb) 37 | * [Reshaping Dataframes](https://github.com/dshahid380/Data-analysis-with-pandas/blob/master/pandas_part10.ipynb) 38 |
39 | 40 | ### Introduction to pandas : 41 | ![](https://i1.wp.com/www.ugandaletsgotravel.com/holidays/wp-content/uploads/2018/04/holidays-panda-breeding-china-600x400.jpg) 42 |
**Fig.2**

43 | Pandas is used as a data cleaning tool in the field of data science. You can perform whatever operation you want on a dataset with this tool. Now the question arises: can we clean or change the values in a dataset manually? The answer is yes, we can, if the dataset is small. But if we have a large dataset, we cannot do it manually; it would take far too long. Pandas makes this part of data science easy and effective.
45 | To use pandas you need to first import the pandas module in your program 46 | ``` 47 | import pandas as pd 48 | ``` 49 |
50 | 51 | 52 | #### Reading CSV and Excel sheets: 53 | **d=pd.read_csv("path"):** 54 | * pd.read_csv() is the function to read the CSV(Comma separated values) file from your computer. 55 | * In the function you have to pass "path" of the CSV file under quote. 56 | * Store the dataframe in any variable,here i stored it in variable "d". 57 | * read_csv() function makes the CSV file into dataframe so that you can access it just like a disctionary.
58 | 59 | **d=pd.read_excel("path") :** 60 | * It is the same as read_csv(), but it reads an Excel sheet or file. Here I am using the weather dataset. In my case the weather.csv file sits in the datasets folder under my current directory, which is why the path below is relative. 61 | ``` 62 | d=pd.read_csv('datasets/weather.csv') 63 | print(d) 64 | ``` 65 |
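
The block above uses read_csv; for completeness, here is a corresponding read_excel sketch (it assumes the titanic.xls file from this repo's datasets folder, the same file used in pandas_part3; depending on your pandas version, reading .xls files may additionally require the xlrd package):
```
import pandas as pd

# read_excel works just like read_csv, but for Excel workbooks
df = pd.read_excel('datasets/titanic.xls')

# Show the first few rows to confirm the sheet loaded as expected
print(df.head(3))
```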
66 | 67 | **For further tutorials, go to the links given in the Table of contents above, or click this [link](https://github.com/dshahid380/Data-analysis-with-pandas).** 68 | 69 | 70 | 71 | 72 | ### References : 73 | * [Pandas Official documentation](https://pandas.pydata.org/pandas-docs/stable/tutorials.html) 74 | * [Tutorials points](https://www.tutorialspoint.com/python_pandas) 75 | * [Datacamp](https://www.datacamp.com/courses/pandas-foundations) 76 | 77 |
78 | 79 | 80 | [dshahid380](https://github.com/dshahid380) 81 | [ Md Shahid](https://www.linkedin.com/in/dshahid380/) 82 | -------------------------------------------------------------------------------- /datasets/ds1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dshahid380/Data-analysis-with-pandas/84a92bb63ab285160668dc3a7003a45269736ef2/datasets/ds1.jpg -------------------------------------------------------------------------------- /datasets/ds2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dshahid380/Data-analysis-with-pandas/84a92bb63ab285160668dc3a7003a45269736ef2/datasets/ds2.jpg -------------------------------------------------------------------------------- /datasets/mycsv.csv: -------------------------------------------------------------------------------- 1 | dates,day,temp,wind-speed 2 | 02-01-12,sunny,45,12 3 | 03-01-12,rainy,46,34 4 | 04-01-12,hot,47,45 5 | 05-01-12,sunny,48,56 6 | 06-01-12,hot,49,67 7 | -------------------------------------------------------------------------------- /datasets/mycsv_few_columns.csv: -------------------------------------------------------------------------------- 1 | day,temp,wind-speed 2 | sunny,45,12 3 | rainy,46,34 4 | hot,47,45 5 | sunny,48,56 6 | hot,49,67 7 | -------------------------------------------------------------------------------- /datasets/myexcel.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dshahid380/Data-analysis-with-pandas/84a92bb63ab285160668dc3a7003a45269736ef2/datasets/myexcel.xls -------------------------------------------------------------------------------- /datasets/season.csv: -------------------------------------------------------------------------------- 1 | dates,day,temp,wind-speed 2 | 2/1/2012,sunny,45,12 3 | 3/1/2012,rainy,46,34 4 | 4/1/2012,hot,47,45 5 | 5/1/2012,NaN,NaN,56 6 | 6/1/2012,hot,49,Not available 7 | 7/1/2012,NaN,NaN,Not available 8 | 8/1/2012,hot,12,45 9 | 9/1/2012,rainy,23,41 10 | 10/1/2012,NaN,NaN,NaN 11 | 11/1/2012,NaN,NaN,NaN 12 | -------------------------------------------------------------------------------- /datasets/titanic.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dshahid380/Data-analysis-with-pandas/84a92bb63ab285160668dc3a7003a45269736ef2/datasets/titanic.xls -------------------------------------------------------------------------------- /datasets/travel.csv: -------------------------------------------------------------------------------- 1 | Age,Name,No_of_pkg,Package,travel_id 2 | 20yrs,Bikash Kumar,1 packages,$100 ,1 3 | 21yrs,Ashish Shaw,5 packages,$200 ,2 4 | 23years,Dipak Kumar,2pkgs,$100 ,3 5 | 20 Years,John Doe,3 pkgs,$100 ,4 6 | 2000,Elisha,5000,$400 ,5 7 | 5000,Md Shahid,10 packages,$200 ,6 8 | 21 yrs,Adrika Roy,7pkgs,$300 ,7 9 | 24 yrs,Shashi Kumar,2000,$500 ,8 10 | -------------------------------------------------------------------------------- /datasets/weather-dataset.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dshahid380/Data-analysis-with-pandas/84a92bb63ab285160668dc3a7003a45269736ef2/datasets/weather-dataset.zip -------------------------------------------------------------------------------- /datasets/weather_and_house.xls: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dshahid380/Data-analysis-with-pandas/84a92bb63ab285160668dc3a7003a45269736ef2/datasets/weather_and_house.xls -------------------------------------------------------------------------------- /pandas1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dshahid380/Data-analysis-with-pandas/84a92bb63ab285160668dc3a7003a45269736ef2/pandas1.png -------------------------------------------------------------------------------- /pandas_part10.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Reshaping Dataframes \n", 8 | " In this section we will discuss reshaping a dataframe with the melt() method.\n", 9 | " You have to pass the dataframe and the column(s) you want to keep fixed; the other arguments are optional.\n", 10 | " " 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "import pandas as pd" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 5, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/html": [ 30 | "
\n", 31 | "\n", 44 | "\n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | "
DayKeralaKolkataMumbai
0Mon231010
1Tue322020
2Wed133030
3Thu424040
4Fri133232
5Sat433434
6Sun232323
\n", 106 | "
" 107 | ], 108 | "text/plain": [ 109 | " Day Kerala Kolkata Mumbai\n", 110 | "0 Mon 23 10 10\n", 111 | "1 Tue 32 20 20\n", 112 | "2 Wed 13 30 30\n", 113 | "3 Thu 42 40 40\n", 114 | "4 Fri 13 32 32\n", 115 | "5 Sat 43 34 34\n", 116 | "6 Sun 23 23 23" 117 | ] 118 | }, 119 | "execution_count": 5, 120 | "metadata": {}, 121 | "output_type": "execute_result" 122 | } 123 | ], 124 | "source": [ 125 | "mydis={\n", 126 | " 'Day':['Mon','Tue','Wed','Thu','Fri','Sat','Sun'],\n", 127 | " 'Kolkata':[10,20,30,40,32,34,23],\n", 128 | " 'Kerala':[23,32,13,42,13,43,23],\n", 129 | " 'Mumbai':[10,20,30,40,32,34,23]\n", 130 | "}\n", 131 | "df=pd.DataFrame(mydis)\n", 132 | "df" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 10, 138 | "metadata": {}, 139 | "outputs": [ 140 | { 141 | "data": { 142 | "text/html": [ 143 | "
\n", 144 | "\n", 157 | "\n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | "
Dayvariablevalue
0MonKerala23
1TueKerala32
2WedKerala13
3ThuKerala42
4FriKerala13
5SatKerala43
6SunKerala23
7MonKolkata10
8TueKolkata20
9WedKolkata30
10ThuKolkata40
11FriKolkata32
12SatKolkata34
13SunKolkata23
14MonMumbai10
15TueMumbai20
16WedMumbai30
17ThuMumbai40
18FriMumbai32
19SatMumbai34
20SunMumbai23
\n", 295 | "
" 296 | ], 297 | "text/plain": [ 298 | " Day variable value\n", 299 | "0 Mon Kerala 23\n", 300 | "1 Tue Kerala 32\n", 301 | "2 Wed Kerala 13\n", 302 | "3 Thu Kerala 42\n", 303 | "4 Fri Kerala 13\n", 304 | "5 Sat Kerala 43\n", 305 | "6 Sun Kerala 23\n", 306 | "7 Mon Kolkata 10\n", 307 | "8 Tue Kolkata 20\n", 308 | "9 Wed Kolkata 30\n", 309 | "10 Thu Kolkata 40\n", 310 | "11 Fri Kolkata 32\n", 311 | "12 Sat Kolkata 34\n", 312 | "13 Sun Kolkata 23\n", 313 | "14 Mon Mumbai 10\n", 314 | "15 Tue Mumbai 20\n", 315 | "16 Wed Mumbai 30\n", 316 | "17 Thu Mumbai 40\n", 317 | "18 Fri Mumbai 32\n", 318 | "19 Sat Mumbai 34\n", 319 | "20 Sun Mumbai 23" 320 | ] 321 | }, 322 | "execution_count": 10, 323 | "metadata": {}, 324 | "output_type": "execute_result" 325 | } 326 | ], 327 | "source": [ 328 | "df2=pd.melt(df,id_vars=['Day'],var_name='City',value_name='Temp')\n", 329 | "df2" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": null, 335 | "metadata": {}, 336 | "outputs": [], 337 | "source": [] 338 | } 339 | ], 340 | "metadata": { 341 | "kernelspec": { 342 | "display_name": "Python 3", 343 | "language": "python", 344 | "name": "python3" 345 | }, 346 | "language_info": { 347 | "codemirror_mode": { 348 | "name": "ipython", 349 | "version": 3 350 | }, 351 | "file_extension": ".py", 352 | "mimetype": "text/x-python", 353 | "name": "python", 354 | "nbconvert_exporter": "python", 355 | "pygments_lexer": "ipython3", 356 | "version": "3.6.4" 357 | } 358 | }, 359 | "nbformat": 4, 360 | "nbformat_minor": 2 361 | } 362 | -------------------------------------------------------------------------------- /pandas_part3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Reading, writing CSV and Excel file
" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 16, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 17, 22 | "metadata": { 23 | "scrolled": true 24 | }, 25 | "outputs": [ 26 | { 27 | "data": { 28 | "text/html": [ 29 | "
\n", 30 | "\n", 43 | "\n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | "
Formatted DateSummaryPrecip TypeTemperature (C)Apparent Temperature (C)HumidityWind Speed (km/h)Wind Bearing (degrees)Visibility (km)Loud CoverPressure (millibars)Daily Summary
02006-04-01 00:00:00.000 +0200Partly Cloudyrain9.4722227.3888890.8914.1197251.015.82630.01015.13Partly cloudy throughout the day.
12006-04-01 01:00:00.000 +0200Partly Cloudyrain9.3555567.2277780.8614.2646259.015.82630.01015.63Partly cloudy throughout the day.
22006-04-01 02:00:00.000 +0200Mostly Cloudyrain9.3777789.3777780.893.9284204.014.95690.01015.94Partly cloudy throughout the day.
\n", 109 | "
" 110 | ], 111 | "text/plain": [ 112 | " Formatted Date Summary Precip Type Temperature (C) \\\n", 113 | "0 2006-04-01 00:00:00.000 +0200 Partly Cloudy rain 9.472222 \n", 114 | "1 2006-04-01 01:00:00.000 +0200 Partly Cloudy rain 9.355556 \n", 115 | "2 2006-04-01 02:00:00.000 +0200 Mostly Cloudy rain 9.377778 \n", 116 | "\n", 117 | " Apparent Temperature (C) Humidity Wind Speed (km/h) \\\n", 118 | "0 7.388889 0.89 14.1197 \n", 119 | "1 7.227778 0.86 14.2646 \n", 120 | "2 9.377778 0.89 3.9284 \n", 121 | "\n", 122 | " Wind Bearing (degrees) Visibility (km) Loud Cover Pressure (millibars) \\\n", 123 | "0 251.0 15.8263 0.0 1015.13 \n", 124 | "1 259.0 15.8263 0.0 1015.63 \n", 125 | "2 204.0 14.9569 0.0 1015.94 \n", 126 | "\n", 127 | " Daily Summary \n", 128 | "0 Partly cloudy throughout the day. \n", 129 | "1 Partly cloudy throughout the day. \n", 130 | "2 Partly cloudy throughout the day. " 131 | ] 132 | }, 133 | "execution_count": 17, 134 | "metadata": {}, 135 | "output_type": "execute_result" 136 | } 137 | ], 138 | "source": [ 139 | "#Reading CSV file\n", 140 | "d=pd.read_csv('datasets/weather.csv')\n", 141 | "d.head(3)" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 18, 147 | "metadata": {}, 148 | "outputs": [ 149 | { 150 | "data": { 151 | "text/html": [ 152 | "
\n", 153 | "\n", 166 | "\n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | "
pclasssurvivednamesexagesibspparchticketfarecabinembarkedboatbodyhome.dest
011Allen, Miss. Elisabeth Waltonfemale290024160211.3375B5S2NoneSt Louis, MO
111Allison, Master. Hudson Trevormale0.916712113781151.5500C22 C26S11NoneMontreal, PQ / Chesterville, ON
210Allison, Miss. Helen Lorainefemale212113781151.5500C22 C26SNoneNoneMontreal, PQ / Chesterville, ON
\n", 240 | "
" 241 | ], 242 | "text/plain": [ 243 | " pclass survived name sex age sibsp \\\n", 244 | "0 1 1 Allen, Miss. Elisabeth Walton female 29 0 \n", 245 | "1 1 1 Allison, Master. Hudson Trevor male 0.9167 1 \n", 246 | "2 1 0 Allison, Miss. Helen Loraine female 2 1 \n", 247 | "\n", 248 | " parch ticket fare cabin embarked boat body \\\n", 249 | "0 0 24160 211.3375 B5 S 2 None \n", 250 | "1 2 113781 151.5500 C22 C26 S 11 None \n", 251 | "2 2 113781 151.5500 C22 C26 S None None \n", 252 | "\n", 253 | " home.dest \n", 254 | "0 St Louis, MO \n", 255 | "1 Montreal, PQ / Chesterville, ON \n", 256 | "2 Montreal, PQ / Chesterville, ON " 257 | ] 258 | }, 259 | "execution_count": 18, 260 | "metadata": {}, 261 | "output_type": "execute_result" 262 | } 263 | ], 264 | "source": [ 265 | "#Reading excel file\n", 266 | "df=pd.read_excel('datasets/titanic.xls')\n", 267 | "df.head(3)" 268 | ] 269 | }, 270 | { 271 | "cell_type": "markdown", 272 | "metadata": {}, 273 | "source": [ 274 | "### If you want to read only few rows rather than all" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": 19, 280 | "metadata": {}, 281 | "outputs": [ 282 | { 283 | "data": { 284 | "text/html": [ 285 | "
\n", 286 | "\n", 299 | "\n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | "
Formatted DateSummaryPrecip TypeTemperature (C)Apparent Temperature (C)HumidityWind Speed (km/h)Wind Bearing (degrees)Visibility (km)Loud CoverPressure (millibars)Daily Summary
02006-04-01 00:00:00.000 +0200Partly Cloudyrain9.4722227.3888890.8914.1197251.015.82630.01015.13Partly cloudy throughout the day.
12006-04-01 01:00:00.000 +0200Partly Cloudyrain9.3555567.2277780.8614.2646259.015.82630.01015.63Partly cloudy throughout the day.
22006-04-01 02:00:00.000 +0200Mostly Cloudyrain9.3777789.3777780.893.9284204.014.95690.01015.94Partly cloudy throughout the day.
32006-04-01 03:00:00.000 +0200Partly Cloudyrain8.2888895.9444440.8314.1036269.015.82630.01016.41Partly cloudy throughout the day.
42006-04-01 04:00:00.000 +0200Mostly Cloudyrain8.7555566.9777780.8311.0446259.015.82630.01016.51Partly cloudy throughout the day.
52006-04-01 05:00:00.000 +0200Partly Cloudyrain9.2222227.1111110.8513.9587258.014.95690.01016.66Partly cloudy throughout the day.
62006-04-01 06:00:00.000 +0200Partly Cloudyrain7.7333335.5222220.9512.3648259.09.98200.01016.72Partly cloudy throughout the day.
\n", 425 | "
" 426 | ], 427 | "text/plain": [ 428 | " Formatted Date Summary Precip Type Temperature (C) \\\n", 429 | "0 2006-04-01 00:00:00.000 +0200 Partly Cloudy rain 9.472222 \n", 430 | "1 2006-04-01 01:00:00.000 +0200 Partly Cloudy rain 9.355556 \n", 431 | "2 2006-04-01 02:00:00.000 +0200 Mostly Cloudy rain 9.377778 \n", 432 | "3 2006-04-01 03:00:00.000 +0200 Partly Cloudy rain 8.288889 \n", 433 | "4 2006-04-01 04:00:00.000 +0200 Mostly Cloudy rain 8.755556 \n", 434 | "5 2006-04-01 05:00:00.000 +0200 Partly Cloudy rain 9.222222 \n", 435 | "6 2006-04-01 06:00:00.000 +0200 Partly Cloudy rain 7.733333 \n", 436 | "\n", 437 | " Apparent Temperature (C) Humidity Wind Speed (km/h) \\\n", 438 | "0 7.388889 0.89 14.1197 \n", 439 | "1 7.227778 0.86 14.2646 \n", 440 | "2 9.377778 0.89 3.9284 \n", 441 | "3 5.944444 0.83 14.1036 \n", 442 | "4 6.977778 0.83 11.0446 \n", 443 | "5 7.111111 0.85 13.9587 \n", 444 | "6 5.522222 0.95 12.3648 \n", 445 | "\n", 446 | " Wind Bearing (degrees) Visibility (km) Loud Cover Pressure (millibars) \\\n", 447 | "0 251.0 15.8263 0.0 1015.13 \n", 448 | "1 259.0 15.8263 0.0 1015.63 \n", 449 | "2 204.0 14.9569 0.0 1015.94 \n", 450 | "3 269.0 15.8263 0.0 1016.41 \n", 451 | "4 259.0 15.8263 0.0 1016.51 \n", 452 | "5 258.0 14.9569 0.0 1016.66 \n", 453 | "6 259.0 9.9820 0.0 1016.72 \n", 454 | "\n", 455 | " Daily Summary \n", 456 | "0 Partly cloudy throughout the day. \n", 457 | "1 Partly cloudy throughout the day. \n", 458 | "2 Partly cloudy throughout the day. \n", 459 | "3 Partly cloudy throughout the day. \n", 460 | "4 Partly cloudy throughout the day. \n", 461 | "5 Partly cloudy throughout the day. \n", 462 | "6 Partly cloudy throughout the day. " 463 | ] 464 | }, 465 | "execution_count": 19, 466 | "metadata": {}, 467 | "output_type": "execute_result" 468 | } 469 | ], 470 | "source": [ 471 | "df=pd.read_csv('datasets/weather.csv',nrows=7)\n", 472 | "df" 473 | ] 474 | }, 475 | { 476 | "cell_type": "markdown", 477 | "metadata": {}, 478 | "source": [ 479 | "### Changing specific values with NaN while reading\n", 480 | " * While reading the dataset you can change the specific value with NaN\n", 481 | " > df = pd.read_excel ( \" titanic.xls \" , na_values = [ list of element which you want to change to NaN ] ) " 482 | ] 483 | }, 484 | { 485 | "cell_type": "code", 486 | "execution_count": 20, 487 | "metadata": {}, 488 | "outputs": [ 489 | { 490 | "data": { 491 | "text/html": [ 492 | "
\n", 493 | "\n", 506 | "\n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | "
pclasssurvivednamesexagesibspparchticketfarecabinembarkedboatbodyhome.dest
011Allen, Miss. Elisabeth Waltonfemale29.00000024160211.3375B5S2NaNSt Louis, MO
111Allison, Master. Hudson Trevormale0.916712113781151.5500C22 C26S11NaNMontreal, PQ / Chesterville, ON
210Allison, Miss. Helen Lorainefemale2.000012113781151.5500C22 C26SNaNNaNMontreal, PQ / Chesterville, ON
310Allison, Mr. Hudson Joshua Creightonmale30.000012113781151.5500C22 C26SNaN135.0Montreal, PQ / Chesterville, ON
410Allison, Mrs. Hudson J C (Bessie Waldo Daniels)female25.000012113781151.5500C22 C26SNaNNaNMontreal, PQ / Chesterville, ON
\n", 614 | "
" 615 | ], 616 | "text/plain": [ 617 | " pclass survived name sex \\\n", 618 | "0 1 1 Allen, Miss. Elisabeth Walton female \n", 619 | "1 1 1 Allison, Master. Hudson Trevor male \n", 620 | "2 1 0 Allison, Miss. Helen Loraine female \n", 621 | "3 1 0 Allison, Mr. Hudson Joshua Creighton male \n", 622 | "4 1 0 Allison, Mrs. Hudson J C (Bessie Waldo Daniels) female \n", 623 | "\n", 624 | " age sibsp parch ticket fare cabin embarked boat body \\\n", 625 | "0 29.0000 0 0 24160 211.3375 B5 S 2 NaN \n", 626 | "1 0.9167 1 2 113781 151.5500 C22 C26 S 11 NaN \n", 627 | "2 2.0000 1 2 113781 151.5500 C22 C26 S NaN NaN \n", 628 | "3 30.0000 1 2 113781 151.5500 C22 C26 S NaN 135.0 \n", 629 | "4 25.0000 1 2 113781 151.5500 C22 C26 S NaN NaN \n", 630 | "\n", 631 | " home.dest \n", 632 | "0 St Louis, MO \n", 633 | "1 Montreal, PQ / Chesterville, ON \n", 634 | "2 Montreal, PQ / Chesterville, ON \n", 635 | "3 Montreal, PQ / Chesterville, ON \n", 636 | "4 Montreal, PQ / Chesterville, ON " 637 | ] 638 | }, 639 | "execution_count": 20, 640 | "metadata": {}, 641 | "output_type": "execute_result" 642 | } 643 | ], 644 | "source": [ 645 | "df = pd.read_excel(\"datasets/titanic.xls\",na_values=[None]) \n", 646 | "df.head()" 647 | ] 648 | }, 649 | { 650 | "cell_type": "markdown", 651 | "metadata": {}, 652 | "source": [ 653 | "### Changing specific values with NaN columns wise while reading" 654 | ] 655 | }, 656 | { 657 | "cell_type": "code", 658 | "execution_count": 21, 659 | "metadata": {}, 660 | "outputs": [ 661 | { 662 | "data": { 663 | "text/html": [ 664 | "
\n", 665 | "\n", 678 | "\n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | "
pclasssurvivednamesexagesibspparchticketfarecabinembarkedboatbodyhome.dest
011Allen, Miss. Elisabeth Waltonfemale290024160211.3375B5S2NoneSt Louis, MO
111Allison, Master. Hudson Trevormale0.916712113781151.5500C22 C26S11NoneMontreal, PQ / Chesterville, ON
210Allison, Miss. Helen Lorainefemale212113781151.5500C22 C26SNoneNoneMontreal, PQ / Chesterville, ON
310Allison, Mr. Hudson Joshua Creightonmale3012113781151.5500C22 C26SNone135Montreal, PQ / Chesterville, ON
410Allison, Mrs. Hudson J C (Bessie Waldo Daniels)female2512113781151.5500C22 C26SNoneNoneMontreal, PQ / Chesterville, ON
\n", 786 | "
" 787 | ], 788 | "text/plain": [ 789 | " pclass survived name sex \\\n", 790 | "0 1 1 Allen, Miss. Elisabeth Walton female \n", 791 | "1 1 1 Allison, Master. Hudson Trevor male \n", 792 | "2 1 0 Allison, Miss. Helen Loraine female \n", 793 | "3 1 0 Allison, Mr. Hudson Joshua Creighton male \n", 794 | "4 1 0 Allison, Mrs. Hudson J C (Bessie Waldo Daniels) female \n", 795 | "\n", 796 | " age sibsp parch ticket fare cabin embarked boat body \\\n", 797 | "0 29 0 0 24160 211.3375 B5 S 2 None \n", 798 | "1 0.9167 1 2 113781 151.5500 C22 C26 S 11 None \n", 799 | "2 2 1 2 113781 151.5500 C22 C26 S None None \n", 800 | "3 30 1 2 113781 151.5500 C22 C26 S None 135 \n", 801 | "4 25 1 2 113781 151.5500 C22 C26 S None None \n", 802 | "\n", 803 | " home.dest \n", 804 | "0 St Louis, MO \n", 805 | "1 Montreal, PQ / Chesterville, ON \n", 806 | "2 Montreal, PQ / Chesterville, ON \n", 807 | "3 Montreal, PQ / Chesterville, ON \n", 808 | "4 Montreal, PQ / Chesterville, ON " 809 | ] 810 | }, 811 | "execution_count": 21, 812 | "metadata": {}, 813 | "output_type": "execute_result" 814 | } 815 | ], 816 | "source": [ 817 | "df=pd.read_excel('datasets/titanic.xls')\n", 818 | "df.head()" 819 | ] 820 | }, 821 | { 822 | "cell_type": "code", 823 | "execution_count": 22, 824 | "metadata": {}, 825 | "outputs": [], 826 | "source": [ 827 | "df=pd.read_excel('datasets/titanic.xls',na_values={'body':[None],'boat':[None],'parch':[2,0]})\n" 828 | ] 829 | }, 830 | { 831 | "cell_type": "markdown", 832 | "metadata": {}, 833 | "source": [ 834 | " * The na_values argument is used to replace all the values passed in the list with NaN. You can also replace some invalid values column-wise, as in the code above. If you run the above code you will get a dataframe where 'None' in the body column is replaced by NaN, 'None' in the boat column is replaced by NaN, and 2 & 0 in the parch column are replaced by NaN." 835 | ] 836 | }, 837 | { 838 | "cell_type": "markdown", 839 | "metadata": {}, 840 | "source": [ 841 | "### Writing into CSV or excel \n", 842 | " * You can convert any dataframe into a new CSV or excel file" 843 | ] 844 | }, 845 | { 846 | "cell_type": "code", 847 | "execution_count": 23, 848 | "metadata": {}, 849 | "outputs": [ 850 | { 851 | "data": { 852 | "text/html": [ 853 | "
\n", 854 | "\n", 867 | "\n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | "
datesdaytempwind-speed
002-01-12sunny4512
103-01-12rainy4634
204-01-12hot4745
305-01-12sunny4856
406-01-12hot4967
\n", 915 | "
" 916 | ], 917 | "text/plain": [ 918 | " dates day temp wind-speed\n", 919 | "0 02-01-12 sunny 45 12\n", 920 | "1 03-01-12 rainy 46 34\n", 921 | "2 04-01-12 hot 47 45\n", 922 | "3 05-01-12 sunny 48 56\n", 923 | "4 06-01-12 hot 49 67" 924 | ] 925 | }, 926 | "execution_count": 23, 927 | "metadata": {}, 928 | "output_type": "execute_result" 929 | } 930 | ], 931 | "source": [ 932 | "#Creating my own dictionary\n", 933 | "mydict={\n", 934 | " 'dates':['02-01-12','03-01-12','04-01-12','05-01-12','06-01-12'],\n", 935 | " 'day':['sunny','rainy','hot','sunny','hot'],\n", 936 | " 'wind-speed':[12,34,45,56,67],\n", 937 | " 'temp':[45,46,47,48,49]\n", 938 | " }\n", 939 | "#Converting the dictionary to a dataframe object\n", 940 | "df=pd.DataFrame(mydict)\n", 941 | "\n", 942 | "#Printing the dataframe\n", 943 | "df" 944 | ] 945 | }, 946 | { 947 | "cell_type": "markdown", 948 | "metadata": {}, 949 | "source": [ 950 | "### Creating a new file\n" 951 | ] 952 | }, 953 | { 954 | "cell_type": "markdown", 955 | "metadata": {}, 956 | "source": [ 957 | "#### Creating a new csv file mycsv.csv" 958 | ] 959 | }, 960 | { 961 | "cell_type": "markdown", 962 | "metadata": {}, 963 | "source": [ 964 | "Suppose you have read a csv or excel file as a dataframe object and made some modifications. Now you want to write a new csv or excel file that contains the modified dataframe. In that case you will need the to_csv() or to_excel() function to create a new file. See the example below -" 965 | ] 966 | }, 967 | { 968 | "cell_type": "code", 969 | "execution_count": 24, 970 | "metadata": {}, 971 | "outputs": [], 972 | "source": [ 973 | "df.to_csv('datasets/mycsv.csv',index=False)\n" 974 | ] 975 | }, 976 | { 977 | "cell_type": "markdown", 978 | "metadata": {}, 979 | "source": [ 980 | "#### Creating a new excel file myexcel.xls" 981 | ] 982 | }, 983 | { 984 | "cell_type": "code", 985 | "execution_count": 25, 986 | "metadata": {}, 987 | "outputs": [], 988 | "source": [ 989 | "df.to_excel('datasets/myexcel.xls',index=False)" 990 | ] 991 | }, 992 | { 993 | "cell_type": "markdown", 994 | "metadata": {}, 995 | "source": [ 996 | "### Writing only selected columns into csv or excel\n", 997 | "
\n", 998 | "Suppose you modified the dataframe and you want to write it to another csv file with only selected columns. In the following example we create the new csv file mycsv_few_columns.csv from the \"df\" dataframe, keeping only the three columns we want, i.e. day, temp and wind-speed, and ignoring the dates column." 999 | ] 1000 | }, 1001 | { 1002 | "cell_type": "markdown", 1003 | "metadata": {}, 1004 | "source": [ 1005 | "#### How many columns are in dataframe df ?" 1006 | ] 1007 | }, 1008 | { 1009 | "cell_type": "code", 1010 | "execution_count": 26, 1011 | "metadata": {}, 1012 | "outputs": [ 1013 | { 1014 | "data": { 1015 | "text/plain": [ 1016 | "Index(['dates', 'day', 'temp', 'wind-speed'], dtype='object')" 1017 | ] 1018 | }, 1019 | "execution_count": 26, 1020 | "metadata": {}, 1021 | "output_type": "execute_result" 1022 | } 1023 | ], 1024 | "source": [ 1025 | "df.columns" 1026 | ] 1027 | }, 1028 | { 1029 | "cell_type": "code", 1030 | "execution_count": 27, 1031 | "metadata": {}, 1032 | "outputs": [ 1033 | { 1034 | "data": { 1035 | "text/html": [ 1036 | "
\n", 1037 | "\n", 1050 | "\n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | "
daytempwind-speed
0sunny4512
1rainy4634
2hot4745
3sunny4856
4hot4967
\n", 1092 | "
" 1093 | ], 1094 | "text/plain": [ 1095 | " day temp wind-speed\n", 1096 | "0 sunny 45 12\n", 1097 | "1 rainy 46 34\n", 1098 | "2 hot 47 45\n", 1099 | "3 sunny 48 56\n", 1100 | "4 hot 49 67" 1101 | ] 1102 | }, 1103 | "execution_count": 27, 1104 | "metadata": {}, 1105 | "output_type": "execute_result" 1106 | } 1107 | ], 1108 | "source": [ 1109 | "#Writing only three columns day,temp,wind-speed\n", 1110 | "df.to_csv('datasets/mycsv_few_columns.csv',columns=['day','temp','wind-speed'],index=False)\n", 1111 | "\n", 1112 | "#Again reading to see the result\n", 1113 | "d=pd.read_csv('datasets/mycsv_few_columns.csv')\n", 1114 | "d" 1115 | ] 1116 | }, 1117 | { 1118 | "cell_type": "markdown", 1119 | "metadata": {}, 1120 | "source": [ 1121 | "### Converters :" 1122 | ] 1123 | }, 1124 | { 1125 | "cell_type": "markdown", 1126 | "metadata": {}, 1127 | "source": [ 1128 | "In many cases you don't have a clean dataset. Invalid values in the dataset lead to a lot of problems while predicting or extracting meaningful information. To avoid this problem we use converters.\n", 1129 | "* Converters are basically functions which convert specific values of a column into your desired value.\n", 1130 | "* Converter functions are passed in a dictionary, just like in na_values.\n" 1131 | ] 1132 | }, 1133 | { 1134 | "cell_type": "markdown", 1135 | "metadata": {}, 1136 | "source": [ 1137 | "This is our dataset, in which you can see that a lot of invalid values are present. These values are noise in our dataset." 1138 | ] 1139 | }, 1140 | { 1141 | "cell_type": "code", 1142 | "execution_count": 28, 1143 | "metadata": {}, 1144 | "outputs": [ 1145 | { 1146 | "data": { 1147 | "text/html": [ 1148 | "
\n", 1149 | "\n", 1162 | "\n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | " \n", 1186 | " \n", 1187 | " \n", 1188 | " \n", 1189 | " \n", 1190 | " \n", 1191 | " \n", 1192 | " \n", 1193 | " \n", 1194 | " \n", 1195 | " \n", 1196 | " \n", 1197 | " \n", 1198 | " \n", 1199 | " \n", 1200 | " \n", 1201 | " \n", 1202 | " \n", 1203 | " \n", 1204 | " \n", 1205 | " \n", 1206 | " \n", 1207 | " \n", 1208 | " \n", 1209 | " \n", 1210 | " \n", 1211 | " \n", 1212 | " \n", 1213 | " \n", 1214 | " \n", 1215 | " \n", 1216 | " \n", 1217 | " \n", 1218 | " \n", 1219 | " \n", 1220 | " \n", 1221 | " \n", 1222 | " \n", 1223 | " \n", 1224 | " \n", 1225 | " \n", 1226 | " \n", 1227 | " \n", 1228 | " \n", 1229 | " \n", 1230 | " \n", 1231 | " \n", 1232 | " \n", 1233 | " \n", 1234 | " \n", 1235 | " \n", 1236 | " \n", 1237 | " \n", 1238 | " \n", 1239 | " \n", 1240 | " \n", 1241 | " \n", 1242 | " \n", 1243 | " \n", 1244 | "
datesdaytempwind-speed
02/1/2012sunny45.012
13/1/2012rainy46.034
24/1/2012hot47.045
35/1/2012NaNNaN56
46/1/2012hot49.0Not available
57/1/2012NaNNaNNot available
68/1/2012hot12.045
79/1/2012rainy23.041
810/1/2012NaNNaNNaN
911/1/2012NaNNaNNaN
\n", 1245 | "
" 1246 | ], 1247 | "text/plain": [ 1248 | " dates day temp wind-speed\n", 1249 | "0 2/1/2012 sunny 45.0 12\n", 1250 | "1 3/1/2012 rainy 46.0 34\n", 1251 | "2 4/1/2012 hot 47.0 45\n", 1252 | "3 5/1/2012 NaN NaN 56\n", 1253 | "4 6/1/2012 hot 49.0 Not available\n", 1254 | "5 7/1/2012 NaN NaN Not available\n", 1255 | "6 8/1/2012 hot 12.0 45\n", 1256 | "7 9/1/2012 rainy 23.0 41\n", 1257 | "8 10/1/2012 NaN NaN NaN\n", 1258 | "9 11/1/2012 NaN NaN NaN" 1259 | ] 1260 | }, 1261 | "execution_count": 28, 1262 | "metadata": {}, 1263 | "output_type": "execute_result" 1264 | } 1265 | ], 1266 | "source": [ 1267 | "df=pd.read_csv('datasets/season.csv')\n", 1268 | "df" 1269 | ] 1270 | }, 1271 | { 1272 | "cell_type": "markdown", 1273 | "metadata": {}, 1274 | "source": [ 1275 | "#### The following functions replace the string 'NaN' in a cell with a default value; we will apply one to the 'temp' column, and likewise to the 'day' and 'wind-speed' columns." 1276 | ] 1277 | }, 1278 | { 1279 | "cell_type": "code", 1280 | "execution_count": 37, 1281 | "metadata": {}, 1282 | "outputs": [], 1283 | "source": [ 1284 | "def converter_for_temp(col):\n", 1285 | " if col=='NaN':\n", 1286 | " return 40\n", 1287 | " else:\n", 1288 | " return col\n", 1289 | "def converter_for_day(col):\n", 1290 | " if col=='NaN':\n", 1291 | " return 'sunny'\n", 1292 | " else:\n", 1293 | " return col\n", 1294 | "def converter_for_wind_speed(col):\n", 1295 | " if col=='Not available':\n", 1296 | " return 30\n", 1297 | " elif col==\"NaN\":\n", 1298 | " return 48\n", 1299 | " else:\n", 1300 | " return col" 1301 | ] 1302 | }, 1303 | { 1304 | "cell_type": "code", 1305 | "execution_count": 38, 1306 | "metadata": {}, 1307 | "outputs": [ 1308 | { 1309 | "data": { 1310 | "text/html": [ 1311 | "
\n", 1312 | "\n", 1325 | "\n", 1326 | " \n", 1327 | " \n", 1328 | " \n", 1329 | " \n", 1330 | " \n", 1331 | " \n", 1332 | " \n", 1333 | " \n", 1334 | " \n", 1335 | " \n", 1336 | " \n", 1337 | " \n", 1338 | " \n", 1339 | " \n", 1340 | " \n", 1341 | " \n", 1342 | " \n", 1343 | " \n", 1344 | " \n", 1345 | " \n", 1346 | " \n", 1347 | " \n", 1348 | " \n", 1349 | " \n", 1350 | " \n", 1351 | " \n", 1352 | " \n", 1353 | " \n", 1354 | " \n", 1355 | " \n", 1356 | " \n", 1357 | " \n", 1358 | " \n", 1359 | " \n", 1360 | " \n", 1361 | " \n", 1362 | " \n", 1363 | " \n", 1364 | " \n", 1365 | " \n", 1366 | " \n", 1367 | " \n", 1368 | " \n", 1369 | " \n", 1370 | " \n", 1371 | " \n", 1372 | " \n", 1373 | " \n", 1374 | " \n", 1375 | " \n", 1376 | " \n", 1377 | " \n", 1378 | " \n", 1379 | " \n", 1380 | " \n", 1381 | " \n", 1382 | " \n", 1383 | " \n", 1384 | " \n", 1385 | " \n", 1386 | " \n", 1387 | " \n", 1388 | " \n", 1389 | " \n", 1390 | " \n", 1391 | " \n", 1392 | " \n", 1393 | " \n", 1394 | " \n", 1395 | " \n", 1396 | " \n", 1397 | " \n", 1398 | " \n", 1399 | " \n", 1400 | " \n", 1401 | " \n", 1402 | " \n", 1403 | " \n", 1404 | " \n", 1405 | " \n", 1406 | " \n", 1407 | "
datesdaytempwind-speed
02/1/2012sunny4512
13/1/2012rainy4634
24/1/2012hot4745
35/1/2012sunny4056
46/1/2012hot4930
57/1/2012sunny4030
68/1/2012hot1245
79/1/2012rainy2341
810/1/2012sunny4048
911/1/2012sunny4048
\n", 1408 | "
" 1409 | ], 1410 | "text/plain": [ 1411 | " dates day temp wind-speed\n", 1412 | "0 2/1/2012 sunny 45 12\n", 1413 | "1 3/1/2012 rainy 46 34\n", 1414 | "2 4/1/2012 hot 47 45\n", 1415 | "3 5/1/2012 sunny 40 56\n", 1416 | "4 6/1/2012 hot 49 30\n", 1417 | "5 7/1/2012 sunny 40 30\n", 1418 | "6 8/1/2012 hot 12 45\n", 1419 | "7 9/1/2012 rainy 23 41\n", 1420 | "8 10/1/2012 sunny 40 48\n", 1421 | "9 11/1/2012 sunny 40 48" 1422 | ] 1423 | }, 1424 | "execution_count": 38, 1425 | "metadata": {}, 1426 | "output_type": "execute_result" 1427 | } 1428 | ], 1429 | "source": [ 1430 | "df=pd.read_csv('datasets/season.csv',converters={\n", 1431 | " 'day':converter_for_day,\n", 1432 | " 'temp':converter_for_temp,\n", 1433 | " 'wind-speed':converter_for_wind_speed\n", 1434 | " })\n", 1435 | "df" 1436 | ] 1437 | }, 1438 | { 1439 | "cell_type": "markdown", 1440 | "metadata": {}, 1441 | "source": [ 1442 | "You can observe that previously the columns 'day', 'temp' and 'wind-speed' had some invalid data like 'NaN' and 'Not available', but after applying the converter functions we get a cleaned dataset. Now we can apply data analysis techniques to predict something from our dataset." 1443 | ] 1444 | }, 1445 | { 1446 | "cell_type": "markdown", 1447 | "metadata": {}, 1448 | "source": [ 1449 | "### Writing different dataframes into one file but different sheet names" 1450 | ] 1451 | }, 1452 | { 1453 | "cell_type": "markdown", 1454 | "metadata": {}, 1455 | "source": [ 1456 | "Let's assume that you have two different dataframes and you want to write them into the same excel file but under different sheet names. \n", 1457 | "
\n", 1458 | "\n", 1459 | "Let's take two dictionaries, one called \"weather1\" and another called \"house1\", and turn them into two dataframes, \"weather\" & \"house\"" 1460 | ] 1461 | }, 1462 | { 1463 | "cell_type": "code", 1464 | "execution_count": 31, 1465 | "metadata": {}, 1466 | "outputs": [], 1467 | "source": [ 1468 | "#Creating the weather dictionary\n", 1469 | "weather1={\n", 1470 | " 'dates':['02-01-12','03-01-12','04-01-12','05-01-12','06-01-12'],\n", 1471 | " 'day':['sunny','rainy','hot','sunny','hot'],\n", 1472 | " 'wind-speed':[12,34,45,56,67],\n", 1473 | " 'temp':[45,46,47,48,49]\n", 1474 | " }\n", 1475 | "#Converting the dictionary to a dataframe object\n", 1476 | "weather=pd.DataFrame(weather1)\n", 1477 | "\n", 1478 | "#Creating the house dictionary\n", 1479 | "house1={\n", 1480 | " 'dates':['02-01-12','03-01-12','04-01-12','05-01-12','06-01-12'],\n", 1481 | " 'price':[20000,30000,40000,50000,60000],\n", 1482 | " 'bhk':[1,3,2,1,2],\n", 1483 | " 'how-old':[2,5,2,7,4]\n", 1484 | " }\n", 1485 | "\n", 1486 | "#Converting the house dictionary to a dataframe object\n", 1487 | "\n", 1488 | "house=pd.DataFrame(house1)\n" 1489 | ] 1490 | }, 1491 | { 1492 | "cell_type": "markdown", 1493 | "metadata": {}, 1494 | "source": [ 1495 | " Create an \"ExcelWriter\" object called \"writer\". Now call the to_excel() function and pass three arguments -\n", 1496 | " * 1. the \"writer\" object\n", 1497 | " * 2. sheet_name : the name of the sheet to write to\n", 1498 | " * 3. index : this one is optional\n", 1499 | " " 1500 | ] 1501 | }, 1502 | { 1503 | "cell_type": "code", 1504 | "execution_count": 32, 1505 | "metadata": {}, 1506 | "outputs": [], 1507 | "source": [ 1508 | "with pd.ExcelWriter('datasets/weather_and_house.xls') as writer:\n", 1509 | " weather.to_excel(writer,sheet_name='Weather',index=False)\n", 1510 | " house.to_excel(writer,sheet_name='House',index=False)\n" 1511 | ] 1512 | }, 1513 | { 1514 | "cell_type": "markdown", 1515 | "metadata": {}, 1516 | "source": [ 1517 | "#### Now access each sheet separately by passing its name while reading\n", 1518 | "See the example" 1519 | ] 1520 | }, 1521 | { 1522 | "cell_type": "code", 1523 | "execution_count": 33, 1524 | "metadata": {}, 1525 | "outputs": [ 1526 | { 1527 | "data": { 1528 | "text/html": [ 1529 | "
\n", 1530 | "\n", 1543 | "\n", 1544 | " \n", 1545 | " \n", 1546 | " \n", 1547 | " \n", 1548 | " \n", 1549 | " \n", 1550 | " \n", 1551 | " \n", 1552 | " \n", 1553 | " \n", 1554 | " \n", 1555 | " \n", 1556 | " \n", 1557 | " \n", 1558 | " \n", 1559 | " \n", 1560 | " \n", 1561 | " \n", 1562 | " \n", 1563 | " \n", 1564 | " \n", 1565 | " \n", 1566 | " \n", 1567 | " \n", 1568 | " \n", 1569 | " \n", 1570 | " \n", 1571 | " \n", 1572 | " \n", 1573 | " \n", 1574 | " \n", 1575 | " \n", 1576 | " \n", 1577 | " \n", 1578 | " \n", 1579 | " \n", 1580 | " \n", 1581 | " \n", 1582 | " \n", 1583 | " \n", 1584 | " \n", 1585 | " \n", 1586 | " \n", 1587 | " \n", 1588 | " \n", 1589 | " \n", 1590 | "
datesdaytempwind-speed
002-01-12sunny4512
103-01-12rainy4634
204-01-12hot4745
305-01-12sunny4856
406-01-12hot4967
\n", 1591 | "
" 1592 | ], 1593 | "text/plain": [ 1594 | " dates day temp wind-speed\n", 1595 | "0 02-01-12 sunny 45 12\n", 1596 | "1 03-01-12 rainy 46 34\n", 1597 | "2 04-01-12 hot 47 45\n", 1598 | "3 05-01-12 sunny 48 56\n", 1599 | "4 06-01-12 hot 49 67" 1600 | ] 1601 | }, 1602 | "execution_count": 33, 1603 | "metadata": {}, 1604 | "output_type": "execute_result" 1605 | } 1606 | ], 1607 | "source": [ 1608 | "d1=pd.read_excel('datasets/weather_and_house.xls','Weather')\n", 1609 | "d1" 1610 | ] 1611 | }, 1612 | { 1613 | "cell_type": "code", 1614 | "execution_count": 34, 1615 | "metadata": {}, 1616 | "outputs": [ 1617 | { 1618 | "data": { 1619 | "text/html": [ 1620 | "
\n", 1621 | "\n", 1634 | "\n", 1635 | " \n", 1636 | " \n", 1637 | " \n", 1638 | " \n", 1639 | " \n", 1640 | " \n", 1641 | " \n", 1642 | " \n", 1643 | " \n", 1644 | " \n", 1645 | " \n", 1646 | " \n", 1647 | " \n", 1648 | " \n", 1649 | " \n", 1650 | " \n", 1651 | " \n", 1652 | " \n", 1653 | " \n", 1654 | " \n", 1655 | " \n", 1656 | " \n", 1657 | " \n", 1658 | " \n", 1659 | " \n", 1660 | " \n", 1661 | " \n", 1662 | " \n", 1663 | " \n", 1664 | " \n", 1665 | " \n", 1666 | " \n", 1667 | " \n", 1668 | " \n", 1669 | " \n", 1670 | " \n", 1671 | " \n", 1672 | " \n", 1673 | " \n", 1674 | " \n", 1675 | " \n", 1676 | " \n", 1677 | " \n", 1678 | " \n", 1679 | " \n", 1680 | " \n", 1681 | "
bhkdateshow-oldprice
0102-01-12220000
1303-01-12530000
2204-01-12240000
3105-01-12750000
4206-01-12460000
\n", 1682 | "
" 1683 | ], 1684 | "text/plain": [ 1685 | " bhk dates how-old price\n", 1686 | "0 1 02-01-12 2 20000\n", 1687 | "1 3 03-01-12 5 30000\n", 1688 | "2 2 04-01-12 2 40000\n", 1689 | "3 1 05-01-12 7 50000\n", 1690 | "4 2 06-01-12 4 60000" 1691 | ] 1692 | }, 1693 | "execution_count": 34, 1694 | "metadata": {}, 1695 | "output_type": "execute_result" 1696 | } 1697 | ], 1698 | "source": [ 1699 | "d2=pd.read_excel('datasets/weather_and_house.xls','House')\n", 1700 | "d2" 1701 | ] 1702 | }, 1703 | { 1704 | "cell_type": "markdown", 1705 | "metadata": {}, 1706 | "source": [] 1707 | } 1708 | ], 1709 | "metadata": { 1710 | "kernelspec": { 1711 | "display_name": "Python 3", 1712 | "language": "python", 1713 | "name": "python3" 1714 | }, 1715 | "language_info": { 1716 | "codemirror_mode": { 1717 | "name": "ipython", 1718 | "version": 3 1719 | }, 1720 | "file_extension": ".py", 1721 | "mimetype": "text/x-python", 1722 | "name": "python", 1723 | "nbconvert_exporter": "python", 1724 | "pygments_lexer": "ipython3", 1725 | "version": "3.6.4" 1726 | } 1727 | }, 1728 | "nbformat": 4, 1729 | "nbformat_minor": 2 1730 | } 1731 | -------------------------------------------------------------------------------- /pandas_part5.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Handling Missing data part-2\n", 8 | "------------" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 7, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "import pandas as pd\n", 18 | "import numpy as np" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 17, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "data": { 28 | "text/html": [ 29 | "
\n", 30 | "\n", 43 | "\n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | "
AgeNameNo_of_pkgPackagetravel_id
020yrsBikash Kumar1 packages$1001
121yrsAshish Shaw5 packages$2002
223yearsDipak Kumar2pkgs$1003
320 YearsJohn Doe3 pkgs$1004
42000Elisha5000$4005
55000Md Shahid10 packages$2006
621 yrsAdrika Roy7pkgs$3007
724 yrsShashi Kumar2000$5008
\n", 121 | "
" 122 | ], 123 | "text/plain": [ 124 | " Age Name No_of_pkg Package travel_id\n", 125 | "0 20yrs Bikash Kumar 1 packages $100 1\n", 126 | "1 21yrs Ashish Shaw 5 packages $200 2\n", 127 | "2 23years Dipak Kumar 2pkgs $100 3\n", 128 | "3 20 Years John Doe 3 pkgs $100 4\n", 129 | "4 2000 Elisha 5000 $400 5\n", 130 | "5 5000 Md Shahid 10 packages $200 6\n", 131 | "6 21 yrs Adrika Roy 7pkgs $300 7\n", 132 | "7 24 yrs Shashi Kumar 2000 $500 8" 133 | ] 134 | }, 135 | "execution_count": 17, 136 | "metadata": {}, 137 | "output_type": "execute_result" 138 | } 139 | ], 140 | "source": [ 141 | "df=pd.read_csv(\"datasets/travel.csv\")\n", 142 | "df" 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": {}, 148 | "source": [ 149 | "### Replacing specific value some another value" 150 | ] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "metadata": {}, 155 | "source": [ 156 | " Suppose your dataframe contains some invalid values and you want to replace it with some other values like 0 or NaN.\n", 157 | " In this case special values are 5000 and 2000.You can see the following result where 2000 and 5000 is replaced by NaN\n", 158 | " value\n" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 19, 164 | "metadata": {}, 165 | "outputs": [ 166 | { 167 | "data": { 168 | "text/html": [ 169 | "
\n", 170 | "\n", 183 | "\n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | "
AgeNameNo_of_pkgPackagetravel_id
020yrsBikash Kumar1 packages$1001
121yrsAshish Shaw5 packages$2002
223yearsDipak Kumar2pkgs$1003
320 YearsJohn Doe3 pkgs$1004
4NaNElishaNaN$4005
5NaNMd Shahid10 packages$2006
621 yrsAdrika Roy7pkgs$3007
724 yrsShashi KumarNaN$5008
\n", 261 | "
" 262 | ], 263 | "text/plain": [ 264 | " Age Name No_of_pkg Package travel_id\n", 265 | "0 20yrs Bikash Kumar 1 packages $100 1\n", 266 | "1 21yrs Ashish Shaw 5 packages $200 2\n", 267 | "2 23years Dipak Kumar 2pkgs $100 3\n", 268 | "3 20 Years John Doe 3 pkgs $100 4\n", 269 | "4 NaN Elisha NaN $400 5\n", 270 | "5 NaN Md Shahid 10 packages $200 6\n", 271 | "6 21 yrs Adrika Roy 7pkgs $300 7\n", 272 | "7 24 yrs Shashi Kumar NaN $500 8" 273 | ] 274 | }, 275 | "execution_count": 19, 276 | "metadata": {}, 277 | "output_type": "execute_result" 278 | } 279 | ], 280 | "source": [ 281 | "df2=df.replace([\"5000\",\"2000\"],np.NaN)\n", 282 | "df2" 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "metadata": {}, 288 | "source": [ 289 | " There is one problem with this approach is it will replace all the values which you have passed in the list with\n", 290 | " you your value but in many other cases you dont want it like if you have 50000 in price column it is valid but if\n", 291 | " 50000 is in name column it is not valid in this case.So you only want to replace 50000 of name column with NaN but\n", 292 | " not of price column.\n", 293 | " In that case you need to pass the disctionary in the replace column.This disctionary will contain name of the column\n", 294 | " and the value you want to replace" 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": 21, 300 | "metadata": {}, 301 | "outputs": [ 302 | { 303 | "data": { 304 | "text/html": [ 305 | "
\n", 306 | "\n", 319 | "\n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | "
AgeNameNo_of_pkgPackagetravel_id
020yrsBikash Kumar1 packages$1001
121yrsAshish Shaw5 packages$2002
223yearsDipak Kumar2pkgs$1003
320 YearsJohn Doe3 pkgs$1004
4NaNElishaNaN$4005
5NaNMd Shahid10 packages$2006
621 yrsAdrika Roy7pkgs$3007
724 yrsShashi KumarNaN$5008
\n", 397 | "
" 398 | ], 399 | "text/plain": [ 400 | " Age Name No_of_pkg Package travel_id\n", 401 | "0 20yrs Bikash Kumar 1 packages $100 1\n", 402 | "1 21yrs Ashish Shaw 5 packages $200 2\n", 403 | "2 23years Dipak Kumar 2pkgs $100 3\n", 404 | "3 20 Years John Doe 3 pkgs $100 4\n", 405 | "4 NaN Elisha NaN $400 5\n", 406 | "5 NaN Md Shahid 10 packages $200 6\n", 407 | "6 21 yrs Adrika Roy 7pkgs $300 7\n", 408 | "7 24 yrs Shashi Kumar NaN $500 8" 409 | ] 410 | }, 411 | "execution_count": 21, 412 | "metadata": {}, 413 | "output_type": "execute_result" 414 | } 415 | ], 416 | "source": [ 417 | "df2=df.replace({\n", 418 | " 'Age':[\"2000\",\"5000\"],\n", 419 | " 'No_of_pkg':[\"2000\",\"5000\"],\n", 420 | " \"travel_id\":[0]\n", 421 | "},np.NaN)\n", 422 | "df2" 423 | ] 424 | }, 425 | { 426 | "cell_type": "markdown", 427 | "metadata": {}, 428 | "source": [ 429 | " If you want to replace some specific value like 5000 with any other value and so on.In that case you need \n", 430 | " to pass the disctionary with all keys which you want to replace and values which you want to replace with.\n", 431 | " Here 5000,2000,8 are values to replaced are the keys and np.NaN & 10 are values to be replaced with are value\n", 432 | " of the disctionary." 433 | ] 434 | }, 435 | { 436 | "cell_type": "code", 437 | "execution_count": 25, 438 | "metadata": {}, 439 | "outputs": [ 440 | { 441 | "data": { 442 | "text/html": [ 443 | "
\n", 444 | "\n", 457 | "\n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | "
AgeNameNo_of_pkgPackagetravel_id
020yrsBikash Kumar1 packages$1001
121yrsAshish Shaw5 packages$2002
223yearsDipak Kumar2pkgs$1003
320 YearsJohn Doe3 pkgs$1004
4NaNElishaNaN$4005
5NaNMd Shahid10 packages$2006
621 yrsAdrika Roy7pkgs$3007
724 yrsShashi KumarNaN$50010
\n", 535 | "
" 536 | ], 537 | "text/plain": [ 538 | " Age Name No_of_pkg Package travel_id\n", 539 | "0 20yrs Bikash Kumar 1 packages $100 1\n", 540 | "1 21yrs Ashish Shaw 5 packages $200 2\n", 541 | "2 23years Dipak Kumar 2pkgs $100 3\n", 542 | "3 20 Years John Doe 3 pkgs $100 4\n", 543 | "4 NaN Elisha NaN $400 5\n", 544 | "5 NaN Md Shahid 10 packages $200 6\n", 545 | "6 21 yrs Adrika Roy 7pkgs $300 7\n", 546 | "7 24 yrs Shashi Kumar NaN $500 10" 547 | ] 548 | }, 549 | "execution_count": 25, 550 | "metadata": {}, 551 | "output_type": "execute_result" 552 | } 553 | ], 554 | "source": [ 555 | "df2=df.replace({\n", 556 | " \"5000\":np.NaN,\n", 557 | " \"2000\":np.NaN,\n", 558 | " 8:10\n", 559 | "})\n", 560 | "df2" 561 | ] 562 | }, 563 | { 564 | "cell_type": "markdown", 565 | "metadata": {}, 566 | "source": [ 567 | "**Note:** All the values in the dataframe that belongs to keys of disctionary will be replaced no matter what column it is." 568 | ] 569 | }, 570 | { 571 | "cell_type": "markdown", 572 | "metadata": {}, 573 | "source": [ 574 | "### Removing unnecessary character from columns" 575 | ] 576 | }, 577 | { 578 | "cell_type": "markdown", 579 | "metadata": {}, 580 | "source": [ 581 | " Suppose your dataframe contains unnecessary characters with your data values.Here years/yrs/Yrs/Years\n", 582 | " in Age column, same in the No_of_pkg & Package columns are unnecessary charactors which you dont want \n", 583 | " and these charactor will prevent you from applying any kind of operation in data analysis.So you want\n", 584 | " to get rid of it.In that case you have to pass regex as a value and column name as a key of the \n", 585 | " disctionary which you have passed in the replace() function as well as you also have to set regex=True\n", 586 | " and a pass an empty string\n", 587 | " \n", 588 | "* ** [A-Za-z] ** : This is the regex of all the character from A to Z and a to z.\n", 589 | "* \\$ : This is the regex for **$** sign.\n", 590 | "* For futher information about regex go to this link https://medium.com/factory-mind/regex-tutorial-a-simple-cheatsheet-by-examples-649dc1c3f285\n" 591 | ] 592 | }, 593 | { 594 | "cell_type": "code", 595 | "execution_count": 26, 596 | "metadata": {}, 597 | "outputs": [ 598 | { 599 | "data": { 600 | "text/html": [ 601 | "
\n", 602 | "\n", 615 | "\n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | "
AgeNameNo_of_pkgPackagetravel_id
020Bikash Kumar11001
121Ashish Shaw52002
223Dipak Kumar21003
320John Doe31004
4NaNElishaNaN4005
5NaNMd Shahid102006
621Adrika Roy73007
724Shashi KumarNaN50010
\n", 693 | "
" 694 | ], 695 | "text/plain": [ 696 | " Age Name No_of_pkg Package travel_id\n", 697 | "0 20 Bikash Kumar 1 100 1\n", 698 | "1 21 Ashish Shaw 5 200 2\n", 699 | "2 23 Dipak Kumar 2 100 3\n", 700 | "3 20 John Doe 3 100 4\n", 701 | "4 NaN Elisha NaN 400 5\n", 702 | "5 NaN Md Shahid 10 200 6\n", 703 | "6 21 Adrika Roy 7 300 7\n", 704 | "7 24 Shashi Kumar NaN 500 10" 705 | ] 706 | }, 707 | "execution_count": 26, 708 | "metadata": {}, 709 | "output_type": "execute_result" 710 | } 711 | ], 712 | "source": [ 713 | "df3=df2.replace({\n", 714 | " 'Age':'[A-Za-z]',\n", 715 | " 'No_of_pkg':'[A-Za-z]',\n", 716 | " 'Package':'\\$'\n", 717 | "},\"\",regex=True)\n", 718 | "df3" 719 | ] 720 | }, 721 | { 722 | "cell_type": "markdown", 723 | "metadata": {}, 724 | "source": [ 725 | "### Mapping from one list to another list " 726 | ] 727 | }, 728 | { 729 | "cell_type": "markdown", 730 | "metadata": {}, 731 | "source": [ 732 | " If your dataset contains data which is repeating more than once or you want to change some set of string in to\n", 733 | " number then you have apply list mapping." 734 | ] 735 | }, 736 | { 737 | "cell_type": "code", 738 | "execution_count": 29, 739 | "metadata": {}, 740 | "outputs": [ 741 | { 742 | "data": { 743 | "text/html": [ 744 | "
\n", 745 | "\n", 758 | "\n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | "
gradesname
0poorShahid
1excellentAdrika
2very goodBikash
3averageAshish
4goodGanesh
5very goodZahid
6outstandingMohan
7poorSohan
\n", 809 | "
" 810 | ], 811 | "text/plain": [ 812 | " grades name\n", 813 | "0 poor Shahid\n", 814 | "1 excellent Adrika\n", 815 | "2 very good Bikash\n", 816 | "3 average Ashish\n", 817 | "4 good Ganesh\n", 818 | "5 very good Zahid\n", 819 | "6 outstanding Mohan\n", 820 | "7 poor Sohan" 821 | ] 822 | }, 823 | "execution_count": 29, 824 | "metadata": {}, 825 | "output_type": "execute_result" 826 | } 827 | ], 828 | "source": [ 829 | "mydis={\n", 830 | " \"name\":[\"Shahid\",\"Adrika\",\"Bikash\",\"Ashish\",\"Ganesh\",\"Zahid\",\"Mohan\",\"Sohan\"],\n", 831 | " \"grades\":[\"poor\",\"excellent\",\"very good\",\"average\",\"good\",\"very good\",\"outstanding\",\"poor\"]\n", 832 | " }\n", 833 | "df=pd.DataFrame(mydis)\n", 834 | "df" 835 | ] 836 | }, 837 | { 838 | "cell_type": "code", 839 | "execution_count": 30, 840 | "metadata": {}, 841 | "outputs": [ 842 | { 843 | "data": { 844 | "text/html": [ 845 | "
\n", 846 | "\n", 859 | "\n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | "
gradesname
05Shahid
19Adrika
28Bikash
36Ashish
47Ganesh
58Zahid
610Mohan
75Sohan
\n", 910 | "
" 911 | ], 912 | "text/plain": [ 913 | " grades name\n", 914 | "0 5 Shahid\n", 915 | "1 9 Adrika\n", 916 | "2 8 Bikash\n", 917 | "3 6 Ashish\n", 918 | "4 7 Ganesh\n", 919 | "5 8 Zahid\n", 920 | "6 10 Mohan\n", 921 | "7 5 Sohan" 922 | ] 923 | }, 924 | "execution_count": 30, 925 | "metadata": {}, 926 | "output_type": "execute_result" 927 | } 928 | ], 929 | "source": [ 930 | "df2=df.replace([\"poor\",\"average\",\"good\",\"very good\",\"excellent\",\"outstanding\"],[5,6,7,8,9,10])\n", 931 | "df2" 932 | ] 933 | } 934 | ], 935 | "metadata": { 936 | "kernelspec": { 937 | "display_name": "Python 3", 938 | "language": "python", 939 | "name": "python3" 940 | }, 941 | "language_info": { 942 | "codemirror_mode": { 943 | "name": "ipython", 944 | "version": 3 945 | }, 946 | "file_extension": ".py", 947 | "mimetype": "text/x-python", 948 | "name": "python", 949 | "nbconvert_exporter": "python", 950 | "pygments_lexer": "ipython3", 951 | "version": "3.6.4" 952 | } 953 | }, 954 | "nbformat": 4, 955 | "nbformat_minor": 2 956 | } 957 | -------------------------------------------------------------------------------- /pandas_part6.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Groupby : Split, Apply and Combine\n", 8 | "---------------\n", 9 | " Groupby is one of the important operations in data analysis.It includes three steps -
\n", 10 | " \n", 11 | " \n", 22 | " \n", 23 | " Groupby property is grouped the data according the column supplied to the function.In the following example\n", 24 | " you can see the i have grouped the dataframe df by its team." 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 28, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "import pandas as pd\n", 34 | "import numpy as np" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 22, 40 | "metadata": {}, 41 | "outputs": [ 42 | { 43 | "data": { 44 | "text/html": [ 45 | "
\n", 46 | "\n", 59 | "\n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | "
MatchRunYearteam
023302012India
142302012New zealand
223002012Australia
311802012India
452002013India
562502013New zealand
631902013Australia
724002013India
853402014New zealand
932902014Australia
1013902014New zealand
1113332014Australia
\n", 156 | "
" 157 | ], 158 | "text/plain": [ 159 | " Match Run Year team\n", 160 | "0 2 330 2012 India\n", 161 | "1 4 230 2012 New zealand\n", 162 | "2 2 300 2012 Australia\n", 163 | "3 1 180 2012 India\n", 164 | "4 5 200 2013 India\n", 165 | "5 6 250 2013 New zealand\n", 166 | "6 3 190 2013 Australia\n", 167 | "7 2 400 2013 India\n", 168 | "8 5 340 2014 New zealand\n", 169 | "9 3 290 2014 Australia\n", 170 | "10 1 390 2014 New zealand\n", 171 | "11 1 333 2014 Australia" 172 | ] 173 | }, 174 | "execution_count": 22, 175 | "metadata": {}, 176 | "output_type": "execute_result" 177 | } 178 | ], 179 | "source": [ 180 | "d={\n", 181 | " 'team':[\"India\",\"New zealand\",\"Australia\",\"India\",\n", 182 | " \"India\",\"New zealand\",\"Australia\",\"India\",\n", 183 | " \"New zealand\",\"Australia\",\"New zealand\",\"Australia\"],\n", 184 | " 'Run':[330,230,300,180,200,250,190,400,340,290,390,333],\n", 185 | " \"Match\":[2,4,2,1,5,6,3,2,5,3,1,1],\n", 186 | " \"Year\":['2012','2012','2012','2012','2013','2013','2013','2013','2014','2014','2014','2014']\n", 187 | " }\n", 188 | "df=pd.DataFrame(d)\n", 189 | "df" 190 | ] 191 | }, 192 | { 193 | "cell_type": "markdown", 194 | "metadata": {}, 195 | "source": [ 196 | " groupby() function will return an object.we can imagine that every group is pointing to its dataframe.\n", 197 | " " 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 4, 203 | "metadata": {}, 204 | "outputs": [ 205 | { 206 | "data": { 207 | "text/plain": [ 208 | "" 209 | ] 210 | }, 211 | "execution_count": 4, 212 | "metadata": {}, 213 | "output_type": "execute_result" 214 | } 215 | ], 216 | "source": [ 217 | "g=df.groupby('team')\n", 218 | "g" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": 14, 224 | "metadata": {}, 225 | "outputs": [ 226 | { 227 | "name": "stdout", 228 | "output_type": "stream", 229 | "text": [ 230 | "Australia\n", 231 | "----------\n", 232 | " Match Run Year team\n", 233 | "2 2 300 2012 Australia\n", 234 | "6 3 190 2013 Australia\n", 235 | "9 3 290 2014 Australia\n", 236 | "11 1 333 2014 Australia\n", 237 | "-----------------------------------\n", 238 | "India\n", 239 | "----------\n", 240 | " Match Run Year team\n", 241 | "0 2 330 2012 India\n", 242 | "3 1 180 2012 India\n", 243 | "4 5 200 2013 India\n", 244 | "7 2 400 2013 India\n", 245 | "-----------------------------------\n", 246 | "New zealand\n", 247 | "----------\n", 248 | " Match Run Year team\n", 249 | "1 4 230 2012 New zealand\n", 250 | "5 6 250 2013 New zealand\n", 251 | "8 5 340 2014 New zealand\n", 252 | "10 1 390 2014 New zealand\n", 253 | "-----------------------------------\n" 254 | ] 255 | } 256 | ], 257 | "source": [ 258 | "for team,teamdata in g:\n", 259 | " print(team)\n", 260 | " print(\"-\"*10)\n", 261 | " print(teamdata)\n", 262 | " print(\"-\"*35)" 263 | ] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": {}, 268 | "source": [ 269 | " **get_group('group name') :**
\n", 270 | " It will return dataframe of particular group" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": 16, 276 | "metadata": {}, 277 | "outputs": [ 278 | { 279 | "data": { 280 | "text/html": [ 281 | "
\n", 282 | "\n", 295 | "\n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | "
MatchRunYear
023302012
311802012
452002013
724002013
\n", 331 | "
" 332 | ], 333 | "text/plain": [ 334 | " Match Run Year\n", 335 | "0 2 330 2012\n", 336 | "3 1 180 2012\n", 337 | "4 5 200 2013\n", 338 | "7 2 400 2013" 339 | ] 340 | }, 341 | "execution_count": 16, 342 | "metadata": {}, 343 | "output_type": "execute_result" 344 | } 345 | ], 346 | "source": [ 347 | "g.get_group('India')" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": 17, 353 | "metadata": {}, 354 | "outputs": [ 355 | { 356 | "data": { 357 | "text/html": [ 358 | "
\n", 359 | "\n", 372 | "\n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | "
MatchRunYear
223002012
631902013
932902014
1113332014
\n", 408 | "
" 409 | ], 410 | "text/plain": [ 411 | " Match Run Year\n", 412 | "2 2 300 2012\n", 413 | "6 3 190 2013\n", 414 | "9 3 290 2014\n", 415 | "11 1 333 2014" 416 | ] 417 | }, 418 | "execution_count": 17, 419 | "metadata": {}, 420 | "output_type": "execute_result" 421 | } 422 | ], 423 | "source": [ 424 | "g.get_group('Australia')" 425 | ] 426 | }, 427 | { 428 | "cell_type": "markdown", 429 | "metadata": {}, 430 | "source": [ 431 | " We can also perform the operations which we were applying in dataframe.This only difference here you get is \n", 432 | " your operation will be applied to all of group and return the result of all groups." 433 | ] 434 | }, 435 | { 436 | "cell_type": "code", 437 | "execution_count": 18, 438 | "metadata": {}, 439 | "outputs": [ 440 | { 441 | "data": { 442 | "text/html": [ 443 | "
\n", 444 | "\n", 457 | "\n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | "
MatchRunYear
team
Australia11902012
India11802012
New zealand12302012
\n", 493 | "
" 494 | ], 495 | "text/plain": [ 496 | " Match Run Year\n", 497 | "team \n", 498 | "Australia 1 190 2012\n", 499 | "India 1 180 2012\n", 500 | "New zealand 1 230 2012" 501 | ] 502 | }, 503 | "execution_count": 18, 504 | "metadata": {}, 505 | "output_type": "execute_result" 506 | } 507 | ], 508 | "source": [ 509 | "g.min()" 510 | ] 511 | }, 512 | { 513 | "cell_type": "code", 514 | "execution_count": 20, 515 | "metadata": {}, 516 | "outputs": [ 517 | { 518 | "data": { 519 | "text/html": [ 520 | "
\n", 521 | "\n", 534 | "\n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | "
MatchRunYear
team
Australia2.25278.252013.25
India2.50277.502012.50
New zealand4.00302.502013.25
\n", 570 | "
" 571 | ], 572 | "text/plain": [ 573 | " Match Run Year\n", 574 | "team \n", 575 | "Australia 2.25 278.25 2013.25\n", 576 | "India 2.50 277.50 2012.50\n", 577 | "New zealand 4.00 302.50 2013.25" 578 | ] 579 | }, 580 | "execution_count": 20, 581 | "metadata": {}, 582 | "output_type": "execute_result" 583 | } 584 | ], 585 | "source": [ 586 | "g.mean()" 587 | ] 588 | }, 589 | { 590 | "cell_type": "code", 591 | "execution_count": 30, 592 | "metadata": {}, 593 | "outputs": [ 594 | { 595 | "data": { 596 | "text/html": [ 597 | "
\n", 598 | "\n", 611 | "\n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | "
aminamaxmeansumstd
Year
2012180330260.00104067.823300
2013190400260.00104096.953597
2014290390338.25135340.974586
\n", 657 | "
" 658 | ], 659 | "text/plain": [ 660 | " amin amax mean sum std\n", 661 | "Year \n", 662 | "2012 180 330 260.00 1040 67.823300\n", 663 | "2013 190 400 260.00 1040 96.953597\n", 664 | "2014 290 390 338.25 1353 40.974586" 665 | ] 666 | }, 667 | "execution_count": 30, 668 | "metadata": {}, 669 | "output_type": "execute_result" 670 | } 671 | ], 672 | "source": [ 673 | "g_yr=df.groupby('Year')\n", 674 | "g_yr['Run'].agg([np.min,np.max,np.mean,np.sum,np.std])" 675 | ] 676 | }, 677 | { 678 | "cell_type": "code", 679 | "execution_count": null, 680 | "metadata": {}, 681 | "outputs": [], 682 | "source": [] 683 | }, 684 | { 685 | "cell_type": "code", 686 | "execution_count": null, 687 | "metadata": {}, 688 | "outputs": [], 689 | "source": [] 690 | }, 691 | { 692 | "cell_type": "code", 693 | "execution_count": null, 694 | "metadata": {}, 695 | "outputs": [], 696 | "source": [] 697 | }, 698 | { 699 | "cell_type": "code", 700 | "execution_count": null, 701 | "metadata": {}, 702 | "outputs": [], 703 | "source": [] 704 | } 705 | ], 706 | "metadata": { 707 | "kernelspec": { 708 | "display_name": "Python 3", 709 | "language": "python", 710 | "name": "python3" 711 | }, 712 | "language_info": { 713 | "codemirror_mode": { 714 | "name": "ipython", 715 | "version": 3 716 | }, 717 | "file_extension": ".py", 718 | "mimetype": "text/x-python", 719 | "name": "python", 720 | "nbconvert_exporter": "python", 721 | "pygments_lexer": "ipython3", 722 | "version": "3.6.4" 723 | } 724 | }, 725 | "nbformat": 4, 726 | "nbformat_minor": 2 727 | } 728 | -------------------------------------------------------------------------------- /pandas_part7.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Concat DataFrame \n", 8 | "------------\n", 9 | " Concat is very useful method of pandas.You can concatanate two dataframes in two way -\n", 10 | " 1)Append by row\n", 11 | " 2)Append by col\n", 12 | " \n", 13 | "### 1) Append by row :\n", 14 | " Concat is the method of pandas in which you can join two and more dataframes provided its indices are same.\n", 15 | " Let's take an example -\n", 16 | " You have two dataframes of weathers of two cities Kolkata and Chennai,\n", 17 | " \n", 18 | " df=pd.concat([dataframe_1,dataframe_2,...,dataframe_n]) \n", 19 | " for n number of dataframes," 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import pandas as pd" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 3, 34 | "metadata": {}, 35 | "outputs": [ 36 | { 37 | "data": { 38 | "text/html": [ 39 | "
\n", 40 | "\n", 53 | "\n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | "
dateeventtempwind-speed
001-02-12hot2033
103-02-12sunny2123
204-02-12rainy1545
305-02-12cold1824
\n", 94 | "
" 95 | ], 96 | "text/plain": [ 97 | " date event temp wind-speed\n", 98 | "0 01-02-12 hot 20 33\n", 99 | "1 03-02-12 sunny 21 23\n", 100 | "2 04-02-12 rainy 15 45\n", 101 | "3 05-02-12 cold 18 24" 102 | ] 103 | }, 104 | "execution_count": 3, 105 | "metadata": {}, 106 | "output_type": "execute_result" 107 | } 108 | ], 109 | "source": [ 110 | "chennai={\n", 111 | " \"date\":['01-02-12','03-02-12','04-02-12','05-02-12'],\n", 112 | " \"event\":['hot','sunny','rainy','cold'],\n", 113 | " \"wind-speed\":[33,23,45,24],\n", 114 | " \"temp\":[20,21,15,18]\n", 115 | "}\n", 116 | "chen=pd.DataFrame(chennai)\n", 117 | "chen" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 5, 123 | "metadata": {}, 124 | "outputs": [ 125 | { 126 | "data": { 127 | "text/html": [ 128 | "
\n", 129 | "\n", 142 | "\n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | "
dateeventtempwind-speed
001-02-12sunny1412
103-02-12cold1610
204-02-12cold159
305-02-12rainy1014
\n", 183 | "
" 184 | ], 185 | "text/plain": [ 186 | " date event temp wind-speed\n", 187 | "0 01-02-12 sunny 14 12\n", 188 | "1 03-02-12 cold 16 10\n", 189 | "2 04-02-12 cold 15 9\n", 190 | "3 05-02-12 rainy 10 14" 191 | ] 192 | }, 193 | "execution_count": 5, 194 | "metadata": {}, 195 | "output_type": "execute_result" 196 | } 197 | ], 198 | "source": [ 199 | "kolkata={\n", 200 | " \"date\":['01-02-12','03-02-12','04-02-12','05-02-12'],\n", 201 | " \"event\":['sunny','cold','cold','rainy'],\n", 202 | " \"wind-speed\":[12,10,9,14],\n", 203 | " \"temp\":[14,16,15,10]\n", 204 | "}\n", 205 | "kol=pd.DataFrame(kolkata)\n", 206 | "kol" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 6, 212 | "metadata": {}, 213 | "outputs": [ 214 | { 215 | "data": { 216 | "text/html": [ 217 | "
\n", 218 | "\n", 231 | "\n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | "
dateeventtempwind-speed
001-02-12sunny1412
103-02-12cold1610
204-02-12cold159
305-02-12rainy1014
001-02-12hot2033
103-02-12sunny2123
204-02-12rainy1545
305-02-12cold1824
\n", 300 | "
" 301 | ], 302 | "text/plain": [ 303 | " date event temp wind-speed\n", 304 | "0 01-02-12 sunny 14 12\n", 305 | "1 03-02-12 cold 16 10\n", 306 | "2 04-02-12 cold 15 9\n", 307 | "3 05-02-12 rainy 10 14\n", 308 | "0 01-02-12 hot 20 33\n", 309 | "1 03-02-12 sunny 21 23\n", 310 | "2 04-02-12 rainy 15 45\n", 311 | "3 05-02-12 cold 18 24" 312 | ] 313 | }, 314 | "execution_count": 6, 315 | "metadata": {}, 316 | "output_type": "execute_result" 317 | } 318 | ], 319 | "source": [ 320 | "df=pd.concat([kol,chen])\n", 321 | "df" 322 | ] 323 | }, 324 | { 325 | "cell_type": "markdown", 326 | "metadata": {}, 327 | "source": [ 328 | " **You can observe the index first 0 to 3 then again 0 to 3.To ignore this you have to pass an extra argument.**" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": 8, 334 | "metadata": {}, 335 | "outputs": [ 336 | { 337 | "data": { 338 | "text/html": [ 339 | "
\n", 340 | "\n", 353 | "\n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | "
dateeventtempwind-speed
001-02-12sunny1412
103-02-12cold1610
204-02-12cold159
305-02-12rainy1014
401-02-12hot2033
503-02-12sunny2123
604-02-12rainy1545
705-02-12cold1824
\n", 422 | "
" 423 | ], 424 | "text/plain": [ 425 | " date event temp wind-speed\n", 426 | "0 01-02-12 sunny 14 12\n", 427 | "1 03-02-12 cold 16 10\n", 428 | "2 04-02-12 cold 15 9\n", 429 | "3 05-02-12 rainy 10 14\n", 430 | "4 01-02-12 hot 20 33\n", 431 | "5 03-02-12 sunny 21 23\n", 432 | "6 04-02-12 rainy 15 45\n", 433 | "7 05-02-12 cold 18 24" 434 | ] 435 | }, 436 | "execution_count": 8, 437 | "metadata": {}, 438 | "output_type": "execute_result" 439 | } 440 | ], 441 | "source": [ 442 | "df=pd.concat([kol,chen],ignore_index=True)\n", 443 | "df\n", 444 | "# Now see index column" 445 | ] 446 | }, 447 | { 448 | "cell_type": "markdown", 449 | "metadata": {}, 450 | "source": [ 451 | "##### If you want to join dataframes with dataframe name as an index name" 452 | ] 453 | }, 454 | { 455 | "cell_type": "code", 456 | "execution_count": 9, 457 | "metadata": {}, 458 | "outputs": [ 459 | { 460 | "data": { 461 | "text/html": [ 462 | "
\n", 463 | "\n", 476 | "\n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | "
dateeventtempwind-speed
Kolkata001-02-12sunny1412
103-02-12cold1610
204-02-12cold159
305-02-12rainy1014
Chennai001-02-12hot2033
103-02-12sunny2123
204-02-12rainy1545
305-02-12cold1824
\n", 548 | "
" 549 | ], 550 | "text/plain": [ 551 | " date event temp wind-speed\n", 552 | "Kolkata 0 01-02-12 sunny 14 12\n", 553 | " 1 03-02-12 cold 16 10\n", 554 | " 2 04-02-12 cold 15 9\n", 555 | " 3 05-02-12 rainy 10 14\n", 556 | "Chennai 0 01-02-12 hot 20 33\n", 557 | " 1 03-02-12 sunny 21 23\n", 558 | " 2 04-02-12 rainy 15 45\n", 559 | " 3 05-02-12 cold 18 24" 560 | ] 561 | }, 562 | "execution_count": 9, 563 | "metadata": {}, 564 | "output_type": "execute_result" 565 | } 566 | ], 567 | "source": [ 568 | "df=pd.concat([kol,chen],keys=['Kolkata','Chennai'])\n", 569 | "df" 570 | ] 571 | }, 572 | { 573 | "cell_type": "code", 574 | "execution_count": 11, 575 | "metadata": {}, 576 | "outputs": [ 577 | { 578 | "data": { 579 | "text/html": [ 580 | "
\n", 581 | "\n", 594 | "\n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | "
dateeventtempwind-speed
001-02-12sunny1412
103-02-12cold1610
204-02-12cold159
305-02-12rainy1014
\n", 635 | "
" 636 | ], 637 | "text/plain": [ 638 | " date event temp wind-speed\n", 639 | "0 01-02-12 sunny 14 12\n", 640 | "1 03-02-12 cold 16 10\n", 641 | "2 04-02-12 cold 15 9\n", 642 | "3 05-02-12 rainy 10 14" 643 | ] 644 | }, 645 | "execution_count": 11, 646 | "metadata": {}, 647 | "output_type": "execute_result" 648 | } 649 | ], 650 | "source": [ 651 | "df.loc['Kolkata']" 652 | ] 653 | }, 654 | { 655 | "cell_type": "code", 656 | "execution_count": 12, 657 | "metadata": {}, 658 | "outputs": [ 659 | { 660 | "data": { 661 | "text/html": [ 662 | "
\n", 663 | "\n", 676 | "\n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | "
dateeventtempwind-speed
001-02-12hot2033
103-02-12sunny2123
204-02-12rainy1545
305-02-12cold1824
\n", 717 | "
" 718 | ], 719 | "text/plain": [ 720 | " date event temp wind-speed\n", 721 | "0 01-02-12 hot 20 33\n", 722 | "1 03-02-12 sunny 21 23\n", 723 | "2 04-02-12 rainy 15 45\n", 724 | "3 05-02-12 cold 18 24" 725 | ] 726 | }, 727 | "execution_count": 12, 728 | "metadata": {}, 729 | "output_type": "execute_result" 730 | } 731 | ], 732 | "source": [ 733 | "df.loc['Chennai']" 734 | ] 735 | }, 736 | { 737 | "cell_type": "markdown", 738 | "metadata": {}, 739 | "source": [ 740 | "### 2) Append by column :\n", 741 | " if you have two dataframes and you want to append column wise. For example - if you have two dataframes of weather\n", 742 | " first dataframe having columns date,event & temp and second dataframe having columns date,event & wind-speed when you\n", 743 | " join both you will get one dataframe having columns date, event, temp, date, event and wind-speed.\n", 744 | " To join two dataframe column wise\n", 745 | " you have to pass axis=1 in concat() method.\n", 746 | " \n", 747 | " df=pd.concat([dataframe_1,dataframe_2,...,dataframe_n],axis=1) " 748 | ] 749 | }, 750 | { 751 | "cell_type": "code", 752 | "execution_count": 3, 753 | "metadata": {}, 754 | "outputs": [ 755 | { 756 | "data": { 757 | "text/html": [ 758 | "
\n", 759 | "\n", 772 | "\n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | "
dateeventtemp
001-02-12sunny14
103-02-12cold16
204-02-12cold15
305-02-12rainy10
\n", 808 | "
" 809 | ], 810 | "text/plain": [ 811 | " date event temp\n", 812 | "0 01-02-12 sunny 14\n", 813 | "1 03-02-12 cold 16\n", 814 | "2 04-02-12 cold 15\n", 815 | "3 05-02-12 rainy 10" 816 | ] 817 | }, 818 | "execution_count": 3, 819 | "metadata": {}, 820 | "output_type": "execute_result" 821 | } 822 | ], 823 | "source": [ 824 | "temp1={\n", 825 | " \"date\":['01-02-12','03-02-12','04-02-12','05-02-12'],\n", 826 | " \"event\":['sunny','cold','cold','rainy'],\n", 827 | " \"temp\":[14,16,15,10]\n", 828 | "}\n", 829 | "temp=pd.DataFrame(temp1)\n", 830 | "temp" 831 | ] 832 | }, 833 | { 834 | "cell_type": "code", 835 | "execution_count": 7, 836 | "metadata": {}, 837 | "outputs": [ 838 | { 839 | "data": { 840 | "text/html": [ 841 | "
\n", 842 | "\n", 855 | "\n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | "
dateeventwind-speed
001-02-12sunny12
103-02-12cold10
204-02-12cold9
305-02-12rainy14
\n", 891 | "
" 892 | ], 893 | "text/plain": [ 894 | " date event wind-speed\n", 895 | "0 01-02-12 sunny 12\n", 896 | "1 03-02-12 cold 10\n", 897 | "2 04-02-12 cold 9\n", 898 | "3 05-02-12 rainy 14" 899 | ] 900 | }, 901 | "execution_count": 7, 902 | "metadata": {}, 903 | "output_type": "execute_result" 904 | } 905 | ], 906 | "source": [ 907 | "ws={\n", 908 | " \"date\":['01-02-12','03-02-12','04-02-12','05-02-12'],\n", 909 | " \"event\":['sunny','cold','cold','rainy'],\n", 910 | " \"wind-speed\":[12,10,9,14],\n", 911 | "}\n", 912 | "wind_speed=pd.DataFrame(ws)\n", 913 | "wind_speed" 914 | ] 915 | }, 916 | { 917 | "cell_type": "code", 918 | "execution_count": 9, 919 | "metadata": {}, 920 | "outputs": [ 921 | { 922 | "data": { 923 | "text/html": [ 924 | "
\n", 925 | "\n", 938 | "\n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | "
dateeventtempdateeventwind-speed
001-02-12sunny1401-02-12sunny12
103-02-12cold1603-02-12cold10
204-02-12cold1504-02-12cold9
305-02-12rainy1005-02-12rainy14
\n", 989 | "
" 990 | ], 991 | "text/plain": [ 992 | " date event temp date event wind-speed\n", 993 | "0 01-02-12 sunny 14 01-02-12 sunny 12\n", 994 | "1 03-02-12 cold 16 03-02-12 cold 10\n", 995 | "2 04-02-12 cold 15 04-02-12 cold 9\n", 996 | "3 05-02-12 rainy 10 05-02-12 rainy 14" 997 | ] 998 | }, 999 | "execution_count": 9, 1000 | "metadata": {}, 1001 | "output_type": "execute_result" 1002 | } 1003 | ], 1004 | "source": [ 1005 | "df=pd.concat([temp,wind_speed],axis=1)\n", 1006 | "df" 1007 | ] 1008 | }, 1009 | { 1010 | "cell_type": "markdown", 1011 | "metadata": {}, 1012 | "source": [] 1013 | } 1014 | ], 1015 | "metadata": { 1016 | "kernelspec": { 1017 | "display_name": "Python 3", 1018 | "language": "python", 1019 | "name": "python3" 1020 | }, 1021 | "language_info": { 1022 | "codemirror_mode": { 1023 | "name": "ipython", 1024 | "version": 3 1025 | }, 1026 | "file_extension": ".py", 1027 | "mimetype": "text/x-python", 1028 | "name": "python", 1029 | "nbconvert_exporter": "python", 1030 | "pygments_lexer": "ipython3", 1031 | "version": "3.6.4" 1032 | } 1033 | }, 1034 | "nbformat": 4, 1035 | "nbformat_minor": 2 1036 | } 1037 | -------------------------------------------------------------------------------- /pandas_part8.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Merging dataframes \n", 8 | "-------\n", 9 | " merge() is a method in pandas in which you can merge two dataframes withou repeating columns as we did in\n", 10 | " concat() method.\n", 11 | " There are two types of merging -\n", 12 | " 1) Inner join\n", 13 | " 2) outer join\n", 14 | " ### 1) Inner join\n", 15 | " In this method you will get the intersetion of two dataframes with merged column.It is the default merge method.\n", 16 | " In merge() method you have to pass the dataframes as arguments and list of columns on which you want to merge\n", 17 | " and dataframes as arguments." 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 1, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "import pandas as pd" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 2, 32 | "metadata": {}, 33 | "outputs": [ 34 | { 35 | "data": { 36 | "text/html": [ 37 | "
\n", 38 | "\n", 51 | "\n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | "
dateeventtemp
001-02-12sunny14
103-02-12cold16
204-02-12cold15
305-02-12rainy10
\n", 87 | "
" 88 | ], 89 | "text/plain": [ 90 | " date event temp\n", 91 | "0 01-02-12 sunny 14\n", 92 | "1 03-02-12 cold 16\n", 93 | "2 04-02-12 cold 15\n", 94 | "3 05-02-12 rainy 10" 95 | ] 96 | }, 97 | "execution_count": 2, 98 | "metadata": {}, 99 | "output_type": "execute_result" 100 | } 101 | ], 102 | "source": [ 103 | "temp1={\n", 104 | " \"date\":['01-02-12','03-02-12','04-02-12','05-02-12'],\n", 105 | " \"event\":['sunny','cold','cold','rainy'],\n", 106 | " \"temp\":[14,16,15,10]\n", 107 | "}\n", 108 | "temp=pd.DataFrame(temp1)\n", 109 | "temp" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 3, 115 | "metadata": {}, 116 | "outputs": [ 117 | { 118 | "data": { 119 | "text/html": [ 120 | "
\n", 121 | "\n", 134 | "\n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | "
dateeventwind-speed
001-02-12sunny12
103-02-12cold10
204-02-12cold9
305-02-12rainy14
\n", 170 | "
" 171 | ], 172 | "text/plain": [ 173 | " date event wind-speed\n", 174 | "0 01-02-12 sunny 12\n", 175 | "1 03-02-12 cold 10\n", 176 | "2 04-02-12 cold 9\n", 177 | "3 05-02-12 rainy 14" 178 | ] 179 | }, 180 | "execution_count": 3, 181 | "metadata": {}, 182 | "output_type": "execute_result" 183 | } 184 | ], 185 | "source": [ 186 | "ws={\n", 187 | " \"date\":['01-02-12','03-02-12','04-02-12','05-02-12'],\n", 188 | " \"event\":['sunny','cold','cold','rainy'],\n", 189 | " \"wind-speed\":[12,10,9,14],\n", 190 | "}\n", 191 | "wind_speed=pd.DataFrame(ws)\n", 192 | "wind_speed" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 5, 198 | "metadata": {}, 199 | "outputs": [ 200 | { 201 | "data": { 202 | "text/html": [ 203 | "
\n", 204 | "\n", 217 | "\n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | "
dateeventtempwind-speed
001-02-12sunny1412
103-02-12cold1610
204-02-12cold159
305-02-12rainy1014
\n", 258 | "
" 259 | ], 260 | "text/plain": [ 261 | " date event temp wind-speed\n", 262 | "0 01-02-12 sunny 14 12\n", 263 | "1 03-02-12 cold 16 10\n", 264 | "2 04-02-12 cold 15 9\n", 265 | "3 05-02-12 rainy 10 14" 266 | ] 267 | }, 268 | "execution_count": 5, 269 | "metadata": {}, 270 | "output_type": "execute_result" 271 | } 272 | ], 273 | "source": [ 274 | "df=pd.merge(temp,wind_speed,on=['date','event'])\n", 275 | "df" 276 | ] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "metadata": {}, 281 | "source": [ 282 | " If you have different data in the common columns then you can not do inner merge in this case.\n", 283 | " Assume the following case:\n", 284 | " \n", 285 | " temp \n", 286 | " ___________________\n", 287 | " | event | temp |\n", 288 | " |--------|----------|\n", 289 | " | sunny | 40 |\n", 290 | " | hot | 30 |\n", 291 | " | rainy | 25 |\n", 292 | " |________|__________|\n", 293 | " \n", 294 | " wind-speed\n", 295 | " ___________________\n", 296 | " | event |wind-speed|\n", 297 | " |--------|----------|\n", 298 | " | sunny | 12 |\n", 299 | " | hot | 14 |\n", 300 | " | hot | 11 |\n", 301 | " |________|__________|\n", 302 | " \n", 303 | " Merged Dataframe by inner method\n", 304 | " \n", 305 | " ________________________________\n", 306 | " | event | temp | wind-speed |\n", 307 | " |--------|----------|------------|\n", 308 | " | sunny | 40 | 12 | \n", 309 | " | hot | 30 | 14 |\n", 310 | " |________|__________|____________|\n", 311 | " \n", 312 | " You can see that last row is not merged here because there is no common element in the common columns.\n", 313 | " " 314 | ] 315 | }, 316 | { 317 | "cell_type": "markdown", 318 | "metadata": {}, 319 | "source": [ 320 | "### Outer join\n", 321 | "This is just like union of two dataframe.The value which dont exist will contain NaN. \n", 322 | " \n", 323 | "\n", 324 | " temp \n", 325 | " ___________________\n", 326 | " | event | temp |\n", 327 | " |--------|----------|\n", 328 | " | sunny | 40 |\n", 329 | " | hot | 30 |\n", 330 | " | rainy | 25 |\n", 331 | " |________|__________|\n", 332 | " \n", 333 | " wind-speed\n", 334 | " ___________________\n", 335 | " | event |wind-speed|\n", 336 | " |--------|----------|\n", 337 | " | sunny | 12 |\n", 338 | " | hot | 14 |\n", 339 | " | hot | 11 |\n", 340 | " |________|__________|\n", 341 | " \n", 342 | " Merged Dataframe by inner method\n", 343 | " \n", 344 | " ________________________________\n", 345 | " | event | temp | wind-speed |\n", 346 | " |--------|----------|------------|\n", 347 | " | sunny | 40 | 12 | \n", 348 | " | hot | 30 | 14 |\n", 349 | " | rainy | 25 | NaN |\n", 350 | " | hot | NaN | 11 |\n", 351 | " |________|__________|____________|\n", 352 | " \n", 353 | " You can see in the final dataframe two rows increased." 354 | ] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": 8, 359 | "metadata": {}, 360 | "outputs": [ 361 | { 362 | "data": { 363 | "text/html": [ 364 | "
\n", 365 | "\n", 378 | "\n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | "
dateeventtemp
001-02-12sunny14
103-02-12cold16
204-02-12hot15
305-02-12sunny10
\n", 414 | "
" 415 | ], 416 | "text/plain": [ 417 | " date event temp\n", 418 | "0 01-02-12 sunny 14\n", 419 | "1 03-02-12 cold 16\n", 420 | "2 04-02-12 hot 15\n", 421 | "3 05-02-12 sunny 10" 422 | ] 423 | }, 424 | "execution_count": 8, 425 | "metadata": {}, 426 | "output_type": "execute_result" 427 | } 428 | ], 429 | "source": [ 430 | "temp1={\n", 431 | " \"date\":['01-02-12','03-02-12','04-02-12','05-02-12'],\n", 432 | " \"event\":['sunny','cold','hot','sunny'],\n", 433 | " \"temp\":[14,16,15,10]\n", 434 | "}\n", 435 | "temp=pd.DataFrame(temp1)\n", 436 | "temp" 437 | ] 438 | }, 439 | { 440 | "cell_type": "code", 441 | "execution_count": 7, 442 | "metadata": {}, 443 | "outputs": [ 444 | { 445 | "data": { 446 | "text/html": [ 447 | "
\n", 448 | "\n", 461 | "\n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | "
dateeventwind-speed
001-02-12sunny12
103-02-12cold10
204-02-12cold9
305-02-12rainy14
\n", 497 | "
" 498 | ], 499 | "text/plain": [ 500 | " date event wind-speed\n", 501 | "0 01-02-12 sunny 12\n", 502 | "1 03-02-12 cold 10\n", 503 | "2 04-02-12 cold 9\n", 504 | "3 05-02-12 rainy 14" 505 | ] 506 | }, 507 | "execution_count": 7, 508 | "metadata": {}, 509 | "output_type": "execute_result" 510 | } 511 | ], 512 | "source": [ 513 | "ws={\n", 514 | " \"date\":['01-02-12','03-02-12','04-02-12','05-02-12'],\n", 515 | " \"event\":['sunny','cold','cold','rainy'],\n", 516 | " \"wind-speed\":[12,10,9,14],\n", 517 | "}\n", 518 | "wind_speed=pd.DataFrame(ws)\n", 519 | "wind_speed" 520 | ] 521 | }, 522 | { 523 | "cell_type": "markdown", 524 | "metadata": {}, 525 | "source": [ 526 | "##### One extra argument you need to pass in outer join i.e, how=\"outer\".By defaut it is inner." 527 | ] 528 | }, 529 | { 530 | "cell_type": "code", 531 | "execution_count": 9, 532 | "metadata": {}, 533 | "outputs": [ 534 | { 535 | "data": { 536 | "text/html": [ 537 | "
\n", 538 | "\n", 551 | "\n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | "
dateeventtempwind-speed
001-02-12sunny14.012.0
103-02-12cold16.010.0
204-02-12hot15.0NaN
305-02-12sunny10.0NaN
404-02-12coldNaN9.0
505-02-12rainyNaN14.0
\n", 606 | "
" 607 | ], 608 | "text/plain": [ 609 | " date event temp wind-speed\n", 610 | "0 01-02-12 sunny 14.0 12.0\n", 611 | "1 03-02-12 cold 16.0 10.0\n", 612 | "2 04-02-12 hot 15.0 NaN\n", 613 | "3 05-02-12 sunny 10.0 NaN\n", 614 | "4 04-02-12 cold NaN 9.0\n", 615 | "5 05-02-12 rainy NaN 14.0" 616 | ] 617 | }, 618 | "execution_count": 9, 619 | "metadata": {}, 620 | "output_type": "execute_result" 621 | } 622 | ], 623 | "source": [ 624 | "df=pd.merge(temp,wind_speed,on=['date','event'],how='outer')\n", 625 | "df" 626 | ] 627 | }, 628 | { 629 | "cell_type": "code", 630 | "execution_count": null, 631 | "metadata": {}, 632 | "outputs": [], 633 | "source": [] 634 | } 635 | ], 636 | "metadata": { 637 | "kernelspec": { 638 | "display_name": "Python 3", 639 | "language": "python", 640 | "name": "python3" 641 | }, 642 | "language_info": { 643 | "codemirror_mode": { 644 | "name": "ipython", 645 | "version": 3 646 | }, 647 | "file_extension": ".py", 648 | "mimetype": "text/x-python", 649 | "name": "python", 650 | "nbconvert_exporter": "python", 651 | "pygments_lexer": "ipython3", 652 | "version": "3.6.4" 653 | } 654 | }, 655 | "nbformat": 4, 656 | "nbformat_minor": 2 657 | } 658 | --------------------------------------------------------------------------------