├── Week3 ├── Week3.py ├── Week3 slides.pdf ├── Assignment - Week 3 │ ├── scimagojr-3.xlsx │ ├── Energy Indicators.xls │ └── Assignment+3.ipynb └── Week+3.ipynb ├── Week4 ├── Week4 slides.pdf └── Week+4.ipynb ├── Week1 ├── Week1.py └── Week+1.ipynb ├── Week2 └── Week2.py └── README.md /Week3/Week3.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Week3/Week3 slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanniey/Coursera_Intro_to_Data_Science_with_Python/HEAD/Week3/Week3 slides.pdf -------------------------------------------------------------------------------- /Week4/Week4 slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanniey/Coursera_Intro_to_Data_Science_with_Python/HEAD/Week4/Week4 slides.pdf -------------------------------------------------------------------------------- /Week3/Assignment - Week 3/scimagojr-3.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanniey/Coursera_Intro_to_Data_Science_with_Python/HEAD/Week3/Assignment - Week 3/scimagojr-3.xlsx -------------------------------------------------------------------------------- /Week3/Assignment - Week 3/Energy Indicators.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanniey/Coursera_Intro_to_Data_Science_with_Python/HEAD/Week3/Assignment - Week 3/Energy Indicators.xls -------------------------------------------------------------------------------- /Week1/Week1.py: -------------------------------------------------------------------------------- 1 | people = ['Dr. Christopher Brooks', 'Dr. Kevyn Collins-Thompson', 'Dr. VG Vinod Vydiswaran', 'Dr. 
Daniel Romero'] 2 | 3 | titleName = [] 4 | def split_title_and_name(): 5 | for person in people: 6 | last = person.split(" ")[-1] 7 | title = person.split(" ")[0] 8 | titleName.append(title + " "+last) 9 | print(titleName) 10 | 11 | split_title_and_name() 12 | # list(map(split_title_and_name, people) 13 | -------------------------------------------------------------------------------- /Week2/Week2.py: -------------------------------------------------------------------------------- 1 | Week 2 Assignment 2 | 3 | Question 1 4 | Which country has won the most gold medals in summer games? 5 | This function should return a single string value. 6 | 7 | ``` 8 | def answer_one(): 9 | return df['Gold'].idxmax() 10 | 11 | answer_one() 12 | ``` 13 | 14 | Question 2¶ 15 | Which country had the biggest difference between their summer and winter gold medal counts? 16 | This function should return a single string value. 17 | ``` 18 | def answer_two(): 19 | max_diff=max(df['Gold']-df['Gold.1']) 20 | answer = df[(df['Gold']-df['Gold.1'])==max_diff].index.tolist() 21 | return answer[0] 22 | 23 | answer_two() 24 | ``` 25 | 26 | Question 3 27 | 28 | Which country has the biggest difference between their summer gold medal counts and winter gold medal counts relative to their total gold medal count? 29 | (Summer Gold−Winter Gold)/Total Gold 30 | 31 | Only include countries that have won at least 1 gold in both summer and winter. 32 | This function should return a single string value. 
33 | ``` 34 | def answer_three(): 35 | df_nozero = df[(df['Gold']>0) & (df['Gold.1']>0)] 36 | percentage = max(abs((df_nozero['Gold']-df_nozero['Gold.1'])/df_nozero['Gold.2'])) 37 | return df[((df['Gold']-df['Gold.1'])/df['Gold.2'])==percentage].index.tolist()[0] 38 | 39 | answer_three() 40 | ``` 41 | 42 | 43 | Question 4¶ 44 | Write a function that creates a Series called "Points" which is a weighted value where each gold medal (Gold.2) counts for 3 points, silver medals (Silver.2) for 2 points, and bronze medals (Bronze.2) for 1 point. The function should return only the column (a Series object) which you created. 45 | This function should return a Series named Points of length 146 46 | 47 | ``` 48 | def answer_four(): 49 | df['Points']= (df['Gold.2']*3+df['Silver.2']*2+df['Bronze.2']) 50 | return df['Points'] 51 | 52 | answer_four() 53 | ``` 54 | 55 | Question 5 56 | Question 5¶ 57 | Which state has the most counties in it? (hint: consider the sumlevel key carefully! You'll need this for future questions too...) 58 | This function should return a single string value. 59 | ``` 60 | 61 | def answer_five(): 62 | new_df = census_df[census_df['SUMLEV'] == 50] 63 | return new_df.groupby('STNAME').count()['SUMLEV'].idxmax() 64 | 65 | answer_five() 66 | ``` 67 | 68 | -------------------------------------------------------------------------------- /Week4/Week+4.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "---\n", 8 | "\n", 9 | "_You are currently looking at **version 1.0** of this notebook. 
To download notebooks and datafiles, as well as get help on Jupyter notebooks in the Coursera platform, visit the [Jupyter Notebook FAQ](https://www.coursera.org/learn/python-data-analysis/resources/0dhYG) course resource._\n", 10 | "\n", 11 | "---" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "# Distributions in Pandas" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "import pandas as pd\n", 30 | "import numpy as np" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": { 37 | "collapsed": false 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "np.random.binomial(1, 0.5)" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": { 48 | "collapsed": false 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "np.random.binomial(1000, 0.5)/1000" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": { 59 | "collapsed": false 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "chance_of_tornado = 0.01/100\n", 64 | "np.random.binomial(100000, chance_of_tornado)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": { 71 | "collapsed": false 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "chance_of_tornado = 0.01\n", 76 | "\n", 77 | "tornado_events = np.random.binomial(1, chance_of_tornado, 1000000)\n", 78 | " \n", 79 | "two_days_in_a_row = 0\n", 80 | "for j in range(1,len(tornado_events)-1):\n", 81 | " if tornado_events[j]==1 and tornado_events[j-1]==1:\n", 82 | " two_days_in_a_row+=1\n", 83 | "\n", 84 | "print('{} tornadoes back to back in {} years'.format(two_days_in_a_row, 1000000/365))" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": { 91 | "collapsed": false 92 | }, 93 | "outputs": [], 94 | 
"source": [ 95 | "np.random.uniform(0, 1)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": { 102 | "collapsed": false 103 | }, 104 | "outputs": [], 105 | "source": [ 106 | "np.random.normal(0.75)" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "Formula for standard deviation\n", 114 | "$$\\sqrt{\\frac{1}{N} \\sum_{i=1}^N (x_i - \\overline{x})^2}$$" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": { 121 | "collapsed": false 122 | }, 123 | "outputs": [], 124 | "source": [ 125 | "distribution = np.random.normal(0.75,size=1000)\n", 126 | "\n", 127 | "np.sqrt(np.sum((np.mean(distribution)-distribution)**2)/len(distribution))" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": { 134 | "collapsed": false, 135 | "scrolled": true 136 | }, 137 | "outputs": [], 138 | "source": [ 139 | "np.std(distribution)" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": { 146 | "collapsed": false 147 | }, 148 | "outputs": [], 149 | "source": [ 150 | "import scipy.stats as stats\n", 151 | "stats.kurtosis(distribution)" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": { 158 | "collapsed": false 159 | }, 160 | "outputs": [], 161 | "source": [ 162 | "stats.skew(distribution)" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": { 169 | "collapsed": false 170 | }, 171 | "outputs": [], 172 | "source": [ 173 | "chi_squared_df2 = np.random.chisquare(2, size=10000)\n", 174 | "stats.skew(chi_squared_df2)" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": { 181 | "collapsed": false 182 | }, 183 | "outputs": [], 184 | "source": [ 185 | "chi_squared_df5 = np.random.chisquare(5, size=10000)\n", 186 | 
"stats.skew(chi_squared_df5)" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": { 193 | "collapsed": false 194 | }, 195 | "outputs": [], 196 | "source": [ 197 | "%matplotlib inline\n", 198 | "import matplotlib\n", 199 | "import matplotlib.pyplot as plt\n", 200 | "\n", 201 | "output = plt.hist([chi_squared_df2,chi_squared_df5], bins=50, histtype='step', \n", 202 | " label=['2 degrees of freedom','5 degrees of freedom'])\n", 203 | "plt.legend(loc='upper right')\n" 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": {}, 209 | "source": [ 210 | "# Hypothesis Testing" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": { 217 | "collapsed": false 218 | }, 219 | "outputs": [], 220 | "source": [ 221 | "df = pd.read_csv('grades.csv')" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": null, 227 | "metadata": { 228 | "collapsed": false 229 | }, 230 | "outputs": [], 231 | "source": [ 232 | "df.head()" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": null, 238 | "metadata": { 239 | "collapsed": false 240 | }, 241 | "outputs": [], 242 | "source": [ 243 | "len(df)" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": { 250 | "collapsed": false 251 | }, 252 | "outputs": [], 253 | "source": [ 254 | "early = df[df['assignment1_submission'] <= '2015-12-31']\n", 255 | "late = df[df['assignment1_submission'] > '2015-12-31']" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": { 262 | "collapsed": false 263 | }, 264 | "outputs": [], 265 | "source": [ 266 | "early.mean()" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": null, 272 | "metadata": { 273 | "collapsed": false 274 | }, 275 | "outputs": [], 276 | "source": [ 277 | "late.mean()" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 
| "execution_count": null, 283 | "metadata": { 284 | "collapsed": false 285 | }, 286 | "outputs": [], 287 | "source": [ 288 | "from scipy import stats\n", 289 | "stats.ttest_ind?" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": null, 295 | "metadata": { 296 | "collapsed": false 297 | }, 298 | "outputs": [], 299 | "source": [ 300 | "stats.ttest_ind(early['assignment1_grade'], late['assignment1_grade'])" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": null, 306 | "metadata": { 307 | "collapsed": false 308 | }, 309 | "outputs": [], 310 | "source": [ 311 | "stats.ttest_ind(early['assignment2_grade'], late['assignment2_grade'])" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": null, 317 | "metadata": { 318 | "collapsed": false 319 | }, 320 | "outputs": [], 321 | "source": [ 322 | "stats.ttest_ind(early['assignment3_grade'], late['assignment3_grade'])" 323 | ] 324 | } 325 | ], 326 | "metadata": { 327 | "kernelspec": { 328 | "display_name": "Python 3", 329 | "language": "python", 330 | "name": "python3" 331 | }, 332 | "language_info": { 333 | "codemirror_mode": { 334 | "name": "ipython", 335 | "version": 3 336 | }, 337 | "file_extension": ".py", 338 | "mimetype": "text/x-python", 339 | "name": "python", 340 | "nbconvert_exporter": "python", 341 | "pygments_lexer": "ipython3", 342 | "version": "3.5.2" 343 | } 344 | }, 345 | "nbformat": 4, 346 | "nbformat_minor": 0 347 | } 348 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Intro to Data Science in Python 2 | ## University of Michigan, Professor Christopher Brooks, Coursera course 3 | ### 11/2016 - Completed on 04/12/2016 4 | 5 | Summary: 6 | Despite the course name, this is an intermediate-level data science course with Python. 
Familiarity with Numpy and Pandas libraries is not required, but is highly recommended, as the course does get pretty intense really quickly (i.e. Week 2). To be honest, this is a solid course for someone who has a background with the Pandas and NumPy libraries. However, there is a big knowledge gap between the videos and the assignments, so it's challenging for beginners. 7 | 8 | 9 | 10 | Feedback: 11 | 12 | ![My feeling while taking this course...](https://imgs.xkcd.com/comics/python.png) 13 | 14 | > My feeling while taking this course... 15 | 16 | 04/12/2016: 17 | Finally finished this...was close to giving up on it SO MANY TIMES! 18 | 19 | 20 | ## Week 4 Statistical Analysis in Python and Project 21 | 22 | 23 | Binomial Distribution in numpy for coin flipping 24 | 25 | ``` 26 | np.random.binomial(1,0.5) 27 | ``` 28 | First term (1) is the number of trials (coin flips) per experiment, and second term (0.5) is the probability of success on each trial 29 | 30 | ``` 31 | np.random.binomial(1000, 0.5)/1000 32 | ``` 33 | Flip coins 1000 times, and divide the result by 1000 34 | 35 | Run 10,000 simulations of flipping coins 20 times and getting a number >= 15. 36 | 37 | ``` 38 | x = np.random.binomial(20, .5, 10000) 39 | print((x>=15).mean()) 40 | ``` 41 | Output: 42 | ``` 43 | 0.0219 44 | ``` 45 | 46 | Get the number of events given the no. of simulations. 47 | "How many tornados will take place based on 100,000 simulations, given that the chance of a tornado is 0.01%?" 48 | 49 | ``` 50 | chance_of_tornado = 0.01/100 51 | np.random.binomial(100000, chance_of_tornado) 52 | ``` 53 | Output: 54 | ``` 55 | 8 56 | ``` 57 | 58 | "Assume the chance of tornado is 1%. How many tornados will take place (what is the chance of tornados taking place) two days in a row based on 1000000 simulations?" 
59 | 60 | ``` 61 | chance_of_tornado = 0.01 62 | 63 | tornado_events = np.random.binomial(1, chance_of_tornado, 1000000) 64 | 65 | two_days_in_a_row = 0 66 | for j in range(1,len(tornado_events)-1): 67 | if tornado_events[j]==1 and tornado_events[j-1]==1: 68 | two_days_in_a_row+=1 69 | 70 | print('{} tornadoes back to back in {} years'.format(two_days_in_a_row, 1000000/365)) 71 | ``` 72 | Output: 73 | ``` 74 | 103 tornadoes back to back in 2739.72602739726 years 75 | ``` 76 | tornado_events[j]==1 means a day when a tornado took place. 77 | 78 | #### Standard deviation 79 | 80 | Draw 1000 samples of a normal distribution, with expected value of 0.75 and a standard deviation of 1. Result is ~ 68% of area. 81 | ``` 82 | distribution = np.random.normal(0.75,size=1000) 83 | 84 | np.sqrt(np.sum((np.mean(distribution)-distribution)**2)/len(distribution)) 85 | ``` 86 | The above code is equivalent to the np.std() function: 87 | ``` 88 | np.std(distribution) 89 | ``` 90 | 91 | #### Kurtosis (shape of tails) with stats module 92 | 93 | Positive value = more chubby than a normal distribution 94 | Negative value = more flat than a normal distribution 95 | 96 | ``` 97 | import scipy.stats as stats 98 | stats.kurtosis(distribution) 99 | 100 | ``` 101 | Output: 102 | ``` 103 | -0.21162400583818153 104 | ``` 105 | 106 | #### Skew with stats module 107 | If skew = 0, then there's no skew (i.e. 
the distribution is symmetric) 108 | 109 | ``` 110 | stats.skew(distribution) 111 | ``` 112 | Output: 113 | ``` 114 | 0.051147428570855365 115 | ``` 116 | 117 | 118 | #### Chi squared distribution (right-skewed) 119 | As the degree of freedom increases, the plot moves from left to center 120 | 121 | Degree of freedom = 2: 122 | ``` 123 | chi_squared_df2 = np.random.chisquare(2, size=10000) 124 | stats.skew(chi_squared_df2) 125 | ``` 126 | Output: 127 | ``` 128 | 1.9589902136938178 129 | ``` 130 | 131 | Degree of freedom = 5: 132 | ``` 133 | chi_squared_df5 = np.random.chisquare(5, size=10000) 134 | stats.skew(chi_squared_df5) 135 | ``` 136 | Output: 137 | ``` 138 | 1.3010399138921354 139 | ``` 140 | #### Bimodal distribution (having 2 peaks) 141 | 142 | #### Hypothesis Testing 143 | Alternative Hypothesis vs. Null Hypothesis 144 | Significance level (alpha), 145 | alpha = 0.05 or 5% 146 | 147 | #### t-test: compare the means of two different populations 148 | 149 | stats.ttest_ind(): compare 2 different samples to see if they have different means. In this case, we're using ttest_ind() to compare the average grade of assignment 1 between early users ('early' dataframe) and late users ('late' dataframe). 150 | 151 | Output is a tuple with a test statistic and a p-value. 152 | 153 | 154 | ``` 155 | import scipy.stats as stats 156 | 157 | early = df[df['assignment1_submission'] <= '2015-12-31'] 158 | late = df[df['assignment1_submission'] > '2015-12-31'] 159 | 160 | stats.ttest_ind(early['assignment1_grade'], late['assignment1_grade']) 161 | ``` 162 | Output: 163 | ``` 164 | Ttest_indResult(statistic=1.400549944897566, pvalue=0.16148283016060577) 165 | ``` 166 | 167 | If the p-value is >0.05 (the significance value/alpha we decided previously), then we cannot reject the null hypothesis. 
168 | 169 | Do the same test on assignment 2: 170 | ``` 171 | stats.ttest_ind(early['assignment2_grade'], late['assignment2_grade']) 172 | ``` 173 | Output: 174 | ``` 175 | Ttest_indResult(statistic=1.3239868220912567, pvalue=0.18563824610067967) 176 | In [ ]: 177 | ``` 178 | p-value is still >0.05, so we cannot reject the null hypothesis. 179 | --- 180 | 181 | ## Week 3 Advanced Python Pandas 182 | 183 | ![Finished Week 3's assignment](http://cdn.someecards.com/someecards/usercards/MjAxMi1mNWM4MDQ3MTJkODYzMzhi.png) 184 | 185 | > Finally finished Week 3's assignment. 186 | 187 | 11/27/2016 Update 188 | Finally finished this week's assignment! The first one took a long time. I had to relearn regular expressions because of it. Learned a lot about dataframes through the practices, so I'm happy about the progress eventually, but Jesus, that was a lot of work... 189 | 190 | Merging dataframes based on the same index. "NaN" is assigned when there's a missing value. 191 | 192 | #### iloc() and loc() 193 | iloc() for query based on location 194 | loc() for query based on label 195 | 196 | #### Outer vs inner join 197 | 198 | Outer Join 199 | ``` 200 | pd.merge(df1,df2,how='outer',left_index=True,right_index=True) 201 | ``` 202 | Inner Join 203 | ``` 204 | pd.merge(df1,df2,how='inner',left_index=True,right_index=True) 205 | ``` 206 | Left Join: keep all information from df1 207 | ``` 208 | pd.merge(df1,df2,how='left',left_index=True,right_index=True) 209 | ``` 210 | Right Join: keep all information from df2 211 | ``` 212 | pd.merge(df1,df2,how='right',left_index=True,right_index=True) 213 | ``` 214 | Join by Column names 215 | ``` 216 | pd.merge(df1,df2,how='left',left_on='Name',right_on='Name') 217 | ``` 218 | 219 | Chain indexing - not recommended 220 | ``` 221 | df.loc['Washtenaw']['Total Population'] 222 | ``` 223 | 224 | Method chaining 225 | ``` 226 | (df.where(df['SUMLEV']==50) 227 | .dropna() 228 | .set_index(['STNAME','CTYNAME']) 229 | 
.rename(columns={'ESTIMATESBASE2010': 'Estimates Base 2010'})) 230 | ``` 231 | Drop rows where 'Quantity' is 0, and rename the column 'Weight' to 'Weight(oz.)' 232 | ``` 233 | df = df[df.Quantity !=0].rename({'Weight':'Weight(oz.)'}) 234 | ``` 235 | Alternatively: 236 | ``` 237 | print(df.drop(df[df['Quantity'] == 0].index).rename(columns={'Weight': 'Weight (oz.)'})) 238 | ``` 239 | 240 | #### Apply() function which applies a function to all rows in a dataframe 241 | 242 | To apply to all columns in the same row(i.e.1 = across), use axis= 1 243 | To apply to all rows in the same column (i.e. 0 = down), use axis = 0 244 | 245 | ``` 246 | import numpy as np 247 | def min_max(row): 248 | data = row[['POPESTIMATE2010', 249 | 'POPESTIMATE2011', 250 | 'POPESTIMATE2012', 251 | 'POPESTIMATE2013', 252 | 'POPESTIMATE2014', 253 | 'POPESTIMATE2015']] 254 | return pd.Series({'min': np.min(data), 'max': np.max(data)}) 255 | 256 | df.apply(min_max, axis=1) 257 | ``` 258 | Adding the applied function to the existing dataframe (instead of creating a new one) 259 | ``` 260 | import numpy as np 261 | def min_max(row): 262 | data = row[['POPESTIMATE2010', 263 | 'POPESTIMATE2011', 264 | 'POPESTIMATE2012', 265 | 'POPESTIMATE2013', 266 | 'POPESTIMATE2014', 267 | 'POPESTIMATE2015']] 268 | row['max'] = np.max(data) 269 | row['min'] = np.min(data) 270 | return row 271 | df.apply(min_max, axis=1) 272 | ``` 273 | Use apply() with lambda function: 274 | create a function with the max of each row 275 | ``` 276 | rows = ['POPESTIMATE2010', 277 | 'POPESTIMATE2011', 278 | 'POPESTIMATE2012', 279 | 'POPESTIMATE2013', 280 | 'POPESTIMATE2014', 281 | 'POPESTIMATE2015'] 282 | df.apply(lambda x: np.max(x[rows]), axis=1) 283 | ``` 284 | 285 | #### Groupby() 286 | you can use a function to be the criteria for group_by() 287 | ``` 288 | df = df.set_index('STNAME') 289 | 290 | def fun(item): 291 | if item[0]<'M': 292 | return 0 293 | if item[0]<'Q': 294 | return 1 295 | return 2 296 | 297 | for group, frame 
in df.groupby(fun): 298 | print('There are ' + str(len(frame)) + ' records in group ' + str(group) + ' for processing.') 299 | 300 | ``` 301 | Calculate the average/sum of a certain group with groupby() and agg() 302 | ``` 303 | df.groupby('STNAME').agg({'CENSUS2010POP': np.average}) 304 | ``` 305 | ``` 306 | print(df.groupby('Category').agg('sum')) 307 | ``` 308 | 309 | #### Use apply() with groupby() 310 | ``` 311 | def totalweight(df, w, q): 312 | return sum(df[w] * df[q]) 313 | 314 | print(df.groupby('Category').apply(totalweight, 'Weight (oz.)', 'Quantity')) 315 | ``` 316 | 317 | #### Scales 318 | Use astype() to change the type of scales from one to another 319 | 320 | create a list and use astype() to indicate the order with ordered = True. This enables > or < to be used on strings. 321 | 322 | ``` 323 | df = pd.DataFrame(['A+', 'A', 'A-', 'B+', 'B', 'B-', 'C+', 'C', 'C-', 'D+', 'D'], 324 | index=['excellent', 'excellent', 'excellent', 'good', 'good', 'good', 'ok', 'ok', 'ok', 'poor', 'poor']) 325 | df.rename(columns={0: 'Grades'}, inplace=True) 326 | 327 | grades = df['Grades'].astype('category', 328 | categories=['D', 'D+', 'C-', 'C', 'C+', 'B-', 'B', 'B+', 'A-', 'A', 'A+'], 329 | ordered=True) 330 | grades.head() 331 | ``` 332 | output is: 333 | ``` 334 | excellent A+ 335 | excellent A 336 | excellent A- 337 | good B+ 338 | good B 339 | Name: Grades, dtype: category 340 | Categories (11, object): [D < D+ < C- < C ... 
B+ < A- < A < A+] 341 | 342 | ``` 343 | Use > or < functions on types, output: 344 | ``` 345 | excellent True 346 | excellent True 347 | excellent True 348 | good True 349 | good True 350 | good True 351 | ok True 352 | ok False 353 | ok False 354 | poor False 355 | poor False 356 | Name: Grades, dtype: bool 357 | ``` 358 | 359 | Change this series to categorical with ordering Low < Medium < High 360 | 361 | ``` 362 | s = pd.Series(['Low', 'Low', 'High', 'Medium', 'Low', 'High', 'Low']) 363 | 364 | s.astype('category', categories=['Low', 'Medium', 'High'], ordered=True) 365 | ``` 366 | 367 | Use get_dummies() to convert boolean values into 0s and 1s 368 | 369 | #### cut(): to cut data into bins (i.e. to divide them equally into 10 buckets) 370 | 371 | ``` 372 | df = pd.read_csv('census.csv') 373 | df = df[df['SUMLEV']==50] 374 | df = df.set_index('STNAME').groupby(level=0)['CENSUS2010POP'].agg({'avg': np.average}) 375 | pd.cut(df['avg'],10) 376 | ``` 377 | Cut a series into 3 equal-sized bins 378 | ``` 379 | s = pd.Series([168, 180, 174, 190, 170, 185, 179, 181, 175, 169, 182, 177, 180, 171]) 380 | 381 | 382 | pd.cut(s, 3) 383 | 384 | # You can also add labels for the sizes [Small < Medium < Large]. 385 | pd.cut(s, 3, labels=['Small', 'Medium', 'Large']) 386 | ``` 387 | 388 | #### Use pivot_table() to create Pivot Tables 389 | 390 | ``` 391 | df = pd.read_csv('cars.csv') 392 | df.pivot_table(values='(kW)', index='YEAR', columns='Make', aggfunc=np.mean) 393 | ``` 394 | 395 | Create a pivot table that shows mean price and mean ratings for every "Manufacturer"/"Bike Type" combination 396 | ``` 397 | print(pd.pivot_table(Bikes, index=['Manufacturer','Bike Type'])) 398 | 399 | import numpy as np 400 | print(Bikes.pivot_table(values ='Price',index = 'Manufacturer',columns = 'Bike Type',aggfunc=np.average)) 401 | ``` 402 | 403 | #### Date Functionality in Panda 404 | 1. Timestamp 405 | 2. DatetimeIndex (the index of 1) 406 | 3. Period 407 | 4. 
PeriodIndex (the index of 3) 408 | 409 | 1. Timestamp, exchangeable to Python's datetime 410 | ⋅⋅⋅``` 411 | ⋅⋅⋅pd.Timestamp('9/1/2016 10:05AM') 412 | ⋅⋅⋅``` 413 | 414 | 2. Period 415 | ``` 416 | pd.Period('1/2016') 417 | ``` 418 | 419 | 3. DatetimeIndex and PeriodIndex 420 | DatetimeIndex 421 | ``` 422 | t1 = pd.Series(list('abc'), [pd.Timestamp('2016-09-01'), pd.Timestamp('2016-09-02'), pd.Timestamp('2016-09-03')]) 423 | 424 | type(t1.index) 425 | 426 | ``` 427 | Output: 428 | ``` 429 | pandas.tseries.index.DatetimeIndex 430 | ``` 431 | PeriodIndex 432 | ``` 433 | t2 = pd.Series(list('def'), [pd.Period('2016-09'), pd.Period('2016-10'), pd.Period('2016-11')]) 434 | type(t2.index) 435 | ``` 436 | Output: 437 | ``` 438 | pandas.tseries.period.PeriodIndex 439 | ``` 440 | 441 | Coverts datetimes to the same format with to_datetime() 442 | 443 | ``` 444 | d1 = ['2 June 2013', 'Aug 29, 2014', '2015-06-26', '7/12/16'] 445 | ts3 = pd.DataFrame(np.random.randint(10, 100, (4,2)), index=d1, columns=list('ab')) 446 | ts3.index = pd.to_datetime(ts3.index) 447 | ``` 448 | 449 | use dayfirst = True to change the datetime into European format 450 | ``` 451 | pd.to_datetime('4.7.12', dayfirst=True) 452 | ``` 453 | #### Timedelta: show difference in times 454 | 455 | ``` 456 | pd.Timestamp('9/3/2016')-pd.Timestamp('9/1/2016') 457 | ``` 458 | Output: 459 | ``` 460 | Timedelta('2 days 00:00:00') 461 | ``` 462 | 463 | Calculate datetime with timedelta 464 | ``` 465 | pd.Timestamp('9/2/2016 8:10AM') + pd.Timedelta('12D 3H') 466 | ``` 467 | Output: 468 | ``` 469 | Timestamp('2016-09-14 11:10:00') 470 | ``` 471 | 472 | #### Date_range() 473 | Create a range of dates for bi-weekly on Sundays, starting with a specific date 474 | 475 | ``` 476 | dates = pd.date_range('10-01-2016', periods=9, freq='2W-SUN') 477 | ``` 478 | 479 | #### weekday_name(): check what day of the week it is 480 | ``` 481 | df.index.weekday_name 482 | ``` 483 | 484 | #### diff(): find difference between each day's 
value 485 | ``` 486 | df.diff() 487 | ``` 488 | 489 | #### resample(): frequency conversion. example: find mean count for each month, will show the data as of month end. 'M' stands for month 490 | ``` 491 | df.resample('M').mean() 492 | ``` 493 | 494 | Find values from a specific year, month or a range of dates 495 | 496 | ``` 497 | df['2017'] 498 | df['2016-12'] 499 | df['2016-12':] 500 | 501 | ``` 502 | #### asfreq(): change frequency from bi-weekly to weekly, and fill NaN value with last week's data point 503 | ``` 504 | df.asfreq('W', method='ffill') 505 | ``` 506 | #### matplotlib: visualising a timeseries 507 | 508 | ``` 509 | import matplotlib.pyplot as plt 510 | %matplotlib inline 511 | 512 | df.plot() 513 | ``` 514 | --- 515 | ## Week 2 Basic Data Processing with Pandas 516 | 517 | Dataframe 518 | 519 | ``` 520 | import pandas as pd 521 | purchase_1 = pd.Series({'Name': 'Chris', 522 | 'Item Purchased': 'Dog Food', 523 | 'Cost': 22.50}) 524 | purchase_2 = pd.Series({'Name': 'Kevyn', 525 | 'Item Purchased': 'Kitty Litter', 526 | 'Cost': 2.50}) 527 | purchase_3 = pd.Series({'Name': 'Vinod', 528 | 'Item Purchased': 'Bird Seed', 529 | 'Cost': 5.00}) 530 | df = pd.DataFrame([purchase_1, purchase_2, purchase_3], index=['Store 1', 'Store 1', 'Store 2']) 531 | df.head() 532 | ``` 533 | 534 | df.T.loc --> T transforms data 535 | 536 | iloc vs loc: iloc searches by index, loc searches by value 537 | 538 | Avoid chaining as it generally create a copy of the data, instead of simply viewing it. 539 | 540 | Deleting data with df.drop(). It creates a copy of the dataframe with the given rows removed. 
541 | 542 | ``` 543 | df.drop("Store 1") 544 | ``` 545 | 546 | Deleting data with the del statement 547 | 548 | ``` 549 | del copy_df['Name'] 550 | ``` 551 | 552 | apply 20% discount to cost 553 | 554 | ``` 555 | purchase_1 = pd.Series({'Name': 'Chris', 556 | 'Item Purchased': 'Dog Food', 557 | 'Cost': 22.50}) 558 | purchase_2 = pd.Series({'Name': 'Kevyn', 559 | 'Item Purchased': 'Kitty Litter', 560 | 'Cost': 2.50}) 561 | purchase_3 = pd.Series({'Name': 'Vinod', 562 | 'Item Purchased': 'Bird Seed', 563 | 'Cost': 5.00}) 564 | 565 | df = pd.DataFrame([purchase_1, purchase_2, purchase_3], index=['Store 1', 'Store 1', 'Store 2']) 566 | 567 | 568 | df['Cost'] *= 0.8 569 | print(df) 570 | ``` 571 | 572 | Pandas' read_csv() function, making first column the index 573 | 574 | ``` 575 | df = pd.read_csv('olympics.csv', index_col=0, skiprows=1) 576 | ``` 577 | 578 | Change column names with rename() method 579 | 580 | ``` 581 | for col in df.columns: 582 | if col[:2]=='01': 583 | df.rename(columns={col:'Gold' + col[4:]}, inplace=True) 584 | if col[:2]=='02': 585 | df.rename(columns={col:'Silver' + col[4:]}, inplace=True) 586 | if col[:2]=='03': 587 | df.rename(columns={col:'Bronze' + col[4:]}, inplace=True) 588 | if col[:1]=='№': 589 | df.rename(columns={col:'#' + col[1:]}, inplace=True) 590 | 591 | df.head() 592 | ``` 593 | 594 | Boolean masking: applying a boolean (True or False) filter/mask to a dataframe/array with where() function 595 | 596 | ``` 597 | only_gold = df.where(df['Gold']>0) 598 | only_gold.head() 599 | ``` 600 | 601 | Drop rows when there is no data with the dropna() function 602 | 603 | ``` 604 | only_gold = only_gold.dropna() 605 | ``` 606 | 607 | Chaining boolean masks 608 | 609 | ``` 610 | 611 | len(df[(df['Gold'] > 0) | (df['Gold.1'] > 0)]) 612 | 613 | df[(df['Gold.1'] > 0) & (df['Gold'] == 0)] 614 | 615 | ``` 616 | 617 | Return all names of people who spend more than $3.00 618 | ``` 619 | purchase_1 = pd.Series({'Name': 'Chris', 620 | 'Item Purchased': 'Dog 
Food', 621 | 'Cost': 22.50}) 622 | purchase_2 = pd.Series({'Name': 'Kevyn', 623 | 'Item Purchased': 'Kitty Litter', 624 | 'Cost': 2.50}) 625 | purchase_3 = pd.Series({'Name': 'Vinod', 626 | 'Item Purchased': 'Bird Seed', 627 | 'Cost': 5.00}) 628 | 629 | df = pd.DataFrame([purchase_1, purchase_2, purchase_3], index=['Store 1', 'Store 1', 'Store 2']) 630 | df['Name'][df['Cost']>3] 631 | ``` 632 | 633 | Set_index() function 634 | 635 | Reindex the purchase records Dataframe to be index hierarchically, first by store, then by person. Name these indexes "Location" and "Name". Then add a new entry to it with the value of: 636 | 637 | Name: "Kevyn", Item Purchased: "Kitty Food", Cost: 3.00 Location:"Store 2". 638 | 639 | ``` 640 | purchase_1 = pd.Series({'Name': 'Chris', 641 | 'Item Purchased': 'Dog Food', 642 | 'Cost': 22.50}) 643 | purchase_2 = pd.Series({'Name': 'Kevyn', 644 | 'Item Purchased': 'Kitty Litter', 645 | 'Cost': 2.50}) 646 | purchase_3 = pd.Series({'Name': 'Vinod', 647 | 'Item Purchased': 'Bird Seed', 648 | 'Cost': 5.00}) 649 | 650 | df = pd.DataFrame([purchase_1, purchase_2, purchase_3], index=['Store 1', 'Store 1', 'Store 2']) 651 | 652 | 653 | df = df.set_index([df.index, 'Name']) 654 | df.index.names = ['Location', 'Name'] 655 | df = df.append(pd.Series(data={'Cost': 3.00, 'Item Purchased': 'Kitty Food'}, name=('Store 2', 'Kevyn'))) 656 | ``` 657 | --- 658 | 659 | 660 | ## Week 1 661 | 662 | ####List Indexing and Slicing 663 | 664 | Example 1 665 | 666 | ``` 667 | people = ['Dr. Christopher Brooks', 'Dr. Kevyn Collins-Thompson', 'Dr. VG Vinod Vydiswaran', 'Dr. Daniel Romero'] 668 | 669 | titleName = [] 670 | def split_title_and_name(): 671 | for person in people: 672 | last = person.split(" ")[-1] 673 | title = person.split(" ")[0] 674 | titleName.append(title + " "+last) 675 | print(titleName) 676 | 677 | split_title_and_name() 678 | ``` 679 | 680 | 681 | Example 2 682 | 683 | ``` 684 | people = ['Dr. Christopher Brooks', 'Dr. 
Kevyn Collins-Thompson', 'Dr. VG Vinod Vydiswaran', 'Dr. Daniel Romero'] 685 | 686 | def split_title_and_name(person): 687 | return person.split(" ")[0] + " " + person.split(" ")[-1] 688 | 689 | list(map(split_title_and_name,people)) 690 | ``` 691 | 692 | Example 3 (official answer) 693 | 694 | ``` 695 | people = ['Dr. Christopher Brooks', 'Dr. Kevyn Collins-Thompson', 'Dr. VG Vinod Vydiswaran', 'Dr. Daniel Romero'] 696 | 697 | def split_title_and_name(person): 698 | title = person.split()[0] 699 | lastname = person.split()[-1] 700 | return '{} {}'.format(title, lastname) 701 | 702 | list(map(split_title_and_name, people)) 703 | ``` 704 | 705 | 706 | Lambda functions (for writing quick one-liner functions) 707 | 708 | ``` 709 | my_function = lambda a,b: a+b 710 | my_function(1,2) 711 | ``` 712 | 713 | list comprehension (list all even numbers in range 0 - 1000) 714 | 715 | ``` 716 | my_list = [number for number in range(0,1000) if number % 2==0] 717 | ``` 718 | 719 | 720 | 721 | ``` 722 | def times_tables(): 723 | lst = [] 724 | for i in range(10): 725 | for j in range (10): 726 | lst.append(i*j) 727 | return lst 728 | 729 | times_tables() == [j*i for i in range(10) for j in range(10)] 730 | ``` 731 | 732 | ``` 733 | lowercase = 'abcdefghijklmnopqrstuvwxyz' 734 | digits = '0123456789' 735 | 736 | correct_answer = [a+b+c+d for a in lowercase for b in lowercase for c in digits for d in digits] 737 | 738 | correct_answer[:50] # Display first 50 ids 739 | ``` -------------------------------------------------------------------------------- /Week3/Week+3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "---\n", 8 | "\n", 9 | "_You are currently looking at **version 1.0** of this notebook. 
To download notebooks and datafiles, as well as get help on Jupyter notebooks in the Coursera platform, visit the [Jupyter Notebook FAQ](https://www.coursera.org/learn/python-data-analysis/resources/0dhYG) course resource._\n", 10 | "\n", 11 | "---" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "# Merging Dataframes\n" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [ 28 | { 29 | "data": { 30 | "text/html": [ 31 | "
\n", 32 | "\n", 33 | " \n", 34 | " \n", 35 | " \n", 36 | " \n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | "
CostItem PurchasedName
Store 122.5SpongeChris
Store 12.5Kitty LitterKevyn
Store 25.0SpoonFilip
\n", 62 | "
" 63 | ], 64 | "text/plain": [ 65 | " Cost Item Purchased Name\n", 66 | "Store 1 22.5 Sponge Chris\n", 67 | "Store 1 2.5 Kitty Litter Kevyn\n", 68 | "Store 2 5.0 Spoon Filip" 69 | ] 70 | }, 71 | "execution_count": 1, 72 | "metadata": {}, 73 | "output_type": "execute_result" 74 | } 75 | ], 76 | "source": [ 77 | "import pandas as pd\n", 78 | "\n", 79 | "df = pd.DataFrame([{'Name': 'Chris', 'Item Purchased': 'Sponge', 'Cost': 22.50},\n", 80 | " {'Name': 'Kevyn', 'Item Purchased': 'Kitty Litter', 'Cost': 2.50},\n", 81 | " {'Name': 'Filip', 'Item Purchased': 'Spoon', 'Cost': 5.00}],\n", 82 | " index=['Store 1', 'Store 1', 'Store 2'])\n", 83 | "df" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": { 90 | "collapsed": false 91 | }, 92 | "outputs": [], 93 | "source": [ 94 | "df['Date'] = ['December 1', 'January 1', 'mid-May']\n", 95 | "df" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": { 102 | "collapsed": false 103 | }, 104 | "outputs": [], 105 | "source": [ 106 | "df['Delivered'] = True\n", 107 | "df" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": { 114 | "collapsed": false 115 | }, 116 | "outputs": [], 117 | "source": [ 118 | "df['Feedback'] = ['Positive', None, 'Negative']\n", 119 | "df" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": { 126 | "collapsed": false 127 | }, 128 | "outputs": [], 129 | "source": [ 130 | "adf = df.reset_index()\n", 131 | "adf['Date'] = pd.Series({0: 'December 1', 2: 'mid-May'})\n", 132 | "adf" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": { 139 | "collapsed": false 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "staff_df = pd.DataFrame([{'Name': 'Kelly', 'Role': 'Director of HR'},\n", 144 | " {'Name': 'Sally', 'Role': 'Course liasion'},\n", 145 | " {'Name': 'James', 'Role': 
'Grader'}])\n", 146 | "staff_df = staff_df.set_index('Name')\n", 147 | "student_df = pd.DataFrame([{'Name': 'James', 'School': 'Business'},\n", 148 | " {'Name': 'Mike', 'School': 'Law'},\n", 149 | " {'Name': 'Sally', 'School': 'Engineering'}])\n", 150 | "student_df = student_df.set_index('Name')\n", 151 | "print(staff_df.head())\n", 152 | "print()\n", 153 | "print(student_df.head())" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": { 160 | "collapsed": false, 161 | "scrolled": true 162 | }, 163 | "outputs": [], 164 | "source": [ 165 | "pd.merge(staff_df, student_df, how='outer', left_index=True, right_index=True)" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": { 172 | "collapsed": false 173 | }, 174 | "outputs": [], 175 | "source": [ 176 | "pd.merge(staff_df, student_df, how='inner', left_index=True, right_index=True)" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "metadata": { 183 | "collapsed": false 184 | }, 185 | "outputs": [], 186 | "source": [ 187 | "pd.merge(staff_df, student_df, how='left', left_index=True, right_index=True)" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": { 194 | "collapsed": false 195 | }, 196 | "outputs": [], 197 | "source": [ 198 | "pd.merge(staff_df, student_df, how='right', left_index=True, right_index=True)" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": { 205 | "collapsed": false, 206 | "scrolled": true 207 | }, 208 | "outputs": [], 209 | "source": [ 210 | "staff_df = staff_df.reset_index()\n", 211 | "student_df = student_df.reset_index()\n", 212 | "pd.merge(staff_df, student_df, how='left', left_on='Name', right_on='Name')" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "metadata": { 219 | "collapsed": false 220 | }, 221 | "outputs": [], 
222 | "source": [ 223 | "staff_df = pd.DataFrame([{'Name': 'Kelly', 'Role': 'Director of HR', 'Location': 'State Street'},\n", 224 | " {'Name': 'Sally', 'Role': 'Course liasion', 'Location': 'Washington Avenue'},\n", 225 | " {'Name': 'James', 'Role': 'Grader', 'Location': 'Washington Avenue'}])\n", 226 | "student_df = pd.DataFrame([{'Name': 'James', 'School': 'Business', 'Location': '1024 Billiard Avenue'},\n", 227 | " {'Name': 'Mike', 'School': 'Law', 'Location': 'Fraternity House #22'},\n", 228 | " {'Name': 'Sally', 'School': 'Engineering', 'Location': '512 Wilson Crescent'}])\n", 229 | "pd.merge(staff_df, student_df, how='left', left_on='Name', right_on='Name')" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": null, 235 | "metadata": { 236 | "collapsed": false 237 | }, 238 | "outputs": [], 239 | "source": [ 240 | "staff_df = pd.DataFrame([{'First Name': 'Kelly', 'Last Name': 'Desjardins', 'Role': 'Director of HR'},\n", 241 | " {'First Name': 'Sally', 'Last Name': 'Brooks', 'Role': 'Course liasion'},\n", 242 | " {'First Name': 'James', 'Last Name': 'Wilde', 'Role': 'Grader'}])\n", 243 | "student_df = pd.DataFrame([{'First Name': 'James', 'Last Name': 'Hammond', 'School': 'Business'},\n", 244 | " {'First Name': 'Mike', 'Last Name': 'Smith', 'School': 'Law'},\n", 245 | " {'First Name': 'Sally', 'Last Name': 'Brooks', 'School': 'Engineering'}])\n", 246 | "staff_df\n", 247 | "student_df\n", 248 | "pd.merge(staff_df, student_df, how='inner', left_on=['First Name','Last Name'], right_on=['First Name','Last Name'])" 249 | ] 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "metadata": {}, 254 | "source": [ 255 | "# Idiomatic Pandas: Making Code Pandorable" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": { 262 | "collapsed": false 263 | }, 264 | "outputs": [], 265 | "source": [ 266 | "import pandas as pd\n", 267 | "df = pd.read_csv('census.csv')\n", 268 | "df" 269 | ] 270 | }, 271 | { 
272 | "cell_type": "code", 273 | "execution_count": null, 274 | "metadata": { 275 | "collapsed": false 276 | }, 277 | "outputs": [], 278 | "source": [ 279 | "(df.where(df['SUMLEV']==50)\n", 280 | " .dropna()\n", 281 | " .set_index(['STNAME','CTYNAME'])\n", 282 | " .rename(columns={'ESTIMATESBASE2010': 'Estimates Base 2010'}))" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": null, 288 | "metadata": { 289 | "collapsed": false 290 | }, 291 | "outputs": [], 292 | "source": [ 293 | "df = df[df['SUMLEV']==50]\n", 294 | "df.set_index(['STNAME','CTYNAME'], inplace=True)\n", 295 | "df.rename(columns={'ESTIMATESBASE2010': 'Estimates Base 2010'})" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": null, 301 | "metadata": { 302 | "collapsed": false 303 | }, 304 | "outputs": [], 305 | "source": [ 306 | "import numpy as np\n", 307 | "def min_max(row):\n", 308 | " data = row[['POPESTIMATE2010',\n", 309 | " 'POPESTIMATE2011',\n", 310 | " 'POPESTIMATE2012',\n", 311 | " 'POPESTIMATE2013',\n", 312 | " 'POPESTIMATE2014',\n", 313 | " 'POPESTIMATE2015']]\n", 314 | " return pd.Series({'min': np.min(data), 'max': np.max(data)})" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": null, 320 | "metadata": { 321 | "collapsed": false 322 | }, 323 | "outputs": [], 324 | "source": [ 325 | "df.apply(min_max, axis=1)" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": null, 331 | "metadata": { 332 | "collapsed": false 333 | }, 334 | "outputs": [], 335 | "source": [ 336 | "import numpy as np\n", 337 | "def min_max(row):\n", 338 | " data = row[['POPESTIMATE2010',\n", 339 | " 'POPESTIMATE2011',\n", 340 | " 'POPESTIMATE2012',\n", 341 | " 'POPESTIMATE2013',\n", 342 | " 'POPESTIMATE2014',\n", 343 | " 'POPESTIMATE2015']]\n", 344 | " row['max'] = np.max(data)\n", 345 | " row['min'] = np.min(data)\n", 346 | " return row\n", 347 | "df.apply(min_max, axis=1)" 348 | ] 349 | }, 350 | { 351 | "cell_type": 
"code", 352 | "execution_count": null, 353 | "metadata": { 354 | "collapsed": false 355 | }, 356 | "outputs": [], 357 | "source": [ 358 | "rows = ['POPESTIMATE2010',\n", 359 | " 'POPESTIMATE2011',\n", 360 | " 'POPESTIMATE2012',\n", 361 | " 'POPESTIMATE2013',\n", 362 | " 'POPESTIMATE2014',\n", 363 | " 'POPESTIMATE2015']\n", 364 | "df.apply(lambda x: np.max(x[rows]), axis=1)" 365 | ] 366 | }, 367 | { 368 | "cell_type": "markdown", 369 | "metadata": {}, 370 | "source": [ 371 | "# Group by" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": null, 377 | "metadata": { 378 | "collapsed": false 379 | }, 380 | "outputs": [], 381 | "source": [ 382 | "import pandas as pd\n", 383 | "import numpy as np\n", 384 | "df = pd.read_csv('census.csv')\n", 385 | "df = df[df['SUMLEV']==50]\n", 386 | "df" 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": null, 392 | "metadata": { 393 | "collapsed": false 394 | }, 395 | "outputs": [], 396 | "source": [ 397 | "%%timeit -n 10\n", 398 | "for state in df['STNAME'].unique():\n", 399 | " avg = np.average(df.where(df['STNAME']==state).dropna()['CENSUS2010POP'])\n", 400 | " print('Counties in state ' + state + ' have an average population of ' + str(avg))" 401 | ] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "execution_count": null, 406 | "metadata": { 407 | "collapsed": false, 408 | "scrolled": true 409 | }, 410 | "outputs": [], 411 | "source": [ 412 | "%%timeit -n 10\n", 413 | "for group, frame in df.groupby('STNAME'):\n", 414 | " avg = np.average(frame['CENSUS2010POP'])\n", 415 | " print('Counties in state ' + group + ' have an average population of ' + str(avg))" 416 | ] 417 | }, 418 | { 419 | "cell_type": "code", 420 | "execution_count": null, 421 | "metadata": { 422 | "collapsed": false 423 | }, 424 | "outputs": [], 425 | "source": [ 426 | "df.head()" 427 | ] 428 | }, 429 | { 430 | "cell_type": "code", 431 | "execution_count": null, 432 | "metadata": { 433 | "collapsed": false 434 | 
}, 435 | "outputs": [], 436 | "source": [ 437 | "df = df.set_index('STNAME')\n", 438 | "\n", 439 | "def fun(item):\n", 440 | " if item[0]<'M':\n", 441 | " return 0\n", 442 | " if item[0]<'Q':\n", 443 | " return 1\n", 444 | " return 2\n", 445 | "\n", 446 | "for group, frame in df.groupby(fun):\n", 447 | " print('There are ' + str(len(frame)) + ' records in group ' + str(group) + ' for processing.')\n" 448 | ] 449 | }, 450 | { 451 | "cell_type": "code", 452 | "execution_count": null, 453 | "metadata": { 454 | "collapsed": false 455 | }, 456 | "outputs": [], 457 | "source": [ 458 | "df = pd.read_csv('census.csv')\n", 459 | "df = df[df['SUMLEV']==50]" 460 | ] 461 | }, 462 | { 463 | "cell_type": "code", 464 | "execution_count": null, 465 | "metadata": { 466 | "collapsed": false 467 | }, 468 | "outputs": [], 469 | "source": [ 470 | "df.groupby('STNAME').agg({'CENSUS2010POP': np.average})" 471 | ] 472 | }, 473 | { 474 | "cell_type": "code", 475 | "execution_count": null, 476 | "metadata": { 477 | "collapsed": false 478 | }, 479 | "outputs": [], 480 | "source": [ 481 | "print(type(df.groupby(level=0)['POPESTIMATE2010','POPESTIMATE2011']))\n", 482 | "print(type(df.groupby(level=0)['POPESTIMATE2010']))" 483 | ] 484 | }, 485 | { 486 | "cell_type": "code", 487 | "execution_count": null, 488 | "metadata": { 489 | "collapsed": false 490 | }, 491 | "outputs": [], 492 | "source": [ 493 | "(df.set_index('STNAME').groupby(level=0)['CENSUS2010POP']\n", 494 | " .agg({'avg': np.average, 'sum': np.sum}))" 495 | ] 496 | }, 497 | { 498 | "cell_type": "code", 499 | "execution_count": null, 500 | "metadata": { 501 | "collapsed": false 502 | }, 503 | "outputs": [], 504 | "source": [ 505 | "(df.set_index('STNAME').groupby(level=0)['POPESTIMATE2010','POPESTIMATE2011']\n", 506 | " .agg({'avg': np.average, 'sum': np.sum}))" 507 | ] 508 | }, 509 | { 510 | "cell_type": "code", 511 | "execution_count": null, 512 | "metadata": { 513 | "collapsed": false 514 | }, 515 | "outputs": [], 516 | "source": 
[ 517 | "(df.set_index('STNAME').groupby(level=0)['POPESTIMATE2010','POPESTIMATE2011']\n", 518 | " .agg({'POPESTIMATE2010': np.average, 'POPESTIMATE2011': np.sum}))" 519 | ] 520 | }, 521 | { 522 | "cell_type": "markdown", 523 | "metadata": {}, 524 | "source": [ 525 | "# Scales" 526 | ] 527 | }, 528 | { 529 | "cell_type": "code", 530 | "execution_count": null, 531 | "metadata": { 532 | "collapsed": false 533 | }, 534 | "outputs": [], 535 | "source": [ 536 | "df = pd.DataFrame(['A+', 'A', 'A-', 'B+', 'B', 'B-', 'C+', 'C', 'C-', 'D+', 'D'],\n", 537 | " index=['excellent', 'excellent', 'excellent', 'good', 'good', 'good', 'ok', 'ok', 'ok', 'poor', 'poor'])\n", 538 | "df.rename(columns={0: 'Grades'}, inplace=True)\n", 539 | "df" 540 | ] 541 | }, 542 | { 543 | "cell_type": "code", 544 | "execution_count": null, 545 | "metadata": { 546 | "collapsed": false 547 | }, 548 | "outputs": [], 549 | "source": [ 550 | "df['Grades'].astype('category').head()" 551 | ] 552 | }, 553 | { 554 | "cell_type": "code", 555 | "execution_count": null, 556 | "metadata": { 557 | "collapsed": false 558 | }, 559 | "outputs": [], 560 | "source": [ 561 | "grades = df['Grades'].astype('category',\n", 562 | " categories=['D', 'D+', 'C-', 'C', 'C+', 'B-', 'B', 'B+', 'A-', 'A', 'A+'],\n", 563 | " ordered=True)\n", 564 | "grades.head()" 565 | ] 566 | }, 567 | { 568 | "cell_type": "code", 569 | "execution_count": null, 570 | "metadata": { 571 | "collapsed": false 572 | }, 573 | "outputs": [], 574 | "source": [ 575 | "grades > 'C'" 576 | ] 577 | }, 578 | { 579 | "cell_type": "code", 580 | "execution_count": null, 581 | "metadata": { 582 | "collapsed": false 583 | }, 584 | "outputs": [], 585 | "source": [ 586 | "df = pd.read_csv('census.csv')\n", 587 | "df = df[df['SUMLEV']==50]\n", 588 | "df = df.set_index('STNAME').groupby(level=0)['CENSUS2010POP'].agg({'avg': np.average})\n", 589 | "pd.cut(df['avg'],10)" 590 | ] 591 | }, 592 | { 593 | "cell_type": "markdown", 594 | "metadata": {}, 595 | "source": [ 596 
| "# Pivot Tables" 597 | ] 598 | }, 599 | { 600 | "cell_type": "code", 601 | "execution_count": null, 602 | "metadata": { 603 | "collapsed": true 604 | }, 605 | "outputs": [], 606 | "source": [ 607 | "#http://open.canada.ca/data/en/dataset/98f1a129-f628-4ce4-b24d-6f16bf24dd64\n", 608 | "df = pd.read_csv('cars.csv')" 609 | ] 610 | }, 611 | { 612 | "cell_type": "code", 613 | "execution_count": null, 614 | "metadata": { 615 | "collapsed": false 616 | }, 617 | "outputs": [], 618 | "source": [ 619 | "df.head()" 620 | ] 621 | }, 622 | { 623 | "cell_type": "code", 624 | "execution_count": null, 625 | "metadata": { 626 | "collapsed": false 627 | }, 628 | "outputs": [], 629 | "source": [ 630 | "df.pivot_table(values='(kW)', index='YEAR', columns='Make', aggfunc=np.mean)" 631 | ] 632 | }, 633 | { 634 | "cell_type": "code", 635 | "execution_count": null, 636 | "metadata": { 637 | "collapsed": false 638 | }, 639 | "outputs": [], 640 | "source": [ 641 | "df.pivot_table(values='(kW)', index='YEAR', columns='Make', aggfunc=[np.mean,np.min], margins=True)" 642 | ] 643 | }, 644 | { 645 | "cell_type": "markdown", 646 | "metadata": {}, 647 | "source": [ 648 | "# Date Functionality in Pandas" 649 | ] 650 | }, 651 | { 652 | "cell_type": "code", 653 | "execution_count": 2, 654 | "metadata": { 655 | "collapsed": true 656 | }, 657 | "outputs": [], 658 | "source": [ 659 | "import pandas as pd\n", 660 | "import numpy as np" 661 | ] 662 | }, 663 | { 664 | "cell_type": "markdown", 665 | "metadata": {}, 666 | "source": [ 667 | "### Timestamp" 668 | ] 669 | }, 670 | { 671 | "cell_type": "code", 672 | "execution_count": 3, 673 | "metadata": { 674 | "collapsed": false 675 | }, 676 | "outputs": [ 677 | { 678 | "data": { 679 | "text/plain": [ 680 | "Timestamp('2016-09-01 10:05:00')" 681 | ] 682 | }, 683 | "execution_count": 3, 684 | "metadata": {}, 685 | "output_type": "execute_result" 686 | } 687 | ], 688 | "source": [ 689 | "pd.Timestamp('9/1/2016 10:05AM')" 690 | ] 691 | }, 692 | { 693 | 
"cell_type": "markdown", 694 | "metadata": {}, 695 | "source": [ 696 | "### Period" 697 | ] 698 | }, 699 | { 700 | "cell_type": "code", 701 | "execution_count": 4, 702 | "metadata": { 703 | "collapsed": false 704 | }, 705 | "outputs": [ 706 | { 707 | "data": { 708 | "text/plain": [ 709 | "Period('2016-01', 'M')" 710 | ] 711 | }, 712 | "execution_count": 4, 713 | "metadata": {}, 714 | "output_type": "execute_result" 715 | } 716 | ], 717 | "source": [ 718 | "pd.Period('1/2016')" 719 | ] 720 | }, 721 | { 722 | "cell_type": "code", 723 | "execution_count": 5, 724 | "metadata": { 725 | "collapsed": false 726 | }, 727 | "outputs": [ 728 | { 729 | "data": { 730 | "text/plain": [ 731 | "Period('2016-03-05', 'D')" 732 | ] 733 | }, 734 | "execution_count": 5, 735 | "metadata": {}, 736 | "output_type": "execute_result" 737 | } 738 | ], 739 | "source": [ 740 | "pd.Period('3/5/2016')" 741 | ] 742 | }, 743 | { 744 | "cell_type": "markdown", 745 | "metadata": {}, 746 | "source": [ 747 | "### DatetimeIndex" 748 | ] 749 | }, 750 | { 751 | "cell_type": "code", 752 | "execution_count": 6, 753 | "metadata": { 754 | "collapsed": false 755 | }, 756 | "outputs": [ 757 | { 758 | "data": { 759 | "text/plain": [ 760 | "2016-09-01 a\n", 761 | "2016-09-02 b\n", 762 | "2016-09-03 c\n", 763 | "dtype: object" 764 | ] 765 | }, 766 | "execution_count": 6, 767 | "metadata": {}, 768 | "output_type": "execute_result" 769 | } 770 | ], 771 | "source": [ 772 | "t1 = pd.Series(list('abc'), [pd.Timestamp('2016-09-01'), pd.Timestamp('2016-09-02'), pd.Timestamp('2016-09-03')])\n", 773 | "t1" 774 | ] 775 | }, 776 | { 777 | "cell_type": "code", 778 | "execution_count": 7, 779 | "metadata": { 780 | "collapsed": false 781 | }, 782 | "outputs": [ 783 | { 784 | "data": { 785 | "text/plain": [ 786 | "pandas.tseries.index.DatetimeIndex" 787 | ] 788 | }, 789 | "execution_count": 7, 790 | "metadata": {}, 791 | "output_type": "execute_result" 792 | } 793 | ], 794 | "source": [ 795 | "type(t1.index)" 796 | ] 797 | }, 
798 | { 799 | "cell_type": "markdown", 800 | "metadata": {}, 801 | "source": [ 802 | "### PeriodIndex" 803 | ] 804 | }, 805 | { 806 | "cell_type": "code", 807 | "execution_count": 8, 808 | "metadata": { 809 | "collapsed": false 810 | }, 811 | "outputs": [ 812 | { 813 | "data": { 814 | "text/plain": [ 815 | "2016-09 d\n", 816 | "2016-10 e\n", 817 | "2016-11 f\n", 818 | "Freq: M, dtype: object" 819 | ] 820 | }, 821 | "execution_count": 8, 822 | "metadata": {}, 823 | "output_type": "execute_result" 824 | } 825 | ], 826 | "source": [ 827 | "t2 = pd.Series(list('def'), [pd.Period('2016-09'), pd.Period('2016-10'), pd.Period('2016-11')])\n", 828 | "t2" 829 | ] 830 | }, 831 | { 832 | "cell_type": "code", 833 | "execution_count": 9, 834 | "metadata": { 835 | "collapsed": false 836 | }, 837 | "outputs": [ 838 | { 839 | "data": { 840 | "text/plain": [ 841 | "pandas.tseries.period.PeriodIndex" 842 | ] 843 | }, 844 | "execution_count": 9, 845 | "metadata": {}, 846 | "output_type": "execute_result" 847 | } 848 | ], 849 | "source": [ 850 | "type(t2.index)" 851 | ] 852 | }, 853 | { 854 | "cell_type": "markdown", 855 | "metadata": {}, 856 | "source": [ 857 | "### Converting to Datetime" 858 | ] 859 | }, 860 | { 861 | "cell_type": "code", 862 | "execution_count": 10, 863 | "metadata": { 864 | "collapsed": false 865 | }, 866 | "outputs": [ 867 | { 868 | "data": { 869 | "text/html": [ 870 | "
\n", 871 | "\n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | "
ab
2 June 20131646
Aug 29, 20141466
2015-06-265999
7/12/162717
\n", 902 | "
" 903 | ], 904 | "text/plain": [ 905 | " a b\n", 906 | "2 June 2013 16 46\n", 907 | "Aug 29, 2014 14 66\n", 908 | "2015-06-26 59 99\n", 909 | "7/12/16 27 17" 910 | ] 911 | }, 912 | "execution_count": 10, 913 | "metadata": {}, 914 | "output_type": "execute_result" 915 | } 916 | ], 917 | "source": [ 918 | "d1 = ['2 June 2013', 'Aug 29, 2014', '2015-06-26', '7/12/16']\n", 919 | "ts3 = pd.DataFrame(np.random.randint(10, 100, (4,2)), index=d1, columns=list('ab'))\n", 920 | "ts3" 921 | ] 922 | }, 923 | { 924 | "cell_type": "code", 925 | "execution_count": 11, 926 | "metadata": { 927 | "collapsed": false 928 | }, 929 | "outputs": [ 930 | { 931 | "data": { 932 | "text/html": [ 933 | "
\n", 934 | "\n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | "
ab
2013-06-021646
2014-08-291466
2015-06-265999
2016-07-122717
\n", 965 | "
" 966 | ], 967 | "text/plain": [ 968 | " a b\n", 969 | "2013-06-02 16 46\n", 970 | "2014-08-29 14 66\n", 971 | "2015-06-26 59 99\n", 972 | "2016-07-12 27 17" 973 | ] 974 | }, 975 | "execution_count": 11, 976 | "metadata": {}, 977 | "output_type": "execute_result" 978 | } 979 | ], 980 | "source": [ 981 | "ts3.index = pd.to_datetime(ts3.index)\n", 982 | "ts3" 983 | ] 984 | }, 985 | { 986 | "cell_type": "code", 987 | "execution_count": 12, 988 | "metadata": { 989 | "collapsed": false 990 | }, 991 | "outputs": [ 992 | { 993 | "data": { 994 | "text/plain": [ 995 | "Timestamp('2012-07-04 00:00:00')" 996 | ] 997 | }, 998 | "execution_count": 12, 999 | "metadata": {}, 1000 | "output_type": "execute_result" 1001 | } 1002 | ], 1003 | "source": [ 1004 | "pd.to_datetime('4.7.12', dayfirst=True)" 1005 | ] 1006 | }, 1007 | { 1008 | "cell_type": "markdown", 1009 | "metadata": {}, 1010 | "source": [ 1011 | "### Timedeltas" 1012 | ] 1013 | }, 1014 | { 1015 | "cell_type": "code", 1016 | "execution_count": 13, 1017 | "metadata": { 1018 | "collapsed": false 1019 | }, 1020 | "outputs": [ 1021 | { 1022 | "data": { 1023 | "text/plain": [ 1024 | "Timedelta('2 days 00:00:00')" 1025 | ] 1026 | }, 1027 | "execution_count": 13, 1028 | "metadata": {}, 1029 | "output_type": "execute_result" 1030 | } 1031 | ], 1032 | "source": [ 1033 | "pd.Timestamp('9/3/2016')-pd.Timestamp('9/1/2016')" 1034 | ] 1035 | }, 1036 | { 1037 | "cell_type": "code", 1038 | "execution_count": 14, 1039 | "metadata": { 1040 | "collapsed": false 1041 | }, 1042 | "outputs": [ 1043 | { 1044 | "data": { 1045 | "text/plain": [ 1046 | "Timestamp('2016-09-14 11:10:00')" 1047 | ] 1048 | }, 1049 | "execution_count": 14, 1050 | "metadata": {}, 1051 | "output_type": "execute_result" 1052 | } 1053 | ], 1054 | "source": [ 1055 | "pd.Timestamp('9/2/2016 8:10AM') + pd.Timedelta('12D 3H')" 1056 | ] 1057 | }, 1058 | { 1059 | "cell_type": "markdown", 1060 | "metadata": {}, 1061 | "source": [ 1062 | "### Working with Dates in a Dataframe" 
1063 | ] 1064 | }, 1065 | { 1066 | "cell_type": "code", 1067 | "execution_count": 15, 1068 | "metadata": { 1069 | "collapsed": false 1070 | }, 1071 | "outputs": [ 1072 | { 1073 | "data": { 1074 | "text/plain": [ 1075 | "DatetimeIndex(['2016-10-02', '2016-10-16', '2016-10-30', '2016-11-13',\n", 1076 | " '2016-11-27', '2016-12-11', '2016-12-25', '2017-01-08',\n", 1077 | " '2017-01-22'],\n", 1078 | " dtype='datetime64[ns]', freq='2W-SUN')" 1079 | ] 1080 | }, 1081 | "execution_count": 15, 1082 | "metadata": {}, 1083 | "output_type": "execute_result" 1084 | } 1085 | ], 1086 | "source": [ 1087 | "dates = pd.date_range('10-01-2016', periods=9, freq='2W-SUN')\n", 1088 | "dates" 1089 | ] 1090 | }, 1091 | { 1092 | "cell_type": "code", 1093 | "execution_count": 16, 1094 | "metadata": { 1095 | "collapsed": false 1096 | }, 1097 | "outputs": [ 1098 | { 1099 | "data": { 1100 | "text/html": [ 1101 | "
\n", 1102 | "\n", 1103 | " \n", 1104 | " \n", 1105 | " \n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1109 | " \n", 1110 | " \n", 1111 | " \n", 1112 | " \n", 1113 | " \n", 1114 | " \n", 1115 | " \n", 1116 | " \n", 1117 | " \n", 1118 | " \n", 1119 | " \n", 1120 | " \n", 1121 | " \n", 1122 | " \n", 1123 | " \n", 1124 | " \n", 1125 | " \n", 1126 | " \n", 1127 | " \n", 1128 | " \n", 1129 | " \n", 1130 | " \n", 1131 | " \n", 1132 | " \n", 1133 | " \n", 1134 | " \n", 1135 | " \n", 1136 | " \n", 1137 | " \n", 1138 | " \n", 1139 | " \n", 1140 | " \n", 1141 | " \n", 1142 | " \n", 1143 | " \n", 1144 | " \n", 1145 | " \n", 1146 | " \n", 1147 | " \n", 1148 | " \n", 1149 | " \n", 1150 | " \n", 1151 | " \n", 1152 | " \n", 1153 | " \n", 1154 | " \n", 1155 | " \n", 1156 | " \n", 1157 | "
Count 1Count 2
2016-10-02104125
2016-10-16109122
2016-10-30111127
2016-11-13117126
2016-11-27114126
2016-12-11109121
2016-12-25105126
2017-01-08105125
2017-01-22101123
\n", 1158 | "
" 1159 | ], 1160 | "text/plain": [ 1161 | " Count 1 Count 2\n", 1162 | "2016-10-02 104 125\n", 1163 | "2016-10-16 109 122\n", 1164 | "2016-10-30 111 127\n", 1165 | "2016-11-13 117 126\n", 1166 | "2016-11-27 114 126\n", 1167 | "2016-12-11 109 121\n", 1168 | "2016-12-25 105 126\n", 1169 | "2017-01-08 105 125\n", 1170 | "2017-01-22 101 123" 1171 | ] 1172 | }, 1173 | "execution_count": 16, 1174 | "metadata": {}, 1175 | "output_type": "execute_result" 1176 | } 1177 | ], 1178 | "source": [ 1179 | "df = pd.DataFrame({'Count 1': 100 + np.random.randint(-5, 10, 9).cumsum(),\n", 1180 | " 'Count 2': 120 + np.random.randint(-5, 10, 9)}, index=dates)\n", 1181 | "df" 1182 | ] 1183 | }, 1184 | { 1185 | "cell_type": "code", 1186 | "execution_count": 17, 1187 | "metadata": { 1188 | "collapsed": false 1189 | }, 1190 | "outputs": [ 1191 | { 1192 | "data": { 1193 | "text/plain": [ 1194 | "array(['Sunday', 'Sunday', 'Sunday', 'Sunday', 'Sunday', 'Sunday',\n", 1195 | " 'Sunday', 'Sunday', 'Sunday'], dtype=object)" 1196 | ] 1197 | }, 1198 | "execution_count": 17, 1199 | "metadata": {}, 1200 | "output_type": "execute_result" 1201 | } 1202 | ], 1203 | "source": [ 1204 | "df.index.weekday_name" 1205 | ] 1206 | }, 1207 | { 1208 | "cell_type": "code", 1209 | "execution_count": 18, 1210 | "metadata": { 1211 | "collapsed": false 1212 | }, 1213 | "outputs": [ 1214 | { 1215 | "data": { 1216 | "text/html": [ 1217 | "
\n", 1218 | "\n", 1219 | " \n", 1220 | " \n", 1221 | " \n", 1222 | " \n", 1223 | " \n", 1224 | " \n", 1225 | " \n", 1226 | " \n", 1227 | " \n", 1228 | " \n", 1229 | " \n", 1230 | " \n", 1231 | " \n", 1232 | " \n", 1233 | " \n", 1234 | " \n", 1235 | " \n", 1236 | " \n", 1237 | " \n", 1238 | " \n", 1239 | " \n", 1240 | " \n", 1241 | " \n", 1242 | " \n", 1243 | " \n", 1244 | " \n", 1245 | " \n", 1246 | " \n", 1247 | " \n", 1248 | " \n", 1249 | " \n", 1250 | " \n", 1251 | " \n", 1252 | " \n", 1253 | " \n", 1254 | " \n", 1255 | " \n", 1256 | " \n", 1257 | " \n", 1258 | " \n", 1259 | " \n", 1260 | " \n", 1261 | " \n", 1262 | " \n", 1263 | " \n", 1264 | " \n", 1265 | " \n", 1266 | " \n", 1267 | " \n", 1268 | " \n", 1269 | " \n", 1270 | " \n", 1271 | " \n", 1272 | " \n", 1273 | "
Count 1Count 2
2016-10-02NaNNaN
2016-10-165.0-3.0
2016-10-302.05.0
2016-11-136.0-1.0
2016-11-27-3.00.0
2016-12-11-5.0-5.0
2016-12-25-4.05.0
2017-01-080.0-1.0
2017-01-22-4.0-2.0
\n", 1274 | "
" 1275 | ], 1276 | "text/plain": [ 1277 | " Count 1 Count 2\n", 1278 | "2016-10-02 NaN NaN\n", 1279 | "2016-10-16 5.0 -3.0\n", 1280 | "2016-10-30 2.0 5.0\n", 1281 | "2016-11-13 6.0 -1.0\n", 1282 | "2016-11-27 -3.0 0.0\n", 1283 | "2016-12-11 -5.0 -5.0\n", 1284 | "2016-12-25 -4.0 5.0\n", 1285 | "2017-01-08 0.0 -1.0\n", 1286 | "2017-01-22 -4.0 -2.0" 1287 | ] 1288 | }, 1289 | "execution_count": 18, 1290 | "metadata": {}, 1291 | "output_type": "execute_result" 1292 | } 1293 | ], 1294 | "source": [ 1295 | "df.diff()" 1296 | ] 1297 | }, 1298 | { 1299 | "cell_type": "code", 1300 | "execution_count": 19, 1301 | "metadata": { 1302 | "collapsed": false 1303 | }, 1304 | "outputs": [ 1305 | { 1306 | "data": { 1307 | "text/html": [ 1308 | "
\n", 1309 | "\n", 1310 | " \n", 1311 | " \n", 1312 | " \n", 1313 | " \n", 1314 | " \n", 1315 | " \n", 1316 | " \n", 1317 | " \n", 1318 | " \n", 1319 | " \n", 1320 | " \n", 1321 | " \n", 1322 | " \n", 1323 | " \n", 1324 | " \n", 1325 | " \n", 1326 | " \n", 1327 | " \n", 1328 | " \n", 1329 | " \n", 1330 | " \n", 1331 | " \n", 1332 | " \n", 1333 | " \n", 1334 | " \n", 1335 | " \n", 1336 | " \n", 1337 | " \n", 1338 | " \n", 1339 | "
Count 1Count 2
2016-10-31108.0124.666667
2016-11-30115.5126.000000
2016-12-31107.0123.500000
2017-01-31103.0124.000000
\n", 1340 | "
" 1341 | ], 1342 | "text/plain": [ 1343 | " Count 1 Count 2\n", 1344 | "2016-10-31 108.0 124.666667\n", 1345 | "2016-11-30 115.5 126.000000\n", 1346 | "2016-12-31 107.0 123.500000\n", 1347 | "2017-01-31 103.0 124.000000" 1348 | ] 1349 | }, 1350 | "execution_count": 19, 1351 | "metadata": {}, 1352 | "output_type": "execute_result" 1353 | } 1354 | ], 1355 | "source": [ 1356 | "df.resample('M').mean()" 1357 | ] 1358 | }, 1359 | { 1360 | "cell_type": "code", 1361 | "execution_count": 20, 1362 | "metadata": { 1363 | "collapsed": false 1364 | }, 1365 | "outputs": [ 1366 | { 1367 | "data": { 1368 | "text/html": [ 1369 | "
\n", 1370 | "\n", 1371 | " \n", 1372 | " \n", 1373 | " \n", 1374 | " \n", 1375 | " \n", 1376 | " \n", 1377 | " \n", 1378 | " \n", 1379 | " \n", 1380 | " \n", 1381 | " \n", 1382 | " \n", 1383 | " \n", 1384 | " \n", 1385 | " \n", 1386 | " \n", 1387 | " \n", 1388 | " \n", 1389 | " \n", 1390 | "
Count 1Count 2
2017-01-08105125
2017-01-22101123
\n", 1391 | "
" 1392 | ], 1393 | "text/plain": [ 1394 | " Count 1 Count 2\n", 1395 | "2017-01-08 105 125\n", 1396 | "2017-01-22 101 123" 1397 | ] 1398 | }, 1399 | "execution_count": 20, 1400 | "metadata": {}, 1401 | "output_type": "execute_result" 1402 | } 1403 | ], 1404 | "source": [ 1405 | "df['2017']" 1406 | ] 1407 | }, 1408 | { 1409 | "cell_type": "code", 1410 | "execution_count": 21, 1411 | "metadata": { 1412 | "collapsed": false 1413 | }, 1414 | "outputs": [ 1415 | { 1416 | "data": { 1417 | "text/html": [ 1418 | "
\n", 1419 | "\n", 1420 | " \n", 1421 | " \n", 1422 | " \n", 1423 | " \n", 1424 | " \n", 1425 | " \n", 1426 | " \n", 1427 | " \n", 1428 | " \n", 1429 | " \n", 1430 | " \n", 1431 | " \n", 1432 | " \n", 1433 | " \n", 1434 | " \n", 1435 | " \n", 1436 | " \n", 1437 | " \n", 1438 | " \n", 1439 | "
Count 1Count 2
2016-12-11109121
2016-12-25105126
\n", 1440 | "
" 1441 | ], 1442 | "text/plain": [ 1443 | " Count 1 Count 2\n", 1444 | "2016-12-11 109 121\n", 1445 | "2016-12-25 105 126" 1446 | ] 1447 | }, 1448 | "execution_count": 21, 1449 | "metadata": {}, 1450 | "output_type": "execute_result" 1451 | } 1452 | ], 1453 | "source": [ 1454 | "df['2016-12']" 1455 | ] 1456 | }, 1457 | { 1458 | "cell_type": "code", 1459 | "execution_count": 22, 1460 | "metadata": { 1461 | "collapsed": false 1462 | }, 1463 | "outputs": [ 1464 | { 1465 | "data": { 1466 | "text/html": [ 1467 | "
\n", 1468 | "\n", 1469 | " \n", 1470 | " \n", 1471 | " \n", 1472 | " \n", 1473 | " \n", 1474 | " \n", 1475 | " \n", 1476 | " \n", 1477 | " \n", 1478 | " \n", 1479 | " \n", 1480 | " \n", 1481 | " \n", 1482 | " \n", 1483 | " \n", 1484 | " \n", 1485 | " \n", 1486 | " \n", 1487 | " \n", 1488 | " \n", 1489 | " \n", 1490 | " \n", 1491 | " \n", 1492 | " \n", 1493 | " \n", 1494 | " \n", 1495 | " \n", 1496 | " \n", 1497 | " \n", 1498 | "
Count 1Count 2
2016-12-11109121
2016-12-25105126
2017-01-08105125
2017-01-22101123
\n", 1499 | "
" 1500 | ], 1501 | "text/plain": [ 1502 | " Count 1 Count 2\n", 1503 | "2016-12-11 109 121\n", 1504 | "2016-12-25 105 126\n", 1505 | "2017-01-08 105 125\n", 1506 | "2017-01-22 101 123" 1507 | ] 1508 | }, 1509 | "execution_count": 22, 1510 | "metadata": {}, 1511 | "output_type": "execute_result" 1512 | } 1513 | ], 1514 | "source": [ 1515 | "df['2016-12':]" 1516 | ] 1517 | }, 1518 | { 1519 | "cell_type": "code", 1520 | "execution_count": null, 1521 | "metadata": { 1522 | "collapsed": false 1523 | }, 1524 | "outputs": [], 1525 | "source": [ 1526 | "df.asfreq('W', method='ffill')" 1527 | ] 1528 | }, 1529 | { 1530 | "cell_type": "code", 1531 | "execution_count": null, 1532 | "metadata": { 1533 | "collapsed": false 1534 | }, 1535 | "outputs": [], 1536 | "source": [ 1537 | "import matplotlib.pyplot as plt\n", 1538 | "%matplotlib inline\n", 1539 | "\n", 1540 | "df.plot()" 1541 | ] 1542 | } 1543 | ], 1544 | "metadata": { 1545 | "celltoolbar": "Raw Cell Format", 1546 | "kernelspec": { 1547 | "display_name": "Python 3", 1548 | "language": "python", 1549 | "name": "python3" 1550 | }, 1551 | "language_info": { 1552 | "codemirror_mode": { 1553 | "name": "ipython", 1554 | "version": 3 1555 | }, 1556 | "file_extension": ".py", 1557 | "mimetype": "text/x-python", 1558 | "name": "python", 1559 | "nbconvert_exporter": "python", 1560 | "pygments_lexer": "ipython3", 1561 | "version": "3.5.2" 1562 | } 1563 | }, 1564 | "nbformat": 4, 1565 | "nbformat_minor": 0 1566 | } 1567 | -------------------------------------------------------------------------------- /Week3/Assignment - Week 3/Assignment+3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "---\n", 8 | "\n", 9 | "_You are currently looking at **version 1.4** of this notebook. 
To download notebooks and datafiles, as well as get help on Jupyter notebooks in the Coursera platform, visit the [Jupyter Notebook FAQ](https://www.coursera.org/learn/python-data-analysis/resources/0dhYG) course resource._\n", 10 | "\n", 11 | "---" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "# Assignment 3 - More Pandas\n", 19 | "All questions are weighted the same in this assignment. This assignment requires more individual learning then the last one did - you are encouraged to check out the [pandas documentation](http://pandas.pydata.org/pandas-docs/stable/) to find functions or methods you might not have used yet, or ask questions on [Stack Overflow](http://stackoverflow.com/) and tag them as pandas and python related. And of course, the discussion forums are open for interaction with your peers and the course staff." 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "### Question 1 (20%)\n", 27 | "Load the energy data from the file `Energy Indicators.xls`, which is a list of indicators of [energy supply and renewable electricity production](Energy%20Indicators.xls) from the [United Nations](http://unstats.un.org/unsd/environment/excel_file_tables/2013/Energy%20Indicators.xls) for the year 2013, and should be put into a DataFrame with the variable name of **energy**.\n", 28 | "\n", 29 | "Keep in mind that this is an Excel file, and not a comma separated values file. Also, make sure to exclude the footer and header information from the datafile. The first two columns are unneccessary, so you should get rid of them, and you should change the column labels so that the columns are:\n", 30 | "\n", 31 | "`['Country', 'Energy Supply', 'Energy Supply per Capita', '% Renewable]`\n", 32 | "\n", 33 | "Convert `Energy Supply` to gigajoules (there are 1,000,000 gigajoules in a petajoule). For all countries which have missing data (e.g. 
data with \"...\") make sure this is reflected as `np.NaN` values.\n", 34 | "\n", 35 | "Rename the following list of countries (for use in later questions):\n", 36 | "\n", 37 | "```\"Republic of Korea\": \"South Korea\",\n", 38 | "\"United States of America\": \"United States\",\n", 39 | "\"United Kingdom of Great Britain and Northern Ireland\": \"United Kingdom\",\n", 40 | "\"China, Hong Kong Special Administrative Region\": \"Hong Kong\"```\n", 41 | "\n", 42 | "There are also several countries with numbers and/or parenthesis in their name. Be sure to remove these, e.g. `'Bolivia (Plurinational State of)'` should be `'Bolivia'`.\n", 43 | "\n", 44 | "
\n", 45 | "\n", 46 | "Next, load the GDP data from the file `world_bank.csv`, which is a csv containing countries' GDP from 1960 to 2015 from [World Bank](http://data.worldbank.org/indicator/NY.GDP.MKTP.CD). Call this DataFrame **GDP**. \n", 47 | "\n", 48 | "Make sure to skip the header, and rename the following list of countries:\n", 49 | "\n", 50 | "```\"Korea, Rep.\": \"South Korea\", \n", 51 | "\"Iran, Islamic Rep.\": \"Iran\",\n", 52 | "\"Hong Kong SAR, China\": \"Hong Kong\"```\n", 53 | "\n", 54 | "
\n", 55 | "\n", 56 | "Finally, load the [Sciamgo Journal and Country Rank data for Energy Engineering and Power Technology](http://www.scimagojr.com/countryrank.php?category=2102) from the file `scimagojr-3.xlsx`, which ranks countries based on their journal contributions in the aforementioned area. Call this DataFrame **ScimEn**.\n", 57 | "\n", 58 | "Join the three datasets: GDP, Energy, and ScimEn into a new dataset (using the intersection of country names). Use only the last 10 years (2006-2015) of GDP data and only the top 15 countries by Scimagojr 'Rank' (Rank 1 through 15). \n", 59 | "\n", 60 | "The index of this DataFrame should be the name of the country, and the columns should be ['Rank', 'Documents', 'Citable documents', 'Citations', 'Self-citations',\n", 61 | " 'Citations per document', 'H index', 'Energy Supply',\n", 62 | " 'Energy Supply per Capita', '% Renewable', '2006', '2007', '2008',\n", 63 | " '2009', '2010', '2011', '2012', '2013', '2014', '2015'].\n", 64 | "\n", 65 | "*This function should return a DataFrame with 20 columns and 15 entries.*" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 2, 71 | "metadata": { 72 | "collapsed": false, 73 | "scrolled": true 74 | }, 75 | "outputs": [ 76 | { 77 | "data": { 78 | "text/html": [ 79 | "
\n", 80 | "\n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 
235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " 
\n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | "
RankDocumentsCitable documentsCitationsSelf-citationsCitations per documentH indexEnergy SupplyEnergy Supply per Capita% Renewable2006200720082009201020112012201320142015
Country
China11270501267675972374116834.701381271910000009319.75493.992331e+124.559041e+124.997775e+125.459247e+126.039659e+126.612490e+127.124978e+127.672448e+128.230121e+128.797999e+12
United States296661947477922742654368.202309083800000028611.5711.479230e+131.505540e+131.501149e+131.459484e+131.496437e+131.520402e+131.554216e+131.577367e+131.615662e+131.654857e+13
Japan33050430287223024615547.311341898400000014910.23285.496542e+125.617036e+125.558527e+125.251308e+125.498718e+125.473738e+125.569102e+125.644659e+125.642884e+125.669563e+12
United Kingdom42094420357206091378749.84139792000000012410.60052.419631e+122.482203e+122.470614e+122.367048e+122.403504e+122.450911e+122.479809e+122.533370e+122.605643e+122.666333e+12
Russian Federation5185341830134266124221.85573070900000021417.28871.385793e+121.504071e+121.583004e+121.459199e+121.524917e+121.589943e+121.645876e+121.666934e+121.678709e+121.616149e+12
Canada617899176202150034093012.011491043100000029661.94541.564469e+121.596740e+121.612713e+121.565145e+121.613406e+121.664087e+121.693133e+121.730688e+121.773486e+121.792609e+12
Germany71702716831140566274268.261261326100000016517.90153.332891e+123.441561e+123.478809e+123.283340e+123.417298e+123.542371e+123.556724e+123.567317e+123.624386e+123.685556e+12
India81500514841128763372098.58115331950000002614.96911.265894e+121.374865e+121.428361e+121.549483e+121.708459e+121.821872e+121.924235e+122.051982e+122.200617e+122.367206e+12
France91315312973130632286019.931141059700000016617.02032.607840e+122.669424e+122.674637e+122.595967e+122.646995e+122.702032e+122.706968e+122.722567e+122.729632e+122.761185e+12
South Korea101198311923114675225959.57104110070000002212.279359.410199e+119.924316e+111.020510e+121.027730e+121.094499e+121.134796e+121.160809e+121.194429e+121.234340e+121.266580e+12
Italy1110964107941118502666110.20106653000000010933.66722.202170e+122.234627e+122.211154e+122.089938e+122.125185e+122.137439e+122.077184e+122.040871e+122.033868e+122.049316e+12
Spain12942893301233362396413.08115492300000010637.96861.414823e+121.468146e+121.484530e+121.431475e+121.431673e+121.417355e+121.380216e+121.357139e+121.375605e+121.419821e+12
Iran138896881957470191256.467291720000001195.707723.895523e+114.250646e+114.289909e+114.389208e+114.677902e+114.853309e+114.532569e+114.445926e+114.639027e+11NaN
Australia1488318725907651560610.28107538600000023111.81081.021939e+121.060340e+121.099644e+121.119654e+121.142251e+121.169431e+121.211913e+121.241484e+121.272520e+121.301251e+12
Brazil158668859660702143967.0086121490000005969.6481.845080e+121.957118e+122.056809e+122.054215e+122.208872e+122.295245e+122.339209e+122.409740e+122.412231e+122.319423e+12
\n", 477 | "
" 478 | ], 479 | "text/plain": [ 480 | " Rank Documents Citable documents Citations \\\n", 481 | "Country \n", 482 | "China 1 127050 126767 597237 \n", 483 | "United States 2 96661 94747 792274 \n", 484 | "Japan 3 30504 30287 223024 \n", 485 | "United Kingdom 4 20944 20357 206091 \n", 486 | "Russian Federation 5 18534 18301 34266 \n", 487 | "Canada 6 17899 17620 215003 \n", 488 | "Germany 7 17027 16831 140566 \n", 489 | "India 8 15005 14841 128763 \n", 490 | "France 9 13153 12973 130632 \n", 491 | "South Korea 10 11983 11923 114675 \n", 492 | "Italy 11 10964 10794 111850 \n", 493 | "Spain 12 9428 9330 123336 \n", 494 | "Iran 13 8896 8819 57470 \n", 495 | "Australia 14 8831 8725 90765 \n", 496 | "Brazil 15 8668 8596 60702 \n", 497 | "\n", 498 | " Self-citations Citations per document H index \\\n", 499 | "Country \n", 500 | "China 411683 4.70 138 \n", 501 | "United States 265436 8.20 230 \n", 502 | "Japan 61554 7.31 134 \n", 503 | "United Kingdom 37874 9.84 139 \n", 504 | "Russian Federation 12422 1.85 57 \n", 505 | "Canada 40930 12.01 149 \n", 506 | "Germany 27426 8.26 126 \n", 507 | "India 37209 8.58 115 \n", 508 | "France 28601 9.93 114 \n", 509 | "South Korea 22595 9.57 104 \n", 510 | "Italy 26661 10.20 106 \n", 511 | "Spain 23964 13.08 115 \n", 512 | "Iran 19125 6.46 72 \n", 513 | "Australia 15606 10.28 107 \n", 514 | "Brazil 14396 7.00 86 \n", 515 | "\n", 516 | " Energy Supply Energy Supply per Capita % Renewable \\\n", 517 | "Country \n", 518 | "China 127191000000 93 19.7549 \n", 519 | "United States 90838000000 286 11.571 \n", 520 | "Japan 18984000000 149 10.2328 \n", 521 | "United Kingdom 7920000000 124 10.6005 \n", 522 | "Russian Federation 30709000000 214 17.2887 \n", 523 | "Canada 10431000000 296 61.9454 \n", 524 | "Germany 13261000000 165 17.9015 \n", 525 | "India 33195000000 26 14.9691 \n", 526 | "France 10597000000 166 17.0203 \n", 527 | "South Korea 11007000000 221 2.27935 \n", 528 | "Italy 6530000000 109 33.6672 \n", 529 | "Spain 4923000000 106 
37.9686 \n", 530 | "Iran 9172000000 119 5.70772 \n", 531 | "Australia 5386000000 231 11.8108 \n", 532 | "Brazil 12149000000 59 69.648 \n", 533 | "\n", 534 | " 2006 2007 2008 2009 \\\n", 535 | "Country \n", 536 | "China 3.992331e+12 4.559041e+12 4.997775e+12 5.459247e+12 \n", 537 | "United States 1.479230e+13 1.505540e+13 1.501149e+13 1.459484e+13 \n", 538 | "Japan 5.496542e+12 5.617036e+12 5.558527e+12 5.251308e+12 \n", 539 | "United Kingdom 2.419631e+12 2.482203e+12 2.470614e+12 2.367048e+12 \n", 540 | "Russian Federation 1.385793e+12 1.504071e+12 1.583004e+12 1.459199e+12 \n", 541 | "Canada 1.564469e+12 1.596740e+12 1.612713e+12 1.565145e+12 \n", 542 | "Germany 3.332891e+12 3.441561e+12 3.478809e+12 3.283340e+12 \n", 543 | "India 1.265894e+12 1.374865e+12 1.428361e+12 1.549483e+12 \n", 544 | "France 2.607840e+12 2.669424e+12 2.674637e+12 2.595967e+12 \n", 545 | "South Korea 9.410199e+11 9.924316e+11 1.020510e+12 1.027730e+12 \n", 546 | "Italy 2.202170e+12 2.234627e+12 2.211154e+12 2.089938e+12 \n", 547 | "Spain 1.414823e+12 1.468146e+12 1.484530e+12 1.431475e+12 \n", 548 | "Iran 3.895523e+11 4.250646e+11 4.289909e+11 4.389208e+11 \n", 549 | "Australia 1.021939e+12 1.060340e+12 1.099644e+12 1.119654e+12 \n", 550 | "Brazil 1.845080e+12 1.957118e+12 2.056809e+12 2.054215e+12 \n", 551 | "\n", 552 | " 2010 2011 2012 2013 \\\n", 553 | "Country \n", 554 | "China 6.039659e+12 6.612490e+12 7.124978e+12 7.672448e+12 \n", 555 | "United States 1.496437e+13 1.520402e+13 1.554216e+13 1.577367e+13 \n", 556 | "Japan 5.498718e+12 5.473738e+12 5.569102e+12 5.644659e+12 \n", 557 | "United Kingdom 2.403504e+12 2.450911e+12 2.479809e+12 2.533370e+12 \n", 558 | "Russian Federation 1.524917e+12 1.589943e+12 1.645876e+12 1.666934e+12 \n", 559 | "Canada 1.613406e+12 1.664087e+12 1.693133e+12 1.730688e+12 \n", 560 | "Germany 3.417298e+12 3.542371e+12 3.556724e+12 3.567317e+12 \n", 561 | "India 1.708459e+12 1.821872e+12 1.924235e+12 2.051982e+12 \n", 562 | "France 2.646995e+12 2.702032e+12 
2.706968e+12 2.722567e+12 \n", 563 | "South Korea 1.094499e+12 1.134796e+12 1.160809e+12 1.194429e+12 \n", 564 | "Italy 2.125185e+12 2.137439e+12 2.077184e+12 2.040871e+12 \n", 565 | "Spain 1.431673e+12 1.417355e+12 1.380216e+12 1.357139e+12 \n", 566 | "Iran 4.677902e+11 4.853309e+11 4.532569e+11 4.445926e+11 \n", 567 | "Australia 1.142251e+12 1.169431e+12 1.211913e+12 1.241484e+12 \n", 568 | "Brazil 2.208872e+12 2.295245e+12 2.339209e+12 2.409740e+12 \n", 569 | "\n", 570 | " 2014 2015 \n", 571 | "Country \n", 572 | "China 8.230121e+12 8.797999e+12 \n", 573 | "United States 1.615662e+13 1.654857e+13 \n", 574 | "Japan 5.642884e+12 5.669563e+12 \n", 575 | "United Kingdom 2.605643e+12 2.666333e+12 \n", 576 | "Russian Federation 1.678709e+12 1.616149e+12 \n", 577 | "Canada 1.773486e+12 1.792609e+12 \n", 578 | "Germany 3.624386e+12 3.685556e+12 \n", 579 | "India 2.200617e+12 2.367206e+12 \n", 580 | "France 2.729632e+12 2.761185e+12 \n", 581 | "South Korea 1.234340e+12 1.266580e+12 \n", 582 | "Italy 2.033868e+12 2.049316e+12 \n", 583 | "Spain 1.375605e+12 1.419821e+12 \n", 584 | "Iran 4.639027e+11 NaN \n", 585 | "Australia 1.272520e+12 1.301251e+12 \n", 586 | "Brazil 2.412231e+12 2.319423e+12 " 587 | ] 588 | }, 589 | "execution_count": 2, 590 | "metadata": {}, 591 | "output_type": "execute_result" 592 | } 593 | ], 594 | "source": [ 595 | "import pandas as pd\n", 596 | "import numpy as np\n", 597 | "\n", 598 | "# Load the Excel sheet and CSV file into Panda Dataframe. 
Skip header for GDP Dataframe.\n", 599 | "xls_file = pd.ExcelFile('Energy Indicators.xls')\n", 600 | "energy = xls_file.parse('Energy')\n", 601 | "GDP = pd.read_csv('world_bank.csv', header=None)\n", 602 | "ScimEn_file = pd.ExcelFile('scimagojr-3.xlsx')\n", 603 | "ScimEn = ScimEn_file.parse('Sheet1')\n", 604 | "\n", 605 | "# Drop first 2 columns\n", 606 | "energy = energy.drop(['Unnamed: 0','Unnamed: 1'],1)\n", 607 | "\n", 608 | "# Rename the columns\n", 609 | "energy.columns = ['Country', 'Energy Supply', 'Energy Supply per Capita', '% Renewable']\n", 610 | "\n", 611 | "# Convert \"...\" to np.NaN\n", 612 | "# This section is not very DRY[\"Don't Repeat Yourself\"]. Need to think about ways to apply regex to column names, so that the same function applies to columns starting with \"Energy Supply\"\n", 613 | "energy['Energy Supply'].replace(regex=True,inplace=True,to_replace=r'\\W',value=np.nan)\n", 614 | "energy['Energy Supply per Capita'].replace(regex=True,inplace=True,to_replace=r'\\W',value=np.nan)\n", 615 | "\n", 616 | "# Convert 'Energy Supply' Column from petajoule to gigajoules (there are 1,000,000 gigajoules in a petajoule)\n", 617 | "energy['Energy Supply']*=1000000\n", 618 | "\n", 619 | "# Set Country column to be the index\n", 620 | "energy.set_index('Country')\n", 621 | "\n", 622 | "# Use Regular Expression to remove numbers and parenthesis(and the content inside) in country names. \\d stands for digits. \n", 623 | "# Remember to add a whitespace before the first escape before ()....couldn't find Bolivia otherwise. 
Struggled for a long time for this!\n", 624 | "energy['Country'].replace(regex=True,inplace=True,to_replace=r'\\d',value=r'')\n", 625 | "energy['Country'].replace(regex=True,inplace=True,to_replace=r' \\(.*\\)',value=r'')\n", 626 | "\n", 627 | "# Replace country names with new names\n", 628 | "energy.Country[energy.Country == \"Republic of Korea\"]=\"South Korea\"\n", 629 | "energy.Country[energy.Country == \"United States of America\"]=\"United States\"\n", 630 | "energy.Country[energy.Country == \"China, Hong Kong Special Administrative Region\"]=\"Hong Kong\"\n", 631 | "energy.Country[energy.Country == \"United Kingdom of Great Britain and Northern Ireland\"]=\"United Kingdom\"\n", 632 | "\n", 633 | "# Test to ensure that the replacement worked. Have turned them into comments.\n", 634 | "# energy.loc[energy.Country ==\"Bolivia\"]\n", 635 | "# energy.loc[energy.Country ==\"United Kingdom\"]\n", 636 | "\n", 637 | "\n", 638 | "# ----------------------------------------------\n", 639 | "\n", 640 | "# Drop the first few rows of GDP Dataframe to clean the data\n", 641 | "GDP.drop(GDP.index[:4],axis=0, inplace=True)\n", 642 | "\n", 643 | "# Drop the other columns so that we only keep the last 10 years' data (2006-2015)\n", 644 | "GDP.drop(GDP.columns[1:50], axis=1, inplace=True)\n", 645 | "\n", 646 | "# Convert the Column heads from number to strings (2006.0 -> Year 2006)\n", 647 | "# GDP[50:]=GDP[50:].astype(str)\n", 648 | "# GDP.rename(columns = lambda x: str(x), inplace=True)\n", 649 | "GDP.columns = ['Country','2006','2007','2008','2009','2010','2011','2012','2013','2014','2015']\n", 650 | "GDP.columns.values\n", 651 | "\n", 652 | "\n", 653 | "\n", 654 | "# Use the first row as column headers\n", 655 | "# Drop redundant row\n", 656 | "GDP.columns = GDP.iloc[0]\n", 657 | "GDP.drop(GDP.index[0],axis=0, inplace=True)\n", 658 | "GDP.rename(columns={'Country Name': 'Country'}, inplace=True)\n", 659 | "GDP.set_index('Country')\n", 660 | "\n", 661 | "# Rename the 
countries. South Korea was actually named \"Korea, Rep.\" in the original spreadsheet. They gave the wrong instruction here...\n", 662 | "GDP['Country'].replace(regex=True,inplace=True,to_replace='Korea, Rep.',value='South Korea')\n", 663 | "GDP['Country'].replace(regex=True,inplace=True,to_replace='Iran, Islamic Rep.',value='Iran')\n", 664 | "GDP['Country'].replace(regex=True,inplace=True,to_replace='Hong Kong SAR, China',value='Hong Kong')\n", 665 | "\n", 666 | "# Remove the parenthesis\n", 667 | "GDP['Country'].replace(regex=True,inplace=True,to_replace=r' \\(.*\\)',value=r'')\n", 668 | "\n", 669 | "\n", 670 | "# Test to ensure that the replacement worked. Have turned it into comment.\n", 671 | "# GDP.loc[GDP[\"Country Name\"] ==\"South Korea\"]\n", 672 | "\n", 673 | "# ----------------------------------------------\n", 674 | "\n", 675 | "# Keep only the top 15 countries by Ranking in the ScimEn Dataframe\n", 676 | "ScimEn.drop(ScimEn.index[15:],axis=0, inplace=True)\n", 677 | "\n", 678 | "# Merge the 3 dataframes: energy, GDP, ScimEn\n", 679 | "new = pd.merge(pd.merge(ScimEn,energy,on='Country'),GDP,on='Country')\n", 680 | "\n", 681 | "# Set the Country column to be the index. Remember to use inplace=True. Otherwise it kept returning a list of integers...spend an hour on this. \n", 682 | "new.set_index('Country',inplace=True)\n", 683 | "\n", 684 | "# Count the number of rows and columns in the dataframe. This is a test, have commented it. \n", 685 | "# new.shape[0] #gives number of row count\n", 686 | "# new.shape[1] #gives number of col count\n", 687 | "\n", 688 | "# Hardcoding this because I couln't figure out how to convert Numbers to Strings in the header. 
Tried astype() and to_string, both didn't work....\n", 689 | "new.columns = ['Rank', 'Documents', 'Citable documents', 'Citations', 'Self-citations', 'Citations per document', 'H index', 'Energy Supply', 'Energy Supply per Capita', '% Renewable', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015']\n", 690 | "\n", 691 | "def answer_one():\n", 692 | " return new\n", 693 | "\n", 694 | "answer_one()" 695 | ] 696 | }, 697 | { 698 | "cell_type": "code", 699 | "execution_count": null, 700 | "metadata": { 701 | "collapsed": true 702 | }, 703 | "outputs": [], 704 | "source": [] 705 | }, 706 | { 707 | "cell_type": "markdown", 708 | "metadata": {}, 709 | "source": [ 710 | "### Question 2 (6.6%)\n", 711 | "The previous question joined three datasets then reduced this to just the top 15 entries. When you joined the datasets, but before you reduced this to the top 15 items, how many entries did you lose?\n", 712 | "\n", 713 | "*This function should return a single number.*" 714 | ] 715 | }, 716 | { 717 | "cell_type": "code", 718 | "execution_count": 58, 719 | "metadata": { 720 | "collapsed": false 721 | }, 722 | "outputs": [ 723 | { 724 | "data": { 725 | "text/html": [ 726 | "\n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " Everything but this!\n", 732 | "" 733 | ], 734 | "text/plain": [ 735 | "" 736 | ] 737 | }, 738 | "metadata": {}, 739 | "output_type": "display_data" 740 | } 741 | ], 742 | "source": [ 743 | "%%HTML\n", 744 | "\n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " Everything but this!\n", 750 | "" 751 | ] 752 | }, 753 | { 754 | "cell_type": "code", 755 | "execution_count": null, 756 | "metadata": { 757 | "collapsed": true 758 | }, 759 | "outputs": [], 760 | "source": [] 761 | }, 762 | { 763 | "cell_type": "code", 764 | "execution_count": null, 765 | "metadata": { 766 | "collapsed": false 767 | }, 768 | "outputs": [], 769 | "source": [ 770 | "def answer_two():\n", 771 | " return \"ANSWER\"" 772 | ] 773 | }, 
774 | { 775 | "cell_type": "markdown", 776 | "metadata": {}, 777 | "source": [ 778 | "
\n", 779 | "\n", 780 | "Answer the following questions in the context of only the top 15 countries by Scimagojr Rank (aka the DataFrame returned by `answer_one()`)" 781 | ] 782 | }, 783 | { 784 | "cell_type": "markdown", 785 | "metadata": {}, 786 | "source": [ 787 | "### Question 3 (6.6%)\n", 788 | "What is the average GDP over the last 10 years for each country?\n", 789 | "\n", 790 | "*This function should return a Series named `avgGDP` with 15 countries and their average GDP sorted in descending order.*" 791 | ] 792 | }, 793 | { 794 | "cell_type": "code", 795 | "execution_count": 84, 796 | "metadata": { 797 | "collapsed": false, 798 | "scrolled": true 799 | }, 800 | "outputs": [ 801 | { 802 | "name": "stderr", 803 | "output_type": "stream", 804 | "text": [ 805 | "/opt/conda/lib/python3.5/site-packages/ipykernel/__main__.py:8: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)\n" 806 | ] 807 | }, 808 | { 809 | "data": { 810 | "text/plain": [ 811 | "Country\n", 812 | "United States 1.536434e+13\n", 813 | "China 6.348609e+12\n", 814 | "Japan 5.542208e+12\n", 815 | "Germany 3.493025e+12\n", 816 | "France 2.681725e+12\n", 817 | "United Kingdom 2.487907e+12\n", 818 | "Brazil 2.189794e+12\n", 819 | "Italy 2.120175e+12\n", 820 | "India 1.769297e+12\n", 821 | "Canada 1.660647e+12\n", 822 | "Russian Federation 1.565459e+12\n", 823 | "Spain 1.418078e+12\n", 824 | "Australia 1.164043e+12\n", 825 | "South Korea 1.106715e+12\n", 826 | "Iran 4.441558e+11\n", 827 | "Name: avgGDP, dtype: float64" 828 | ] 829 | }, 830 | "execution_count": 84, 831 | "metadata": {}, 832 | "output_type": "execute_result" 833 | } 834 | ], 835 | "source": [ 836 | "# Need to create a new dataframe for each question, otherwise the autograder would think that I'm creating new columns for the DF created in question 1, and will stop working...\n", 837 | "question3=new.copy()\n", 838 | "\n", 839 | "# The 2015 GDP data for Iran is NAN, so I had to use np.mean() instead of hard code it 
as df.sum()/10.... Probably better this way.\n", 840 | "question3[\"avgGDP\"]=question3[['2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015']].mean(axis=1)\n", 841 | "\n", 842 | "# Sort by descending order\n", 843 | "question3.sort('avgGDP', ascending=False,inplace=True)\n", 844 | "\n", 845 | "# Create a new data series named \"avgGDP\"\n", 846 | "avgGDP_series= question3.ix[:,'avgGDP']\n", 847 | "\n", 848 | "def answer_three():\n", 849 | " Top15 = answer_one()\n", 850 | " return avgGDP_series\n", 851 | "answer_three()" 852 | ] 853 | }, 854 | { 855 | "cell_type": "markdown", 856 | "metadata": {}, 857 | "source": [ 858 | "### Question 4 (6.6%)\n", 859 | "By how much had the GDP changed over the 10 year span for the country with the 6th largest average GDP?\n", 860 | "\n", 861 | "*This function should return a single number.*" 862 | ] 863 | }, 864 | { 865 | "cell_type": "code", 866 | "execution_count": 148, 867 | "metadata": { 868 | "collapsed": false, 869 | "scrolled": true 870 | }, 871 | "outputs": [ 872 | { 873 | "data": { 874 | "text/plain": [ 875 | "246702696075.3999" 876 | ] 877 | }, 878 | "execution_count": 148, 879 | "metadata": {}, 880 | "output_type": "execute_result" 881 | } 882 | ], 883 | "source": [ 884 | "question4 = question3.copy()\n", 885 | "question4.drop(question4.columns[0:10],axis=1, inplace=True)\n", 886 | "\n", 887 | "diff=question4.iloc[5][9]-question4.iloc[5][0]\n", 888 | "\n", 889 | "def answer_four():\n", 890 | " Top15 = answer_one()\n", 891 | " return diff\n", 892 | "\n", 893 | "answer_four()" 894 | ] 895 | }, 896 | { 897 | "cell_type": "code", 898 | "execution_count": null, 899 | "metadata": { 900 | "collapsed": true 901 | }, 902 | "outputs": [], 903 | "source": [] 904 | }, 905 | { 906 | "cell_type": "markdown", 907 | "metadata": {}, 908 | "source": [ 909 | "### Question 5 (6.6%)\n", 910 | "What is the mean energy supply per capita?\n", 911 | "\n", 912 | "*This function should return a single number.*" 913 | ] 
914 | }, 915 | { 916 | "cell_type": "code", 917 | "execution_count": 157, 918 | "metadata": { 919 | "collapsed": false 920 | }, 921 | "outputs": [ 922 | { 923 | "data": { 924 | "text/plain": [ 925 | "157.6" 926 | ] 927 | }, 928 | "execution_count": 157, 929 | "metadata": {}, 930 | "output_type": "execute_result" 931 | } 932 | ], 933 | "source": [ 934 | "question5 = new.copy()\n", 935 | "\n", 936 | "# Use item() to convert a numpy.float object to normal Python object (float)\n", 937 | "def answer_five():\n", 938 | " return question5['Energy Supply per Capita'].mean().item()\n", 939 | "answer_five()" 940 | ] 941 | }, 942 | { 943 | "cell_type": "markdown", 944 | "metadata": {}, 945 | "source": [ 946 | "### Question 6 (6.6%)\n", 947 | "What country has the maximum % Renewable and what is the percentage?\n", 948 | "\n", 949 | "*This function should return a tuple with the name of the country and the percentage.*" 950 | ] 951 | }, 952 | { 953 | "cell_type": "code", 954 | "execution_count": 170, 955 | "metadata": { 956 | "collapsed": false 957 | }, 958 | "outputs": [ 959 | { 960 | "name": "stderr", 961 | "output_type": "stream", 962 | "text": [ 963 | "/opt/conda/lib/python3.5/site-packages/ipykernel/__main__.py:2: FutureWarning: sort(columns=....) 
is deprecated, use sort_values(by=.....)\n", 964 | " from ipykernel import kernelapp as app\n" 965 | ] 966 | }, 967 | { 968 | "data": { 969 | "text/plain": [ 970 | "('Brazil', 69.64803)" 971 | ] 972 | }, 973 | "execution_count": 170, 974 | "metadata": {}, 975 | "output_type": "execute_result" 976 | } 977 | ], 978 | "source": [ 979 | "question6= new.copy()\n", 980 | "\n", 981 | "# Sort by '% Renewable'\n", 982 | "question6.sort('% Renewable',ascending=False,inplace=True)\n", 983 | "\n", 984 | "# Return as a tuple\n", 985 | "def answer_six():\n", 986 | " return (question6.iloc[0].name,question6.iloc[0]['% Renewable'])\n", 987 | "answer_six()" 988 | ] 989 | }, 990 | { 991 | "cell_type": "markdown", 992 | "metadata": {}, 993 | "source": [ 994 | "### Question 7 (6.6%)\n", 995 | "Create a new column that is the ratio of Self-Citations to Total Citations. \n", 996 | "What is the maximum value for this new column, and what country has the highest ratio?\n", 997 | "\n", 998 | "*This function should return a tuple with the name of the country and the ratio.*" 999 | ] 1000 | }, 1001 | { 1002 | "cell_type": "code", 1003 | "execution_count": 212, 1004 | "metadata": { 1005 | "collapsed": false 1006 | }, 1007 | "outputs": [ 1008 | { 1009 | "data": { 1010 | "text/plain": [ 1011 | "('China', 0.68931261793894216)" 1012 | ] 1013 | }, 1014 | "execution_count": 212, 1015 | "metadata": {}, 1016 | "output_type": "execute_result" 1017 | } 1018 | ], 1019 | "source": [ 1020 | "question7=new.copy()\n", 1021 | "\n", 1022 | "question7['Citation Ratio']=question7['Self-citations']/question7['Citations']\n", 1023 | "\n", 1024 | "name_of_country = question7[question7['Citation Ratio']==question7['Citation Ratio'].max()].index.values.item()\n", 1025 | "value = question7['Citation Ratio'].max()\n", 1026 | "def answer_seven():\n", 1027 | " return (name_of_country,value)\n", 1028 | "answer_seven()" 1029 | ] 1030 | }, 1031 | { 1032 | "cell_type": "code", 1033 | "execution_count": null, 1034 | 
"metadata": { 1035 | "collapsed": true 1036 | }, 1037 | "outputs": [], 1038 | "source": [] 1039 | }, 1040 | { 1041 | "cell_type": "markdown", 1042 | "metadata": {}, 1043 | "source": [ 1044 | "### Question 8 (6.6%)\n", 1045 | "\n", 1046 | "Create a column that estimates the population using Energy Supply and Energy Supply per capita. \n", 1047 | "What is the third most populous country according to this estimate?\n", 1048 | "\n", 1049 | "*This function should return a single string value.*" 1050 | ] 1051 | }, 1052 | { 1053 | "cell_type": "code", 1054 | "execution_count": 3, 1055 | "metadata": { 1056 | "collapsed": false 1057 | }, 1058 | "outputs": [ 1059 | { 1060 | "name": "stderr", 1061 | "output_type": "stream", 1062 | "text": [ 1063 | "/opt/conda/lib/python3.5/site-packages/ipykernel/__main__.py:6: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)\n" 1064 | ] 1065 | }, 1066 | { 1067 | "data": { 1068 | "text/plain": [ 1069 | "'United States'" 1070 | ] 1071 | }, 1072 | "execution_count": 3, 1073 | "metadata": {}, 1074 | "output_type": "execute_result" 1075 | } 1076 | ], 1077 | "source": [ 1078 | "question8 = new.copy()\n", 1079 | "# Create a new row for 'Population'\n", 1080 | "question8['Population']=question8['Energy Supply']/question8['Energy Supply per Capita']\n", 1081 | "\n", 1082 | "# Sort the dataframe by 'Population'\n", 1083 | "question8.sort('Population',ascending=False,inplace=True)\n", 1084 | "\n", 1085 | "# Find the 3rd most populous country, and return the index (country name) with index.values. 
Item() converts it into a Python string\n", 1086 | "third_populous_country = question8[question8['Population']==question8['Population'][2]].index.values.item()\n", 1087 | "def answer_eight():\n", 1088 | " return third_populous_country\n", 1089 | "\n", 1090 | "answer_eight()" 1091 | ] 1092 | }, 1093 | { 1094 | "cell_type": "code", 1095 | "execution_count": null, 1096 | "metadata": { 1097 | "collapsed": true 1098 | }, 1099 | "outputs": [], 1100 | "source": [] 1101 | }, 1102 | { 1103 | "cell_type": "markdown", 1104 | "metadata": {}, 1105 | "source": [ 1106 | "### Question 9\n", 1107 | "Create a column that estimates the number of citable documents per person. \n", 1108 | "What is the correlation between the number of citable documents per capita and the energy supply per capita? Use the `.corr()` method, (Pearson's correlation).\n", 1109 | "\n", 1110 | "*This function should return a single number.*\n", 1111 | "\n", 1112 | "*(Optional: Use the built-in function `plot9()` to visualize the relationship between Energy Supply per Capita vs. Citable docs per Capita)*" 1113 | ] 1114 | }, 1115 | { 1116 | "cell_type": "code", 1117 | "execution_count": 47, 1118 | "metadata": { 1119 | "collapsed": false 1120 | }, 1121 | "outputs": [], 1122 | "source": [ 1123 | "# Can't figure this out. Ditched it. 
\n", 1124 | "# I want chips.\n", 1125 | "# Going out now to buy chips...\n", 1126 | "\n", 1127 | "# question9=question8.copy()\n", 1128 | "\n", 1129 | "# question9['Citable document per Capita']=question9['Citable documents']/question9['Population']\n", 1130 | "# sub = question9[['Citable document per Capita','Energy Supply per Capita']]\n", 1131 | "\n", 1132 | "# # def answer_nine():\n", 1133 | "# # return \"ANSWER\"" 1134 | ] 1135 | }, 1136 | { 1137 | "cell_type": "code", 1138 | "execution_count": 214, 1139 | "metadata": { 1140 | "collapsed": false 1141 | }, 1142 | "outputs": [], 1143 | "source": [ 1144 | "# def plot9():\n", 1145 | "# import matplotlib as plt\n", 1146 | "# %matplotlib inline\n", 1147 | " \n", 1148 | "# Top15 = answer_one()\n", 1149 | "# Top15['PopEst'] = Top15['Energy Supply'] / Top15['Energy Supply per Capita']\n", 1150 | "# Top15['Citable docs per Capita'] = Top15['Citable documents'] / Top15['PopEst']\n", 1151 | "# Top15.plot(x='Citable docs per Capita', y='Energy Supply per Capita', kind='scatter', xlim=[0, 0.0006])" 1152 | ] 1153 | }, 1154 | { 1155 | "cell_type": "code", 1156 | "execution_count": null, 1157 | "metadata": { 1158 | "collapsed": true 1159 | }, 1160 | "outputs": [], 1161 | "source": [ 1162 | "#" 1163 | ] 1164 | }, 1165 | { 1166 | "cell_type": "code", 1167 | "execution_count": null, 1168 | "metadata": { 1169 | "collapsed": false 1170 | }, 1171 | "outputs": [], 1172 | "source": [ 1173 | "#plot9() # Be sure to comment out plot9() before submitting the assignment!" 
1174 | ] 1175 | }, 1176 | { 1177 | "cell_type": "markdown", 1178 | "metadata": {}, 1179 | "source": [ 1180 | "### Question 10 (6.6%)\n", 1181 | "Create a new column with a 1 if the country's % Renewable value is at or above the median for all countries in the top 15, and a 0 if the country's % Renewable value is below the median.\n", 1182 | "\n", 1183 | "*This function should return a series named `HighRenew` whose index is the country name sorted in ascending order of rank.*" 1184 | ] 1185 | }, 1186 | { 1187 | "cell_type": "code", 1188 | "execution_count": 74, 1189 | "metadata": { 1190 | "collapsed": false 1191 | }, 1192 | "outputs": [ 1193 | { 1194 | "name": "stderr", 1195 | "output_type": "stream", 1196 | "text": [ 1197 | "/opt/conda/lib/python3.5/site-packages/ipykernel/__main__.py:7: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)\n" 1198 | ] 1199 | }, 1200 | { 1201 | "data": { 1202 | "text/plain": [ 1203 | "Country\n", 1204 | "China 1\n", 1205 | "United States 0\n", 1206 | "Japan 0\n", 1207 | "United Kingdom 0\n", 1208 | "Russian Federation 1\n", 1209 | "Canada 1\n", 1210 | "Germany 1\n", 1211 | "India 0\n", 1212 | "France 1\n", 1213 | "South Korea 0\n", 1214 | "Italy 1\n", 1215 | "Spain 1\n", 1216 | "Iran 0\n", 1217 | "Australia 0\n", 1218 | "Brazil 1\n", 1219 | "Name: HighRenew, dtype: int64" 1220 | ] 1221 | }, 1222 | "execution_count": 74, 1223 | "metadata": {}, 1224 | "output_type": "execute_result" 1225 | } 1226 | ], 1227 | "source": [ 1228 | "question10=new.copy()\n", 1229 | "\n", 1230 | "median = question10['% Renewable'].median()\n", 1231 | "\n", 1232 | "question10['HighRenew'] = np.where(question10['% Renewable'] >= median,1,0)\n", 1233 | "question10['HighRenew'] = question10['HighRenew'].replace(np.nan,0)\n", 1234 | "question10.sort('Rank',ascending=True, inplace=True)\n", 1235 | "HighRenew= question10.ix[:,'HighRenew']\n", 1236 | "\n", 1237 | "def answer_ten():\n", 1238 | " return HighRenew\n", 1239 | "\n", 1240 | 
"answer_ten()" 1241 | ] 1242 | }, 1243 | { 1244 | "cell_type": "markdown", 1245 | "metadata": {}, 1246 | "source": [ 1247 | "### Question 11 (6.6%)\n", 1248 | "Use the following dictionary to group the Countries by Continent, then create a dateframe that displays the sample size (the number of countries in each continent bin), and the sum, mean, and std deviation for the estimated population of each country.\n", 1249 | "\n", 1250 | "```python\n", 1251 | "ContinentDict = {'China':'Asia', \n", 1252 | " 'United States':'North America', \n", 1253 | " 'Japan':'Asia', \n", 1254 | " 'United Kingdom':'Europe', \n", 1255 | " 'Russian Federation':'Europe', \n", 1256 | " 'Canada':'North America', \n", 1257 | " 'Germany':'Europe', \n", 1258 | " 'India':'Asia',\n", 1259 | " 'France':'Europe', \n", 1260 | " 'South Korea':'Asia', \n", 1261 | " 'Italy':'Europe', \n", 1262 | " 'Spain':'Europe', \n", 1263 | " 'Iran':'Asia',\n", 1264 | " 'Australia':'Australia', \n", 1265 | " 'Brazil':'South America'}\n", 1266 | "```\n", 1267 | "\n", 1268 | "*This function should return a DataFrame with index named Continent `['Asia', 'Australia', 'Europe', 'North America', 'South America']` and columns `['size', 'sum', 'mean', 'std']`*" 1269 | ] 1270 | }, 1271 | { 1272 | "cell_type": "code", 1273 | "execution_count": 163, 1274 | "metadata": { 1275 | "collapsed": false 1276 | }, 1277 | "outputs": [ 1278 | { 1279 | "name": "stderr", 1280 | "output_type": "stream", 1281 | "text": [ 1282 | "/opt/conda/lib/python3.5/site-packages/ipykernel/__main__.py:23: FutureWarning: convert_objects is deprecated. Use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.\n" 1283 | ] 1284 | }, 1285 | { 1286 | "data": { 1287 | "text/html": [ 1288 | "
\n", 1289 | "\n", 1290 | " \n", 1291 | " \n", 1292 | " \n", 1293 | " \n", 1294 | " \n", 1295 | " \n", 1296 | " \n", 1297 | " \n", 1298 | " \n", 1299 | " \n", 1300 | " \n", 1301 | " \n", 1302 | " \n", 1303 | " \n", 1304 | " \n", 1305 | " \n", 1306 | " \n", 1307 | " \n", 1308 | " \n", 1309 | " \n", 1310 | " \n", 1311 | " \n", 1312 | " \n", 1313 | " \n", 1314 | " \n", 1315 | " \n", 1316 | " \n", 1317 | " \n", 1318 | " \n", 1319 | " \n", 1320 | " \n", 1321 | " \n", 1322 | " \n", 1323 | " \n", 1324 | " \n", 1325 | " \n", 1326 | " \n", 1327 | " \n", 1328 | " \n", 1329 | " \n", 1330 | " \n", 1331 | " \n", 1332 | " \n", 1333 | " \n", 1334 | " \n", 1335 | " \n", 1336 | " \n", 1337 | " \n", 1338 | " \n", 1339 | " \n", 1340 | " \n", 1341 | " \n", 1342 | " \n", 1343 | "
sizesummeanstd
Continent
Asia52.898666e+095.797333e+086.790979e+08
Australia12.331602e+072.331602e+07NaN
Europe64.579297e+087.632161e+073.464767e+07
North America23.528552e+081.764276e+081.996696e+08
South America12.059153e+082.059153e+08NaN
\n", 1344 | "
" 1345 | ], 1346 | "text/plain": [ 1347 | " size sum mean std\n", 1348 | "Continent \n", 1349 | "Asia 5 2.898666e+09 5.797333e+08 6.790979e+08\n", 1350 | "Australia 1 2.331602e+07 2.331602e+07 NaN\n", 1351 | "Europe 6 4.579297e+08 7.632161e+07 3.464767e+07\n", 1352 | "North America 2 3.528552e+08 1.764276e+08 1.996696e+08\n", 1353 | "South America 1 2.059153e+08 2.059153e+08 NaN" 1354 | ] 1355 | }, 1356 | "execution_count": 163, 1357 | "metadata": {}, 1358 | "output_type": "execute_result" 1359 | } 1360 | ], 1361 | "source": [ 1362 | "question11 = question8.copy()\n", 1363 | "question11.drop(question11.columns[:-1],axis=1,inplace=True)\n", 1364 | "\n", 1365 | "ContinentDict = {'China':'Asia', \n", 1366 | " 'United States':'North America', \n", 1367 | " 'Japan':'Asia', \n", 1368 | " 'United Kingdom':'Europe', \n", 1369 | " 'Russian Federation':'Europe', \n", 1370 | " 'Canada':'North America', \n", 1371 | " 'Germany':'Europe', \n", 1372 | " 'India':'Asia',\n", 1373 | " 'France':'Europe', \n", 1374 | " 'South Korea':'Asia', \n", 1375 | " 'Italy':'Europe', \n", 1376 | " 'Spain':'Europe', \n", 1377 | " 'Iran':'Asia',\n", 1378 | " 'Australia':'Australia', \n", 1379 | " 'Brazil':'South America'}\n", 1380 | "question11.reset_index(inplace=1)\n", 1381 | "question11['Continent']=question11['Country'].map(ContinentDict)\n", 1382 | "\n", 1383 | "\n", 1384 | "question11['Population'] = question11['Population'].convert_objects(convert_numeric=True)\n", 1385 | "\n", 1386 | "PopSize=question11.groupby('Continent').agg({'Continent':np.count_nonzero})\n", 1387 | "PopSum=question11.groupby('Continent').agg({'Population':np.sum})\n", 1388 | "PopMean=question11.groupby('Continent').agg({'Population':np.average})\n", 1389 | "PopStd=question11.groupby('Continent').agg({'Population':np.std})\n", 1390 | "\n", 1391 | "PopTotal = pd.concat([PopSize,PopSum,PopMean,PopStd],axis=1)\n", 1392 | "PopTotal.columns = ['size', 'sum', 'mean', 'std']\n", 1393 | "\n", 1394 | "\n", 1395 | "def 
answer_eleven():\n", 1396 | " return PopTotal\n", 1397 | "answer_eleven()" 1398 | ] 1399 | }, 1400 | { 1401 | "cell_type": "markdown", 1402 | "metadata": {}, 1403 | "source": [ 1404 | "### Question 12 (6.6%)\n", 1405 | "Cut % Renewable into 5 bins. Group Top15 by the Continent, as well as these new % Renewable bins. How many countries are in each of these groups?\n", 1406 | "\n", 1407 | "*This function should return a Series with a MultiIndex of `Continent`, then the bins for `% Renewable`. Do not include groups with no countries.*" 1408 | ] 1409 | }, 1410 | { 1411 | "cell_type": "code", 1412 | "execution_count": null, 1413 | "metadata": { 1414 | "collapsed": false, 1415 | "scrolled": true 1416 | }, 1417 | "outputs": [], 1418 | "source": [ 1419 | "def answer_twelve():\n", 1420 | " Top15 = answer_one()\n", 1421 | " return \"ANSWER\"" 1422 | ] 1423 | }, 1424 | { 1425 | "cell_type": "markdown", 1426 | "metadata": {}, 1427 | "source": [ 1428 | "### Question 13 (6.6%)\n", 1429 | "Convert the Population Estimate series to a string with thousands separator (using commas). Do not round the results.\n", 1430 | "\n", 1431 | "e.g. 
317615384.61538464 -> 317,615,384.61538464\n", 1432 | "\n", 1433 | "*This function should return a Series `PopEst` whose index is the country name and whose values are the population estimate string.*" 1434 | ] 1435 | }, 1436 | { 1437 | "cell_type": "code", 1438 | "execution_count": 95, 1439 | "metadata": { 1440 | "collapsed": false, 1441 | "scrolled": true 1442 | }, 1443 | "outputs": [ 1444 | { 1445 | "data": { 1446 | "text/plain": [ 1447 | "Country\n", 1448 | "China 1,367,645,161.2903225\n", 1449 | "India 1,276,730,769.2307692\n", 1450 | "United States 317,615,384.61538464\n", 1451 | "Brazil 205,915,254.23728815\n", 1452 | "Russian Federation 143,500,000.0\n", 1453 | "Japan 127,409,395.97315437\n", 1454 | "Germany 80,369,696.96969697\n", 1455 | "Iran 77,075,630.25210084\n", 1456 | "United Kingdom 63,870,967.741935484\n", 1457 | "France 63,837,349.39759036\n", 1458 | "Italy 59,908,256.880733944\n", 1459 | "South Korea 49,805,429.864253394\n", 1460 | "Spain 46,443,396.2264151\n", 1461 | "Canada 35,239,864.86486486\n", 1462 | "Australia 23,316,017.316017315\n", 1463 | "Name: PopEst, dtype: object" 1464 | ] 1465 | }, 1466 | "execution_count": 95, 1467 | "metadata": {}, 1468 | "output_type": "execute_result" 1469 | } 1470 | ], 1471 | "source": [ 1472 | "question13=question8.copy()\n", 1473 | "\n", 1474 | "# Keep only the 'Population' Column\n", 1475 | "question13.drop(question13.columns[:-1],axis=1,inplace=True)\n", 1476 | "\n", 1477 | "# Use format() to add the thousands separator\n", 1478 | "question13['PopEst'] = question13['Population'].apply(lambda x : '{:,}'.format(x))\n", 1479 | "\n", 1480 | "# Create a data series for the output\n", 1481 | "question13_series= question13.ix[:,'PopEst']\n", 1482 | "\n", 1483 | "def answer_thirteen():\n", 1484 | " return question13_series\n", 1485 | "\n", 1486 | "answer_thirteen()" 1487 | ] 1488 | }, 1489 | { 1490 | "cell_type": "markdown", 1491 | "metadata": {}, 1492 | "source": [ 1493 | "### Optional\n", 1494 | "\n", 1495 | 
"Use the built in function `plot_optional()` to see an example visualization." 1496 | ] 1497 | }, 1498 | { 1499 | "cell_type": "code", 1500 | "execution_count": null, 1501 | "metadata": { 1502 | "collapsed": false, 1503 | "scrolled": true 1504 | }, 1505 | "outputs": [], 1506 | "source": [ 1507 | "def plot_optional():\n", 1508 | " import matplotlib as plt\n", 1509 | " %matplotlib inline\n", 1510 | " Top15 = answer_one()\n", 1511 | " ax = Top15.plot(x='Rank', y='% Renewable', kind='scatter', \n", 1512 | " c=['#e41a1c','#377eb8','#e41a1c','#4daf4a','#4daf4a','#377eb8','#4daf4a','#e41a1c',\n", 1513 | " '#4daf4a','#e41a1c','#4daf4a','#4daf4a','#e41a1c','#dede00','#ff7f00'], \n", 1514 | " xticks=range(1,16), s=6*Top15['2014']/10**10, alpha=.75, figsize=[16,6]);\n", 1515 | "\n", 1516 | " for i, txt in enumerate(Top15.index):\n", 1517 | " ax.annotate(txt, [Top15['Rank'][i], Top15['% Renewable'][i]], ha='center')\n", 1518 | "\n", 1519 | " print(\"This is an example of a visualization that can be created to help understand the data. \\\n", 1520 | "This is a bubble chart showing % Renewable vs. Rank. The size of the bubble corresponds to the countries' \\\n", 1521 | "2014 GDP, and the color corresponds to the continent.\")" 1522 | ] 1523 | }, 1524 | { 1525 | "cell_type": "code", 1526 | "execution_count": null, 1527 | "metadata": { 1528 | "collapsed": false 1529 | }, 1530 | "outputs": [], 1531 | "source": [ 1532 | "#plot_optional() # Be sure to comment out plot_optional() before submitting the assignment!" 
1533 | ] 1534 | } 1535 | ], 1536 | "metadata": { 1537 | "anaconda-cloud": {}, 1538 | "coursera": { 1539 | "course_slug": "python-data-analysis", 1540 | "graded_item_id": "zAr06", 1541 | "launcher_item_id": "KSSjT", 1542 | "part_id": "SL3fU" 1543 | }, 1544 | "kernelspec": { 1545 | "display_name": "Python 3", 1546 | "language": "python", 1547 | "name": "python3" 1548 | }, 1549 | "language_info": { 1550 | "codemirror_mode": { 1551 | "name": "ipython", 1552 | "version": 3 1553 | }, 1554 | "file_extension": ".py", 1555 | "mimetype": "text/x-python", 1556 | "name": "python", 1557 | "nbconvert_exporter": "python", 1558 | "pygments_lexer": "ipython3", 1559 | "version": "3.5.2" 1560 | } 1561 | }, 1562 | "nbformat": 4, 1563 | "nbformat_minor": 0 1564 | } 1565 | -------------------------------------------------------------------------------- /Week1/Week+1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "---\n", 8 | "\n", 9 | "_You are currently looking at **version 1.1** of this notebook. 
To download notebooks and datafiles, as well as get help on Jupyter notebooks in the Coursera platform, visit the [Jupyter Notebook FAQ](https://www.coursera.org/learn/python-data-analysis/resources/0dhYG) course resource._\n", 10 | "\n", 11 | "---" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "# The Python Programming Language: Functions" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [ 28 | { 29 | "data": { 30 | "text/plain": [ 31 | "3" 32 | ] 33 | }, 34 | "execution_count": 2, 35 | "metadata": {}, 36 | "output_type": "execute_result" 37 | } 38 | ], 39 | "source": [ 40 | "x = 1\n", 41 | "y = 2\n", 42 | "x + y" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 4, 48 | "metadata": { 49 | "collapsed": false 50 | }, 51 | "outputs": [ 52 | { 53 | "name": "stdout", 54 | "output_type": "stream", 55 | "text": [ 56 | "x\n" 57 | ] 58 | } 59 | ], 60 | "source": [ 61 | "print (\"x\")" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "
\n", 69 | "`add_numbers` is a function that takes two numbers and adds them together." 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": { 76 | "collapsed": false 77 | }, 78 | "outputs": [], 79 | "source": [ 80 | "def add_numbers(x, y):\n", 81 | " return x + y\n", 82 | "\n", 83 | "add_numbers(1, 2)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "
\n", 91 | "`add_numbers` updated to take an optional 3rd parameter. Using `print` allows printing of multiple expressions within a single cell." 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": { 98 | "collapsed": false 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "def add_numbers(x,y,z=None):\n", 103 | " if (z==None):\n", 104 | " return x+y\n", 105 | " else:\n", 106 | " return x+y+z\n", 107 | "\n", 108 | "print(add_numbers(1, 2))\n", 109 | "print(add_numbers(1, 2, 3))" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "
\n", 117 | "`add_numbers` updated to take an optional flag parameter." 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 1, 123 | "metadata": { 124 | "collapsed": false 125 | }, 126 | "outputs": [ 127 | { 128 | "name": "stdout", 129 | "output_type": "stream", 130 | "text": [ 131 | "Flag is true!\n", 132 | "3\n" 133 | ] 134 | } 135 | ], 136 | "source": [ 137 | "def add_numbers(x, y, z=None, flag=False):\n", 138 | " if (flag):\n", 139 | " print('Flag is true!')\n", 140 | " if (z==None):\n", 141 | " return x + y\n", 142 | " else:\n", 143 | " return x + y + z\n", 144 | " \n", 145 | "print(add_numbers(1, 2, flag=True))" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "
\n", 153 | "Assign function `add_numbers` to variable `a`." 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 5, 159 | "metadata": { 160 | "collapsed": false 161 | }, 162 | "outputs": [ 163 | { 164 | "data": { 165 | "text/plain": [ 166 | "3" 167 | ] 168 | }, 169 | "execution_count": 5, 170 | "metadata": {}, 171 | "output_type": "execute_result" 172 | } 173 | ], 174 | "source": [ 175 | "def add_numbers(x,y):\n", 176 | " return x+y\n", 177 | "\n", 178 | "a = add_numbers\n", 179 | "a(1,2)" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": { 186 | "collapsed": true 187 | }, 188 | "outputs": [], 189 | "source": [] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "metadata": {}, 194 | "source": [ 195 | "
\n", 196 | "# The Python Programming Language: Types and Sequences" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [ 203 | "
\n", 204 | "Use `type` to return the object's type." 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "metadata": { 211 | "collapsed": false 212 | }, 213 | "outputs": [], 214 | "source": [ 215 | "type('This is a string')" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 6, 221 | "metadata": { 222 | "collapsed": false 223 | }, 224 | "outputs": [ 225 | { 226 | "data": { 227 | "text/plain": [ 228 | "NoneType" 229 | ] 230 | }, 231 | "execution_count": 6, 232 | "metadata": {}, 233 | "output_type": "execute_result" 234 | } 235 | ], 236 | "source": [ 237 | "type(None)" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "metadata": { 244 | "collapsed": true 245 | }, 246 | "outputs": [], 247 | "source": [] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 7, 252 | "metadata": { 253 | "collapsed": false 254 | }, 255 | "outputs": [ 256 | { 257 | "data": { 258 | "text/plain": [ 259 | "int" 260 | ] 261 | }, 262 | "execution_count": 7, 263 | "metadata": {}, 264 | "output_type": "execute_result" 265 | } 266 | ], 267 | "source": [ 268 | "type(1)" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": null, 274 | "metadata": { 275 | "collapsed": true 276 | }, 277 | "outputs": [], 278 | "source": [] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": 8, 283 | "metadata": { 284 | "collapsed": false 285 | }, 286 | "outputs": [ 287 | { 288 | "data": { 289 | "text/plain": [ 290 | "float" 291 | ] 292 | }, 293 | "execution_count": 8, 294 | "metadata": {}, 295 | "output_type": "execute_result" 296 | } 297 | ], 298 | "source": [ 299 | "type(1.0)" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": null, 305 | "metadata": { 306 | "collapsed": true 307 | }, 308 | "outputs": [], 309 | "source": [] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": 9, 314 | "metadata": { 315 | "collapsed": 
false 316 | }, 317 | "outputs": [ 318 | { 319 | "data": { 320 | "text/plain": [ 321 | "function" 322 | ] 323 | }, 324 | "execution_count": 9, 325 | "metadata": {}, 326 | "output_type": "execute_result" 327 | } 328 | ], 329 | "source": [ 330 | "type(add_numbers)" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": null, 336 | "metadata": { 337 | "collapsed": true 338 | }, 339 | "outputs": [], 340 | "source": [] 341 | }, 342 | { 343 | "cell_type": "markdown", 344 | "metadata": {}, 345 | "source": [ 346 | "
\n", 347 | "Tuples are an immutable data structure (cannot be altered)." 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": 10, 353 | "metadata": { 354 | "collapsed": false 355 | }, 356 | "outputs": [ 357 | { 358 | "data": { 359 | "text/plain": [ 360 | "tuple" 361 | ] 362 | }, 363 | "execution_count": 10, 364 | "metadata": {}, 365 | "output_type": "execute_result" 366 | } 367 | ], 368 | "source": [ 369 | "x = (1, 'a', 2, 'b')\n", 370 | "type(x)" 371 | ] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "execution_count": null, 376 | "metadata": { 377 | "collapsed": true 378 | }, 379 | "outputs": [], 380 | "source": [] 381 | }, 382 | { 383 | "cell_type": "markdown", 384 | "metadata": {}, 385 | "source": [ 386 | "
\n", 387 | "Lists are a mutable data structure." 388 | ] 389 | }, 390 | { 391 | "cell_type": "code", 392 | "execution_count": 11, 393 | "metadata": { 394 | "collapsed": false 395 | }, 396 | "outputs": [ 397 | { 398 | "data": { 399 | "text/plain": [ 400 | "list" 401 | ] 402 | }, 403 | "execution_count": 11, 404 | "metadata": {}, 405 | "output_type": "execute_result" 406 | } 407 | ], 408 | "source": [ 409 | "x = [1, 'a', 2, 'b']\n", 410 | "type(x)" 411 | ] 412 | }, 413 | { 414 | "cell_type": "code", 415 | "execution_count": null, 416 | "metadata": { 417 | "collapsed": true 418 | }, 419 | "outputs": [], 420 | "source": [] 421 | }, 422 | { 423 | "cell_type": "markdown", 424 | "metadata": {}, 425 | "source": [ 426 | "
\n", 427 | "Use `append` to append an object to a list." 428 | ] 429 | }, 430 | { 431 | "cell_type": "code", 432 | "execution_count": 12, 433 | "metadata": { 434 | "collapsed": false 435 | }, 436 | "outputs": [ 437 | { 438 | "name": "stdout", 439 | "output_type": "stream", 440 | "text": [ 441 | "[1, 'a', 2, 'b', 3.3]\n" 442 | ] 443 | } 444 | ], 445 | "source": [ 446 | "x.append(3.3)\n", 447 | "print(x)" 448 | ] 449 | }, 450 | { 451 | "cell_type": "code", 452 | "execution_count": null, 453 | "metadata": { 454 | "collapsed": true 455 | }, 456 | "outputs": [], 457 | "source": [] 458 | }, 459 | { 460 | "cell_type": "markdown", 461 | "metadata": {}, 462 | "source": [ 463 | "
\n", 464 | "This is an example of how to loop through each item in the list." 465 | ] 466 | }, 467 | { 468 | "cell_type": "code", 469 | "execution_count": 13, 470 | "metadata": { 471 | "collapsed": false 472 | }, 473 | "outputs": [ 474 | { 475 | "name": "stdout", 476 | "output_type": "stream", 477 | "text": [ 478 | "1\n", 479 | "a\n", 480 | "2\n", 481 | "b\n", 482 | "3.3\n" 483 | ] 484 | } 485 | ], 486 | "source": [ 487 | "for item in x:\n", 488 | " print(item)" 489 | ] 490 | }, 491 | { 492 | "cell_type": "code", 493 | "execution_count": null, 494 | "metadata": { 495 | "collapsed": true 496 | }, 497 | "outputs": [], 498 | "source": [] 499 | }, 500 | { 501 | "cell_type": "markdown", 502 | "metadata": {}, 503 | "source": [ 504 | "
\n", 505 | "Or using the indexing operator:" 506 | ] 507 | }, 508 | { 509 | "cell_type": "code", 510 | "execution_count": 14, 511 | "metadata": { 512 | "collapsed": false 513 | }, 514 | "outputs": [ 515 | { 516 | "name": "stdout", 517 | "output_type": "stream", 518 | "text": [ 519 | "1\n", 520 | "a\n", 521 | "2\n", 522 | "b\n", 523 | "3.3\n" 524 | ] 525 | } 526 | ], 527 | "source": [ 528 | "i=0\n", 529 | "while( i != len(x) ):\n", 530 | " print(x[i])\n", 531 | " i = i + 1" 532 | ] 533 | }, 534 | { 535 | "cell_type": "code", 536 | "execution_count": null, 537 | "metadata": { 538 | "collapsed": true 539 | }, 540 | "outputs": [], 541 | "source": [] 542 | }, 543 | { 544 | "cell_type": "markdown", 545 | "metadata": {}, 546 | "source": [ 547 | "
\n", 548 | "Use `+` to concatenate lists." 549 | ] 550 | }, 551 | { 552 | "cell_type": "code", 553 | "execution_count": 15, 554 | "metadata": { 555 | "collapsed": false 556 | }, 557 | "outputs": [ 558 | { 559 | "data": { 560 | "text/plain": [ 561 | "[1, 2, 3, 4]" 562 | ] 563 | }, 564 | "execution_count": 15, 565 | "metadata": {}, 566 | "output_type": "execute_result" 567 | } 568 | ], 569 | "source": [ 570 | "[1,2] + [3,4]" 571 | ] 572 | }, 573 | { 574 | "cell_type": "code", 575 | "execution_count": null, 576 | "metadata": { 577 | "collapsed": true 578 | }, 579 | "outputs": [], 580 | "source": [] 581 | }, 582 | { 583 | "cell_type": "markdown", 584 | "metadata": {}, 585 | "source": [ 586 | "
\n", 587 | "Use `*` to repeat lists." 588 | ] 589 | }, 590 | { 591 | "cell_type": "code", 592 | "execution_count": 16, 593 | "metadata": { 594 | "collapsed": false 595 | }, 596 | "outputs": [ 597 | { 598 | "data": { 599 | "text/plain": [ 600 | "[1, 1, 1]" 601 | ] 602 | }, 603 | "execution_count": 16, 604 | "metadata": {}, 605 | "output_type": "execute_result" 606 | } 607 | ], 608 | "source": [ 609 | "[1]*3" 610 | ] 611 | }, 612 | { 613 | "cell_type": "code", 614 | "execution_count": null, 615 | "metadata": { 616 | "collapsed": true 617 | }, 618 | "outputs": [], 619 | "source": [] 620 | }, 621 | { 622 | "cell_type": "markdown", 623 | "metadata": {}, 624 | "source": [ 625 | "
\n", 626 | "Use the `in` operator to check if something is inside a list." 627 | ] 628 | }, 629 | { 630 | "cell_type": "code", 631 | "execution_count": 17, 632 | "metadata": { 633 | "collapsed": false 634 | }, 635 | "outputs": [ 636 | { 637 | "data": { 638 | "text/plain": [ 639 | "True" 640 | ] 641 | }, 642 | "execution_count": 17, 643 | "metadata": {}, 644 | "output_type": "execute_result" 645 | } 646 | ], 647 | "source": [ 648 | "1 in [1, 2, 3]" 649 | ] 650 | }, 651 | { 652 | "cell_type": "code", 653 | "execution_count": null, 654 | "metadata": { 655 | "collapsed": true 656 | }, 657 | "outputs": [], 658 | "source": [] 659 | }, 660 | { 661 | "cell_type": "markdown", 662 | "metadata": {}, 663 | "source": [ 664 | "
\n", 665 | "Now let's look at strings. Use bracket notation to slice a string." 666 | ] 667 | }, 668 | { 669 | "cell_type": "code", 670 | "execution_count": 18, 671 | "metadata": { 672 | "collapsed": false 673 | }, 674 | "outputs": [ 675 | { 676 | "name": "stdout", 677 | "output_type": "stream", 678 | "text": [ 679 | "T\n", 680 | "T\n", 681 | "Th\n" 682 | ] 683 | } 684 | ], 685 | "source": [ 686 | "x = 'This is a string'\n", 687 | "print(x[0]) #first character\n", 688 | "print(x[0:1]) #first character, but we have explicitly set the end character\n", 689 | "print(x[0:2]) #first two characters\n" 690 | ] 691 | }, 692 | { 693 | "cell_type": "code", 694 | "execution_count": null, 695 | "metadata": { 696 | "collapsed": true 697 | }, 698 | "outputs": [], 699 | "source": [] 700 | }, 701 | { 702 | "cell_type": "markdown", 703 | "metadata": {}, 704 | "source": [ 705 | "
\n", 706 | "This will return the last element of the string." 707 | ] 708 | }, 709 | { 710 | "cell_type": "code", 711 | "execution_count": 19, 712 | "metadata": { 713 | "collapsed": false 714 | }, 715 | "outputs": [ 716 | { 717 | "data": { 718 | "text/plain": [ 719 | "'g'" 720 | ] 721 | }, 722 | "execution_count": 19, 723 | "metadata": {}, 724 | "output_type": "execute_result" 725 | } 726 | ], 727 | "source": [ 728 | "x[-1]" 729 | ] 730 | }, 731 | { 732 | "cell_type": "code", 733 | "execution_count": null, 734 | "metadata": { 735 | "collapsed": true 736 | }, 737 | "outputs": [], 738 | "source": [] 739 | }, 740 | { 741 | "cell_type": "markdown", 742 | "metadata": {}, 743 | "source": [ 744 | "
\n", 745 | "This will return the slice starting from the 4th element from the end and stopping before the 2nd element from the end." 746 | ] 747 | }, 748 | { 749 | "cell_type": "code", 750 | "execution_count": 20, 751 | "metadata": { 752 | "collapsed": false 753 | }, 754 | "outputs": [ 755 | { 756 | "data": { 757 | "text/plain": [ 758 | "'ri'" 759 | ] 760 | }, 761 | "execution_count": 20, 762 | "metadata": {}, 763 | "output_type": "execute_result" 764 | } 765 | ], 766 | "source": [ 767 | "x[-4:-2]" 768 | ] 769 | }, 770 | { 771 | "cell_type": "code", 772 | "execution_count": null, 773 | "metadata": { 774 | "collapsed": true 775 | }, 776 | "outputs": [], 777 | "source": [] 778 | }, 779 | { 780 | "cell_type": "markdown", 781 | "metadata": {}, 782 | "source": [ 783 | "
\n", 784 | "This is a slice from the beginning of the string and stopping before the 3rd element." 785 | ] 786 | }, 787 | { 788 | "cell_type": "code", 789 | "execution_count": 21, 790 | "metadata": { 791 | "collapsed": false 792 | }, 793 | "outputs": [ 794 | { 795 | "data": { 796 | "text/plain": [ 797 | "'Thi'" 798 | ] 799 | }, 800 | "execution_count": 21, 801 | "metadata": {}, 802 | "output_type": "execute_result" 803 | } 804 | ], 805 | "source": [ 806 | "x[:3]" 807 | ] 808 | }, 809 | { 810 | "cell_type": "code", 811 | "execution_count": null, 812 | "metadata": { 813 | "collapsed": true 814 | }, 815 | "outputs": [], 816 | "source": [] 817 | }, 818 | { 819 | "cell_type": "markdown", 820 | "metadata": {}, 821 | "source": [ 822 | "
\n", 823 | "And this is a slice starting from the 3rd element of the string and going all the way to the end." 824 | ] 825 | }, 826 | { 827 | "cell_type": "code", 828 | "execution_count": null, 829 | "metadata": { 830 | "collapsed": false 831 | }, 832 | "outputs": [], 833 | "source": [ 834 | "x[3:]" 835 | ] 836 | }, 837 | { 838 | "cell_type": "code", 839 | "execution_count": 1, 840 | "metadata": { 841 | "collapsed": false 842 | }, 843 | "outputs": [ 844 | { 845 | "name": "stdout", 846 | "output_type": "stream", 847 | "text": [ 848 | "Christopher Brooks\n", 849 | "ChristopherChristopherChristopher\n", 850 | "True\n" 851 | ] 852 | } 853 | ], 854 | "source": [ 855 | "firstname = 'Christopher'\n", 856 | "lastname = 'Brooks'\n", 857 | "\n", 858 | "print(firstname + ' ' + lastname)\n", 859 | "print(firstname*3)\n", 860 | "print('Chris' in firstname)\n" 861 | ] 862 | }, 863 | { 864 | "cell_type": "code", 865 | "execution_count": null, 866 | "metadata": { 867 | "collapsed": true 868 | }, 869 | "outputs": [], 870 | "source": [] 871 | }, 872 | { 873 | "cell_type": "markdown", 874 | "metadata": {}, 875 | "source": [ 876 | "
\n", 877 | "`split` returns a list of all the words in a string, or a list split on a specific character." 878 | ] 879 | }, 880 | { 881 | "cell_type": "code", 882 | "execution_count": 2, 883 | "metadata": { 884 | "collapsed": false 885 | }, 886 | "outputs": [ 887 | { 888 | "name": "stdout", 889 | "output_type": "stream", 890 | "text": [ 891 | "Christopher\n", 892 | "Brooks\n" 893 | ] 894 | } 895 | ], 896 | "source": [ 897 | "firstname = 'Christopher Arthur Hansen Brooks'.split(' ')[0] # [0] selects the first element of the list\n", 898 | "lastname = 'Christopher Arthur Hansen Brooks'.split(' ')[-1] # [-1] selects the last element of the list\n", 899 | "print(firstname)\n", 900 | "print(lastname)" 901 | ] 902 | }, 903 | { 904 | "cell_type": "code", 905 | "execution_count": null, 906 | "metadata": { 907 | "collapsed": true 908 | }, 909 | "outputs": [], 910 | "source": [] 911 | }, 912 | { 913 | "cell_type": "markdown", 914 | "metadata": {}, 915 | "source": [ 916 | "
\n", 917 | "Make sure you convert objects to strings before concatenating." 918 | ] 919 | }, 920 | { 921 | "cell_type": "code", 922 | "execution_count": null, 923 | "metadata": { 924 | "collapsed": false 925 | }, 926 | "outputs": [], 927 | "source": [ 928 | "'Chris' + 2" 929 | ] 930 | }, 931 | { 932 | "cell_type": "code", 933 | "execution_count": null, 934 | "metadata": { 935 | "collapsed": false 936 | }, 937 | "outputs": [], 938 | "source": [ 939 | "'Chris' + str(2)" 940 | ] 941 | }, 942 | { 943 | "cell_type": "markdown", 944 | "metadata": {}, 945 | "source": [ 946 | "
\n", 947 | "Dictionaries associate keys with values." 948 | ] 949 | }, 950 | { 951 | "cell_type": "code", 952 | "execution_count": null, 953 | "metadata": { 954 | "collapsed": false 955 | }, 956 | "outputs": [], 957 | "source": [ 958 | "x = {'Christopher Brooks': 'brooksch@umich.edu', 'Bill Gates': 'billg@microsoft.com'}\n", 959 | "x['Christopher Brooks'] # Retrieve a value by using the indexing operator\n" 960 | ] 961 | }, 962 | { 963 | "cell_type": "code", 964 | "execution_count": null, 965 | "metadata": { 966 | "collapsed": true 967 | }, 968 | "outputs": [], 969 | "source": [ 970 | "x['Kevyn Collins-Thompson'] = None\n", 971 | "x['Kevyn Collins-Thompson']" 972 | ] 973 | }, 974 | { 975 | "cell_type": "markdown", 976 | "metadata": {}, 977 | "source": [ 978 | "
\n", 979 | "Iterate over all of the keys:" 980 | ] 981 | }, 982 | { 983 | "cell_type": "code", 984 | "execution_count": null, 985 | "metadata": { 986 | "collapsed": false, 987 | "scrolled": true 988 | }, 989 | "outputs": [], 990 | "source": [ 991 | "for name in x:\n", 992 | " print(x[name])" 993 | ] 994 | }, 995 | { 996 | "cell_type": "markdown", 997 | "metadata": {}, 998 | "source": [ 999 | "
\n", 1000 | "Iterate over all of the values:" 1001 | ] 1002 | }, 1003 | { 1004 | "cell_type": "code", 1005 | "execution_count": null, 1006 | "metadata": { 1007 | "collapsed": false 1008 | }, 1009 | "outputs": [], 1010 | "source": [ 1011 | "for email in x.values():\n", 1012 | " print(email)" 1013 | ] 1014 | }, 1015 | { 1016 | "cell_type": "markdown", 1017 | "metadata": {}, 1018 | "source": [ 1019 | "
\n", 1020 | "Iterate over all of the items in the list:" 1021 | ] 1022 | }, 1023 | { 1024 | "cell_type": "code", 1025 | "execution_count": null, 1026 | "metadata": { 1027 | "collapsed": false 1028 | }, 1029 | "outputs": [], 1030 | "source": [ 1031 | "for name, email in x.items():\n", 1032 | " print(name)\n", 1033 | " print(email)" 1034 | ] 1035 | }, 1036 | { 1037 | "cell_type": "markdown", 1038 | "metadata": {}, 1039 | "source": [ 1040 | "
\n", 1041 | "You can unpack a sequence into different variables:" 1042 | ] 1043 | }, 1044 | { 1045 | "cell_type": "code", 1046 | "execution_count": null, 1047 | "metadata": { 1048 | "collapsed": true 1049 | }, 1050 | "outputs": [], 1051 | "source": [ 1052 | "x = ('Christopher', 'Brooks', 'brooksch@umich.edu')\n", 1053 | "fname, lname, email = x" 1054 | ] 1055 | }, 1056 | { 1057 | "cell_type": "code", 1058 | "execution_count": null, 1059 | "metadata": { 1060 | "collapsed": false 1061 | }, 1062 | "outputs": [], 1063 | "source": [ 1064 | "fname" 1065 | ] 1066 | }, 1067 | { 1068 | "cell_type": "code", 1069 | "execution_count": null, 1070 | "metadata": { 1071 | "collapsed": false 1072 | }, 1073 | "outputs": [], 1074 | "source": [ 1075 | "lname" 1076 | ] 1077 | }, 1078 | { 1079 | "cell_type": "markdown", 1080 | "metadata": {}, 1081 | "source": [ 1082 | "
\n", 1083 | "Make sure the number of values you are unpacking matches the number of variables being assigned." 1084 | ] 1085 | }, 1086 | { 1087 | "cell_type": "code", 1088 | "execution_count": null, 1089 | "metadata": { 1090 | "collapsed": false 1091 | }, 1092 | "outputs": [], 1093 | "source": [ 1094 | "x = ('Christopher', 'Brooks', 'brooksch@umich.edu', 'Ann Arbor')\n", 1095 | "fname, lname, email = x" 1096 | ] 1097 | }, 1098 | { 1099 | "cell_type": "markdown", 1100 | "metadata": {}, 1101 | "source": [ 1102 | "
\n", 1103 | "# The Python Programming Language: More on Strings" 1104 | ] 1105 | }, 1106 | { 1107 | "cell_type": "code", 1108 | "execution_count": null, 1109 | "metadata": { 1110 | "collapsed": false 1111 | }, 1112 | "outputs": [], 1113 | "source": [ 1114 | "print('Chris' + 2)" 1115 | ] 1116 | }, 1117 | { 1118 | "cell_type": "code", 1119 | "execution_count": null, 1120 | "metadata": { 1121 | "collapsed": false 1122 | }, 1123 | "outputs": [], 1124 | "source": [ 1125 | "print('Chris' + str(2))" 1126 | ] 1127 | }, 1128 | { 1129 | "cell_type": "markdown", 1130 | "metadata": {}, 1131 | "source": [ 1132 | "
\n", 1133 | "Python has a built in method for convenient string formatting." 1134 | ] 1135 | }, 1136 | { 1137 | "cell_type": "code", 1138 | "execution_count": null, 1139 | "metadata": { 1140 | "collapsed": false 1141 | }, 1142 | "outputs": [], 1143 | "source": [ 1144 | "sales_record = {\n", 1145 | "'price': 3.24,\n", 1146 | "'num_items': 4,\n", 1147 | "'person': 'Chris'}\n", 1148 | "\n", 1149 | "sales_statement = '{} bought {} item(s) at a price of {} each for a total of {}'\n", 1150 | "\n", 1151 | "print(sales_statement.format(sales_record['person'],\n", 1152 | " sales_record['num_items'],\n", 1153 | " sales_record['price'],\n", 1154 | " sales_record['num_items']*sales_record['price']))\n" 1155 | ] 1156 | }, 1157 | { 1158 | "cell_type": "markdown", 1159 | "metadata": {}, 1160 | "source": [ 1161 | "
\n", 1162 | "# Reading and Writing CSV files" 1163 | ] 1164 | }, 1165 | { 1166 | "cell_type": "markdown", 1167 | "metadata": {}, 1168 | "source": [ 1169 | "
\n", 1170 | "Let's import our datafile mpg.csv, which contains fuel economy data for 234 cars.\n", 1171 | "\n", 1172 | "* mpg : miles per gallon\n", 1173 | "* class : car classification\n", 1174 | "* cty : city mpg\n", 1175 | "* cyl : # of cylinders\n", 1176 | "* displ : engine displacement in liters\n", 1177 | "* drv : f = front-wheel drive, r = rear wheel drive, 4 = 4wd\n", 1178 | "* fl : fuel (e = ethanol E85, d = diesel, r = regular, p = premium, c = CNG)\n", 1179 | "* hwy : highway mpg\n", 1180 | "* manufacturer : automobile manufacturer\n", 1181 | "* model : model of car\n", 1182 | "* trans : type of transmission\n", 1183 | "* year : model year" 1184 | ] 1185 | }, 1186 | { 1187 | "cell_type": "code", 1188 | "execution_count": null, 1189 | "metadata": { 1190 | "collapsed": false, 1191 | "scrolled": true 1192 | }, 1193 | "outputs": [], 1194 | "source": [ 1195 | "import csv\n", 1196 | "\n", 1197 | "%precision 2\n", 1198 | "\n", 1199 | "with open('mpg.csv') as csvfile:\n", 1200 | " mpg = list(csv.DictReader(csvfile))\n", 1201 | " \n", 1202 | "mpg[:3] # The first three dictionaries in our list." 1203 | ] 1204 | }, 1205 | { 1206 | "cell_type": "markdown", 1207 | "metadata": {}, 1208 | "source": [ 1209 | "
\n", 1210 | "`csv.Dictreader` has read in each row of our csv file as a dictionary. `len` shows that our list is comprised of 234 dictionaries." 1211 | ] 1212 | }, 1213 | { 1214 | "cell_type": "code", 1215 | "execution_count": null, 1216 | "metadata": { 1217 | "collapsed": false 1218 | }, 1219 | "outputs": [], 1220 | "source": [ 1221 | "len(mpg)" 1222 | ] 1223 | }, 1224 | { 1225 | "cell_type": "markdown", 1226 | "metadata": {}, 1227 | "source": [ 1228 | "
\n", 1229 | "`keys` gives us the column names of our csv." 1230 | ] 1231 | }, 1232 | { 1233 | "cell_type": "code", 1234 | "execution_count": null, 1235 | "metadata": { 1236 | "collapsed": false 1237 | }, 1238 | "outputs": [], 1239 | "source": [ 1240 | "mpg[0].keys()" 1241 | ] 1242 | }, 1243 | { 1244 | "cell_type": "markdown", 1245 | "metadata": {}, 1246 | "source": [ 1247 | "
\n", 1248 | "This is how to find the average cty fuel economy across all cars. All values in the dictionaries are strings, so we need to convert to float." 1249 | ] 1250 | }, 1251 | { 1252 | "cell_type": "code", 1253 | "execution_count": null, 1254 | "metadata": { 1255 | "collapsed": false 1256 | }, 1257 | "outputs": [], 1258 | "source": [ 1259 | "sum(float(d['cty']) for d in mpg) / len(mpg)" 1260 | ] 1261 | }, 1262 | { 1263 | "cell_type": "markdown", 1264 | "metadata": {}, 1265 | "source": [ 1266 | "
\n", 1267 | "Similarly this is how to find the average hwy fuel economy across all cars." 1268 | ] 1269 | }, 1270 | { 1271 | "cell_type": "code", 1272 | "execution_count": null, 1273 | "metadata": { 1274 | "collapsed": false 1275 | }, 1276 | "outputs": [], 1277 | "source": [ 1278 | "sum(float(d['hwy']) for d in mpg) / len(mpg)" 1279 | ] 1280 | }, 1281 | { 1282 | "cell_type": "markdown", 1283 | "metadata": {}, 1284 | "source": [ 1285 | "
\n", 1286 | "Use `set` to return the unique values for the number of cylinders the cars in our dataset have." 1287 | ] 1288 | }, 1289 | { 1290 | "cell_type": "code", 1291 | "execution_count": null, 1292 | "metadata": { 1293 | "collapsed": false 1294 | }, 1295 | "outputs": [], 1296 | "source": [ 1297 | "cylinders = set(d['cyl'] for d in mpg)\n", 1298 | "cylinders" 1299 | ] 1300 | }, 1301 | { 1302 | "cell_type": "markdown", 1303 | "metadata": {}, 1304 | "source": [ 1305 | "
\n", 1306 | "Here's a more complex example where we are grouping the cars by number of cylinder, and finding the average cty mpg for each group." 1307 | ] 1308 | }, 1309 | { 1310 | "cell_type": "code", 1311 | "execution_count": null, 1312 | "metadata": { 1313 | "collapsed": false 1314 | }, 1315 | "outputs": [], 1316 | "source": [ 1317 | "CtyMpgByCyl = []\n", 1318 | "\n", 1319 | "for c in cylinders: # iterate over all the cylinder levels\n", 1320 | " summpg = 0\n", 1321 | " cyltypecount = 0\n", 1322 | " for d in mpg: # iterate over all dictionaries\n", 1323 | " if d['cyl'] == c: # if the cylinder level type matches,\n", 1324 | " summpg += float(d['cty']) # add the cty mpg\n", 1325 | " cyltypecount += 1 # increment the count\n", 1326 | " CtyMpgByCyl.append((c, summpg / cyltypecount)) # append the tuple ('cylinder', 'avg mpg')\n", 1327 | "\n", 1328 | "CtyMpgByCyl.sort(key=lambda x: x[0])\n", 1329 | "CtyMpgByCyl" 1330 | ] 1331 | }, 1332 | { 1333 | "cell_type": "markdown", 1334 | "metadata": {}, 1335 | "source": [ 1336 | "
\n", 1337 | "Use `set` to return the unique values for the class types in our dataset." 1338 | ] 1339 | }, 1340 | { 1341 | "cell_type": "code", 1342 | "execution_count": null, 1343 | "metadata": { 1344 | "collapsed": false 1345 | }, 1346 | "outputs": [], 1347 | "source": [ 1348 | "vehicleclass = set(d['class'] for d in mpg) # what are the class types\n", 1349 | "vehicleclass" 1350 | ] 1351 | }, 1352 | { 1353 | "cell_type": "markdown", 1354 | "metadata": {}, 1355 | "source": [ 1356 | "
\n", 1357 | "And here's an example of how to find the average hwy mpg for each class of vehicle in our dataset." 1358 | ] 1359 | }, 1360 | { 1361 | "cell_type": "code", 1362 | "execution_count": null, 1363 | "metadata": { 1364 | "collapsed": false 1365 | }, 1366 | "outputs": [], 1367 | "source": [ 1368 | "HwyMpgByClass = []\n", 1369 | "\n", 1370 | "for t in vehicleclass: # iterate over all the vehicle classes\n", 1371 | " summpg = 0\n", 1372 | " vclasscount = 0\n", 1373 | " for d in mpg: # iterate over all dictionaries\n", 1374 | " if d['class'] == t: # if the cylinder amount type matches,\n", 1375 | " summpg += float(d['hwy']) # add the hwy mpg\n", 1376 | " vclasscount += 1 # increment the count\n", 1377 | " HwyMpgByClass.append((t, summpg / vclasscount)) # append the tuple ('class', 'avg mpg')\n", 1378 | "\n", 1379 | "HwyMpgByClass.sort(key=lambda x: x[1])\n", 1380 | "HwyMpgByClass" 1381 | ] 1382 | }, 1383 | { 1384 | "cell_type": "markdown", 1385 | "metadata": {}, 1386 | "source": [ 1387 | "
\n", 1388 | "# The Python Programming Language: Dates and Times" 1389 | ] 1390 | }, 1391 | { 1392 | "cell_type": "code", 1393 | "execution_count": null, 1394 | "metadata": { 1395 | "collapsed": true 1396 | }, 1397 | "outputs": [], 1398 | "source": [ 1399 | "import datetime as dt\n", 1400 | "import time as tm" 1401 | ] 1402 | }, 1403 | { 1404 | "cell_type": "markdown", 1405 | "metadata": {}, 1406 | "source": [ 1407 | "
\n", 1408 | "`time` returns the current time in seconds since the Epoch. (January 1st, 1970)" 1409 | ] 1410 | }, 1411 | { 1412 | "cell_type": "code", 1413 | "execution_count": null, 1414 | "metadata": { 1415 | "collapsed": false 1416 | }, 1417 | "outputs": [], 1418 | "source": [ 1419 | "tm.time()" 1420 | ] 1421 | }, 1422 | { 1423 | "cell_type": "markdown", 1424 | "metadata": {}, 1425 | "source": [ 1426 | "
\n", 1427 | "Convert the timestamp to datetime." 1428 | ] 1429 | }, 1430 | { 1431 | "cell_type": "code", 1432 | "execution_count": null, 1433 | "metadata": { 1434 | "collapsed": false 1435 | }, 1436 | "outputs": [], 1437 | "source": [ 1438 | "dtnow = dt.datetime.fromtimestamp(tm.time())\n", 1439 | "dtnow" 1440 | ] 1441 | }, 1442 | { 1443 | "cell_type": "markdown", 1444 | "metadata": {}, 1445 | "source": [ 1446 | "
\n", 1447 | "Handy datetime attributes:" 1448 | ] 1449 | }, 1450 | { 1451 | "cell_type": "code", 1452 | "execution_count": null, 1453 | "metadata": { 1454 | "collapsed": false 1455 | }, 1456 | "outputs": [], 1457 | "source": [ 1458 | "dtnow.year, dtnow.month, dtnow.day, dtnow.hour, dtnow.minute, dtnow.second # get year, month, day, etc.from a datetime" 1459 | ] 1460 | }, 1461 | { 1462 | "cell_type": "markdown", 1463 | "metadata": {}, 1464 | "source": [ 1465 | "
\n", 1466 | "`timedelta` is a duration expressing the difference between two dates." 1467 | ] 1468 | }, 1469 | { 1470 | "cell_type": "code", 1471 | "execution_count": null, 1472 | "metadata": { 1473 | "collapsed": false 1474 | }, 1475 | "outputs": [], 1476 | "source": [ 1477 | "delta = dt.timedelta(days = 100) # create a timedelta of 100 days\n", 1478 | "delta" 1479 | ] 1480 | }, 1481 | { 1482 | "cell_type": "markdown", 1483 | "metadata": {}, 1484 | "source": [ 1485 | "
\n", 1486 | "`date.today` returns the current local date." 1487 | ] 1488 | }, 1489 | { 1490 | "cell_type": "code", 1491 | "execution_count": null, 1492 | "metadata": { 1493 | "collapsed": true 1494 | }, 1495 | "outputs": [], 1496 | "source": [ 1497 | "today = dt.date.today()" 1498 | ] 1499 | }, 1500 | { 1501 | "cell_type": "code", 1502 | "execution_count": null, 1503 | "metadata": { 1504 | "collapsed": false 1505 | }, 1506 | "outputs": [], 1507 | "source": [ 1508 | "today - delta # the date 100 days ago" 1509 | ] 1510 | }, 1511 | { 1512 | "cell_type": "code", 1513 | "execution_count": null, 1514 | "metadata": { 1515 | "collapsed": false 1516 | }, 1517 | "outputs": [], 1518 | "source": [ 1519 | "today > today-delta # compare dates" 1520 | ] 1521 | }, 1522 | { 1523 | "cell_type": "markdown", 1524 | "metadata": {}, 1525 | "source": [ 1526 | "
\n", 1527 | "# The Python Programming Language: Objects and map()" 1528 | ] 1529 | }, 1530 | { 1531 | "cell_type": "markdown", 1532 | "metadata": {}, 1533 | "source": [ 1534 | "
\n", 1535 | "An example of a class in python:" 1536 | ] 1537 | }, 1538 | { 1539 | "cell_type": "code", 1540 | "execution_count": null, 1541 | "metadata": { 1542 | "collapsed": true 1543 | }, 1544 | "outputs": [], 1545 | "source": [ 1546 | "class Person:\n", 1547 | " department = 'School of Information' #a class variable\n", 1548 | "\n", 1549 | " def set_name(self, new_name): #a method\n", 1550 | " self.name = new_name\n", 1551 | " def set_location(self, new_location):\n", 1552 | " self.location = new_location" 1553 | ] 1554 | }, 1555 | { 1556 | "cell_type": "code", 1557 | "execution_count": null, 1558 | "metadata": { 1559 | "collapsed": false 1560 | }, 1561 | "outputs": [], 1562 | "source": [ 1563 | "person = Person()\n", 1564 | "person.set_name('Christopher Brooks')\n", 1565 | "person.set_location('Ann Arbor, MI, USA')\n", 1566 | "print('{} live in {} and works in the department {}'.format(person.name, person.location, person.department))" 1567 | ] 1568 | }, 1569 | { 1570 | "cell_type": "markdown", 1571 | "metadata": {}, 1572 | "source": [ 1573 | "
\n", 1574 | "Here's an example of mapping the `min` function between two lists." 1575 | ] 1576 | }, 1577 | { 1578 | "cell_type": "code", 1579 | "execution_count": null, 1580 | "metadata": { 1581 | "collapsed": false 1582 | }, 1583 | "outputs": [], 1584 | "source": [ 1585 | "store1 = [10.00, 11.00, 12.34, 2.34]\n", 1586 | "store2 = [9.00, 11.10, 12.34, 2.01]\n", 1587 | "cheapest = map(min, store1, store2)\n", 1588 | "cheapest" 1589 | ] 1590 | }, 1591 | { 1592 | "cell_type": "markdown", 1593 | "metadata": {}, 1594 | "source": [ 1595 | "
\n", 1596 | "Now let's iterate through the map object to see the values." 1597 | ] 1598 | }, 1599 | { 1600 | "cell_type": "code", 1601 | "execution_count": null, 1602 | "metadata": { 1603 | "collapsed": false, 1604 | "scrolled": true 1605 | }, 1606 | "outputs": [], 1607 | "source": [ 1608 | "for item in cheapest:\n", 1609 | " print(item)" 1610 | ] 1611 | }, 1612 | { 1613 | "cell_type": "markdown", 1614 | "metadata": {}, 1615 | "source": [ 1616 | "
\n", 1617 | "# The Python Programming Language: Lambda and List Comprehensions" 1618 | ] 1619 | }, 1620 | { 1621 | "cell_type": "markdown", 1622 | "metadata": {}, 1623 | "source": [ 1624 | "
\n", 1625 | "Here's an example of lambda that takes in three parameters and adds the first two." 1626 | ] 1627 | }, 1628 | { 1629 | "cell_type": "code", 1630 | "execution_count": null, 1631 | "metadata": { 1632 | "collapsed": true 1633 | }, 1634 | "outputs": [], 1635 | "source": [ 1636 | "my_function = lambda a, b, c : a + b" 1637 | ] 1638 | }, 1639 | { 1640 | "cell_type": "code", 1641 | "execution_count": null, 1642 | "metadata": { 1643 | "collapsed": false 1644 | }, 1645 | "outputs": [], 1646 | "source": [ 1647 | "my_function(1, 2, 3)" 1648 | ] 1649 | }, 1650 | { 1651 | "cell_type": "markdown", 1652 | "metadata": {}, 1653 | "source": [ 1654 | "
\n", 1655 | "Let's iterate from 0 to 999 and return the even numbers." 1656 | ] 1657 | }, 1658 | { 1659 | "cell_type": "code", 1660 | "execution_count": null, 1661 | "metadata": { 1662 | "collapsed": false 1663 | }, 1664 | "outputs": [], 1665 | "source": [ 1666 | "my_list = []\n", 1667 | "for number in range(0, 1000):\n", 1668 | " if number % 2 == 0:\n", 1669 | " my_list.append(number)\n", 1670 | "my_list" 1671 | ] 1672 | }, 1673 | { 1674 | "cell_type": "markdown", 1675 | "metadata": {}, 1676 | "source": [ 1677 | "
\n", 1678 | "Now the same thing but with list comprehension." 1679 | ] 1680 | }, 1681 | { 1682 | "cell_type": "code", 1683 | "execution_count": null, 1684 | "metadata": { 1685 | "collapsed": false 1686 | }, 1687 | "outputs": [], 1688 | "source": [ 1689 | "my_list = [number for number in range(0,1000) if number % 2 == 0]\n", 1690 | "my_list" 1691 | ] 1692 | }, 1693 | { 1694 | "cell_type": "markdown", 1695 | "metadata": { 1696 | "collapsed": true 1697 | }, 1698 | "source": [ 1699 | "
\n", 1700 | "# The Python Programming Language: Numerical Python (NumPy)" 1701 | ] 1702 | }, 1703 | { 1704 | "cell_type": "code", 1705 | "execution_count": 3, 1706 | "metadata": { 1707 | "collapsed": true 1708 | }, 1709 | "outputs": [], 1710 | "source": [ 1711 | "import numpy as np" 1712 | ] 1713 | }, 1714 | { 1715 | "cell_type": "code", 1716 | "execution_count": null, 1717 | "metadata": { 1718 | "collapsed": true 1719 | }, 1720 | "outputs": [], 1721 | "source": [] 1722 | }, 1723 | { 1724 | "cell_type": "markdown", 1725 | "metadata": {}, 1726 | "source": [ 1727 | "
\n", 1728 | "## Creating Arrays" 1729 | ] 1730 | }, 1731 | { 1732 | "cell_type": "markdown", 1733 | "metadata": {}, 1734 | "source": [ 1735 | "Create a list and convert it to a numpy array" 1736 | ] 1737 | }, 1738 | { 1739 | "cell_type": "code", 1740 | "execution_count": 4, 1741 | "metadata": { 1742 | "collapsed": false 1743 | }, 1744 | "outputs": [ 1745 | { 1746 | "data": { 1747 | "text/plain": [ 1748 | "array([1, 2, 3])" 1749 | ] 1750 | }, 1751 | "execution_count": 4, 1752 | "metadata": {}, 1753 | "output_type": "execute_result" 1754 | } 1755 | ], 1756 | "source": [ 1757 | "mylist = [1, 2, 3]\n", 1758 | "x = np.array(mylist)\n", 1759 | "x" 1760 | ] 1761 | }, 1762 | { 1763 | "cell_type": "code", 1764 | "execution_count": null, 1765 | "metadata": { 1766 | "collapsed": true 1767 | }, 1768 | "outputs": [], 1769 | "source": [] 1770 | }, 1771 | { 1772 | "cell_type": "code", 1773 | "execution_count": null, 1774 | "metadata": { 1775 | "collapsed": true 1776 | }, 1777 | "outputs": [], 1778 | "source": [] 1779 | }, 1780 | { 1781 | "cell_type": "markdown", 1782 | "metadata": {}, 1783 | "source": [ 1784 | "
\n", 1785 | "Or just pass in a list directly" 1786 | ] 1787 | }, 1788 | { 1789 | "cell_type": "code", 1790 | "execution_count": null, 1791 | "metadata": { 1792 | "collapsed": false 1793 | }, 1794 | "outputs": [], 1795 | "source": [ 1796 | "y = np.array([4, 5, 6])\n", 1797 | "y" 1798 | ] 1799 | }, 1800 | { 1801 | "cell_type": "markdown", 1802 | "metadata": {}, 1803 | "source": [ 1804 | "
\n", 1805 | "Pass in a list of lists to create a multidimensional array." 1806 | ] 1807 | }, 1808 | { 1809 | "cell_type": "code", 1810 | "execution_count": null, 1811 | "metadata": { 1812 | "collapsed": false 1813 | }, 1814 | "outputs": [], 1815 | "source": [ 1816 | "m = np.array([[7, 8, 9], [10, 11, 12]])\n", 1817 | "m" 1818 | ] 1819 | }, 1820 | { 1821 | "cell_type": "markdown", 1822 | "metadata": {}, 1823 | "source": [ 1824 | "
\n", 1825 | "Use the shape method to find the dimensions of the array. (rows, columns)" 1826 | ] 1827 | }, 1828 | { 1829 | "cell_type": "code", 1830 | "execution_count": null, 1831 | "metadata": { 1832 | "collapsed": false 1833 | }, 1834 | "outputs": [], 1835 | "source": [ 1836 | "m.shape" 1837 | ] 1838 | }, 1839 | { 1840 | "cell_type": "markdown", 1841 | "metadata": {}, 1842 | "source": [ 1843 | "
\n", 1844 | "`arange` returns evenly spaced values within a given interval." 1845 | ] 1846 | }, 1847 | { 1848 | "cell_type": "code", 1849 | "execution_count": null, 1850 | "metadata": { 1851 | "collapsed": false 1852 | }, 1853 | "outputs": [], 1854 | "source": [ 1855 | "n = np.arange(0, 30, 2) # start at 0 count up by 2, stop before 30\n", 1856 | "n" 1857 | ] 1858 | }, 1859 | { 1860 | "cell_type": "markdown", 1861 | "metadata": {}, 1862 | "source": [ 1863 | "
\n", 1864 | "`reshape` returns an array with the same data with a new shape." 1865 | ] 1866 | }, 1867 | { 1868 | "cell_type": "code", 1869 | "execution_count": 1, 1870 | "metadata": { 1871 | "collapsed": false 1872 | }, 1873 | "outputs": [ 1874 | { 1875 | "ename": "NameError", 1876 | "evalue": "name 'n' is not defined", 1877 | "output_type": "error", 1878 | "traceback": [ 1879 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 1880 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 1881 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# reshape array to be 3x5\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 1882 | "\u001b[0;31mNameError\u001b[0m: name 'n' is not defined" 1883 | ] 1884 | } 1885 | ], 1886 | "source": [ 1887 | "n = n.reshape(3, 5) # reshape array to be 3x5\n", 1888 | "n" 1889 | ] 1890 | }, 1891 | { 1892 | "cell_type": "code", 1893 | "execution_count": null, 1894 | "metadata": { 1895 | "collapsed": true 1896 | }, 1897 | "outputs": [], 1898 | "source": [] 1899 | }, 1900 | { 1901 | "cell_type": "markdown", 1902 | "metadata": {}, 1903 | "source": [ 1904 | "
\n", 1905 | "`linspace` returns evenly spaced numbers over a specified interval." 1906 | ] 1907 | }, 1908 | { 1909 | "cell_type": "code", 1910 | "execution_count": null, 1911 | "metadata": { 1912 | "collapsed": false 1913 | }, 1914 | "outputs": [], 1915 | "source": [ 1916 | "o = np.linspace(0, 4, 9) # return 9 evenly spaced values from 0 to 4\n", 1917 | "o" 1918 | ] 1919 | }, 1920 | { 1921 | "cell_type": "markdown", 1922 | "metadata": {}, 1923 | "source": [ 1924 | "
\n", 1925 | "`resize` changes the shape and size of array in-place." 1926 | ] 1927 | }, 1928 | { 1929 | "cell_type": "code", 1930 | "execution_count": null, 1931 | "metadata": { 1932 | "collapsed": false 1933 | }, 1934 | "outputs": [], 1935 | "source": [ 1936 | "o.resize(3, 3)\n", 1937 | "o" 1938 | ] 1939 | }, 1940 | { 1941 | "cell_type": "markdown", 1942 | "metadata": {}, 1943 | "source": [ 1944 | "
\n", 1945 | "`ones` returns a new array of given shape and type, filled with ones." 1946 | ] 1947 | }, 1948 | { 1949 | "cell_type": "code", 1950 | "execution_count": null, 1951 | "metadata": { 1952 | "collapsed": false 1953 | }, 1954 | "outputs": [], 1955 | "source": [ 1956 | "np.ones((3, 2))" 1957 | ] 1958 | }, 1959 | { 1960 | "cell_type": "markdown", 1961 | "metadata": {}, 1962 | "source": [ 1963 | "
\n", 1964 | "`zeros` returns a new array of given shape and type, filled with zeros." 1965 | ] 1966 | }, 1967 | { 1968 | "cell_type": "code", 1969 | "execution_count": null, 1970 | "metadata": { 1971 | "collapsed": false 1972 | }, 1973 | "outputs": [], 1974 | "source": [ 1975 | "np.zeros((2, 3))" 1976 | ] 1977 | }, 1978 | { 1979 | "cell_type": "markdown", 1980 | "metadata": {}, 1981 | "source": [ 1982 | "
\n", 1983 | "`eye` returns a 2-D array with ones on the diagonal and zeros elsewhere." 1984 | ] 1985 | }, 1986 | { 1987 | "cell_type": "code", 1988 | "execution_count": null, 1989 | "metadata": { 1990 | "collapsed": false 1991 | }, 1992 | "outputs": [], 1993 | "source": [ 1994 | "np.eye(3)" 1995 | ] 1996 | }, 1997 | { 1998 | "cell_type": "markdown", 1999 | "metadata": {}, 2000 | "source": [ 2001 | "
\n", 2002 | "`diag` extracts a diagonal or constructs a diagonal array." 2003 | ] 2004 | }, 2005 | { 2006 | "cell_type": "code", 2007 | "execution_count": null, 2008 | "metadata": { 2009 | "collapsed": false 2010 | }, 2011 | "outputs": [], 2012 | "source": [ 2013 | "np.diag(y)" 2014 | ] 2015 | }, 2016 | { 2017 | "cell_type": "markdown", 2018 | "metadata": {}, 2019 | "source": [ 2020 | "
\n", 2021 | "Create an array using repeating list (or see `np.tile`)" 2022 | ] 2023 | }, 2024 | { 2025 | "cell_type": "code", 2026 | "execution_count": null, 2027 | "metadata": { 2028 | "collapsed": false 2029 | }, 2030 | "outputs": [], 2031 | "source": [ 2032 | "np.array([1, 2, 3] * 3)" 2033 | ] 2034 | }, 2035 | { 2036 | "cell_type": "markdown", 2037 | "metadata": {}, 2038 | "source": [ 2039 | "
\n", 2040 | "Repeat elements of an array using `repeat`." 2041 | ] 2042 | }, 2043 | { 2044 | "cell_type": "code", 2045 | "execution_count": null, 2046 | "metadata": { 2047 | "collapsed": false 2048 | }, 2049 | "outputs": [], 2050 | "source": [ 2051 | "np.repeat([1, 2, 3], 3)" 2052 | ] 2053 | }, 2054 | { 2055 | "cell_type": "markdown", 2056 | "metadata": {}, 2057 | "source": [ 2058 | "
\n", 2059 | "#### Combining Arrays" 2060 | ] 2061 | }, 2062 | { 2063 | "cell_type": "code", 2064 | "execution_count": null, 2065 | "metadata": { 2066 | "collapsed": false 2067 | }, 2068 | "outputs": [], 2069 | "source": [ 2070 | "p = np.ones([2, 3], int)\n", 2071 | "p" 2072 | ] 2073 | }, 2074 | { 2075 | "cell_type": "markdown", 2076 | "metadata": {}, 2077 | "source": [ 2078 | "
\n", 2079 | "Use `vstack` to stack arrays in sequence vertically (row wise)." 2080 | ] 2081 | }, 2082 | { 2083 | "cell_type": "code", 2084 | "execution_count": null, 2085 | "metadata": { 2086 | "collapsed": false 2087 | }, 2088 | "outputs": [], 2089 | "source": [ 2090 | "np.vstack([p, 2*p])" 2091 | ] 2092 | }, 2093 | { 2094 | "cell_type": "markdown", 2095 | "metadata": {}, 2096 | "source": [ 2097 | "
\n", 2098 | "Use `hstack` to stack arrays in sequence horizontally (column wise)." 2099 | ] 2100 | }, 2101 | { 2102 | "cell_type": "code", 2103 | "execution_count": null, 2104 | "metadata": { 2105 | "collapsed": false 2106 | }, 2107 | "outputs": [], 2108 | "source": [ 2109 | "np.hstack([p, 2*p])" 2110 | ] 2111 | }, 2112 | { 2113 | "cell_type": "markdown", 2114 | "metadata": {}, 2115 | "source": [ 2116 | "
\n", 2117 | "## Operations" 2118 | ] 2119 | }, 2120 | { 2121 | "cell_type": "markdown", 2122 | "metadata": {}, 2123 | "source": [ 2124 | "Use `+`, `-`, `*`, `/` and `**` to perform element wise addition, subtraction, multiplication, division and power." 2125 | ] 2126 | }, 2127 | { 2128 | "cell_type": "code", 2129 | "execution_count": null, 2130 | "metadata": { 2131 | "collapsed": false 2132 | }, 2133 | "outputs": [], 2134 | "source": [ 2135 | "print(x + y) # elementwise addition [1 2 3] + [4 5 6] = [5 7 9]\n", 2136 | "print(x - y) # elementwise subtraction [1 2 3] - [4 5 6] = [-3 -3 -3]" 2137 | ] 2138 | }, 2139 | { 2140 | "cell_type": "code", 2141 | "execution_count": null, 2142 | "metadata": { 2143 | "collapsed": false 2144 | }, 2145 | "outputs": [], 2146 | "source": [ 2147 | "print(x * y) # elementwise multiplication [1 2 3] * [4 5 6] = [4 10 18]\n", 2148 | "print(x / y) # elementwise divison [1 2 3] / [4 5 6] = [0.25 0.4 0.5]" 2149 | ] 2150 | }, 2151 | { 2152 | "cell_type": "code", 2153 | "execution_count": null, 2154 | "metadata": { 2155 | "collapsed": false 2156 | }, 2157 | "outputs": [], 2158 | "source": [ 2159 | "print(x**2) # elementwise power [1 2 3] ^2 = [1 4 9]" 2160 | ] 2161 | }, 2162 | { 2163 | "cell_type": "markdown", 2164 | "metadata": {}, 2165 | "source": [ 2166 | "
\n", 2167 | "**Dot Product:** \n", 2168 | "\n", 2169 | "$ \\begin{bmatrix}x_1 \\ x_2 \\ x_3\\end{bmatrix}\n", 2170 | "\\cdot\n", 2171 | "\\begin{bmatrix}y_1 \\\\ y_2 \\\\ y_3\\end{bmatrix}\n", 2172 | "= x_1 y_1 + x_2 y_2 + x_3 y_3$" 2173 | ] 2174 | }, 2175 | { 2176 | "cell_type": "code", 2177 | "execution_count": null, 2178 | "metadata": { 2179 | "collapsed": false 2180 | }, 2181 | "outputs": [], 2182 | "source": [ 2183 | "x.dot(y) # dot product 1*4 + 2*5 + 3*6" 2184 | ] 2185 | }, 2186 | { 2187 | "cell_type": "code", 2188 | "execution_count": null, 2189 | "metadata": { 2190 | "collapsed": false 2191 | }, 2192 | "outputs": [], 2193 | "source": [ 2194 | "z = np.array([y, y**2])\n", 2195 | "print(len(z)) # number of rows of array" 2196 | ] 2197 | }, 2198 | { 2199 | "cell_type": "markdown", 2200 | "metadata": {}, 2201 | "source": [ 2202 | "
\n", 2203 | "Let's look at transposing arrays. Transposing permutes the dimensions of the array." 2204 | ] 2205 | }, 2206 | { 2207 | "cell_type": "code", 2208 | "execution_count": null, 2209 | "metadata": { 2210 | "collapsed": false 2211 | }, 2212 | "outputs": [], 2213 | "source": [ 2214 | "z = np.array([y, y**2])\n", 2215 | "z" 2216 | ] 2217 | }, 2218 | { 2219 | "cell_type": "markdown", 2220 | "metadata": {}, 2221 | "source": [ 2222 | "
\n", 2223 | "The shape of array `z` is `(2,3)` before transposing." 2224 | ] 2225 | }, 2226 | { 2227 | "cell_type": "code", 2228 | "execution_count": null, 2229 | "metadata": { 2230 | "collapsed": false 2231 | }, 2232 | "outputs": [], 2233 | "source": [ 2234 | "z.shape" 2235 | ] 2236 | }, 2237 | { 2238 | "cell_type": "markdown", 2239 | "metadata": {}, 2240 | "source": [ 2241 | "
\n", 2242 | "Use `.T` to get the transpose." 2243 | ] 2244 | }, 2245 | { 2246 | "cell_type": "code", 2247 | "execution_count": null, 2248 | "metadata": { 2249 | "collapsed": false 2250 | }, 2251 | "outputs": [], 2252 | "source": [ 2253 | "z.T" 2254 | ] 2255 | }, 2256 | { 2257 | "cell_type": "markdown", 2258 | "metadata": {}, 2259 | "source": [ 2260 | "
\n", 2261 | "The number of rows has swapped with the number of columns." 2262 | ] 2263 | }, 2264 | { 2265 | "cell_type": "code", 2266 | "execution_count": null, 2267 | "metadata": { 2268 | "collapsed": false 2269 | }, 2270 | "outputs": [], 2271 | "source": [ 2272 | "z.T.shape" 2273 | ] 2274 | }, 2275 | { 2276 | "cell_type": "markdown", 2277 | "metadata": {}, 2278 | "source": [ 2279 | "
\n", 2280 | "Use `.dtype` to see the data type of the elements in the array." 2281 | ] 2282 | }, 2283 | { 2284 | "cell_type": "code", 2285 | "execution_count": null, 2286 | "metadata": { 2287 | "collapsed": false 2288 | }, 2289 | "outputs": [], 2290 | "source": [ 2291 | "z.dtype" 2292 | ] 2293 | }, 2294 | { 2295 | "cell_type": "markdown", 2296 | "metadata": {}, 2297 | "source": [ 2298 | "
\n", 2299 | "Use `.astype` to cast to a specific type." 2300 | ] 2301 | }, 2302 | { 2303 | "cell_type": "code", 2304 | "execution_count": null, 2305 | "metadata": { 2306 | "collapsed": false 2307 | }, 2308 | "outputs": [], 2309 | "source": [ 2310 | "z = z.astype('f')\n", 2311 | "z.dtype" 2312 | ] 2313 | }, 2314 | { 2315 | "cell_type": "markdown", 2316 | "metadata": {}, 2317 | "source": [ 2318 | "
\n", 2319 | "## Math Functions" 2320 | ] 2321 | }, 2322 | { 2323 | "cell_type": "markdown", 2324 | "metadata": {}, 2325 | "source": [ 2326 | "Numpy has many built in math functions that can be performed on arrays." 2327 | ] 2328 | }, 2329 | { 2330 | "cell_type": "code", 2331 | "execution_count": null, 2332 | "metadata": { 2333 | "collapsed": true 2334 | }, 2335 | "outputs": [], 2336 | "source": [ 2337 | "a = np.array([-4, -2, 1, 3, 5])" 2338 | ] 2339 | }, 2340 | { 2341 | "cell_type": "code", 2342 | "execution_count": null, 2343 | "metadata": { 2344 | "collapsed": false 2345 | }, 2346 | "outputs": [], 2347 | "source": [ 2348 | "a.sum()" 2349 | ] 2350 | }, 2351 | { 2352 | "cell_type": "code", 2353 | "execution_count": null, 2354 | "metadata": { 2355 | "collapsed": false 2356 | }, 2357 | "outputs": [], 2358 | "source": [ 2359 | "a.max()" 2360 | ] 2361 | }, 2362 | { 2363 | "cell_type": "code", 2364 | "execution_count": null, 2365 | "metadata": { 2366 | "collapsed": false 2367 | }, 2368 | "outputs": [], 2369 | "source": [ 2370 | "a.min()" 2371 | ] 2372 | }, 2373 | { 2374 | "cell_type": "code", 2375 | "execution_count": null, 2376 | "metadata": { 2377 | "collapsed": false 2378 | }, 2379 | "outputs": [], 2380 | "source": [ 2381 | "a.mean()" 2382 | ] 2383 | }, 2384 | { 2385 | "cell_type": "code", 2386 | "execution_count": null, 2387 | "metadata": { 2388 | "collapsed": false 2389 | }, 2390 | "outputs": [], 2391 | "source": [ 2392 | "a.std()" 2393 | ] 2394 | }, 2395 | { 2396 | "cell_type": "markdown", 2397 | "metadata": {}, 2398 | "source": [ 2399 | "
\n", 2400 | "`argmax` and `argmin` return the index of the maximum and minimum values in the array." 2401 | ] 2402 | }, 2403 | { 2404 | "cell_type": "code", 2405 | "execution_count": null, 2406 | "metadata": { 2407 | "collapsed": false 2408 | }, 2409 | "outputs": [], 2410 | "source": [ 2411 | "a.argmax()" 2412 | ] 2413 | }, 2414 | { 2415 | "cell_type": "code", 2416 | "execution_count": null, 2417 | "metadata": { 2418 | "collapsed": false 2419 | }, 2420 | "outputs": [], 2421 | "source": [ 2422 | "a.argmin()" 2423 | ] 2424 | }, 2425 | { 2426 | "cell_type": "markdown", 2427 | "metadata": {}, 2428 | "source": [ 2429 | "
\n", 2430 | "## Indexing / Slicing" 2431 | ] 2432 | }, 2433 | { 2434 | "cell_type": "code", 2435 | "execution_count": null, 2436 | "metadata": { 2437 | "collapsed": false 2438 | }, 2439 | "outputs": [], 2440 | "source": [ 2441 | "s = np.arange(13)**2\n", 2442 | "s" 2443 | ] 2444 | }, 2445 | { 2446 | "cell_type": "markdown", 2447 | "metadata": {}, 2448 | "source": [ 2449 | "
\n", 2450 | "Use bracket notation to get the value at a specific index. Remember that indexing starts at 0." 2451 | ] 2452 | }, 2453 | { 2454 | "cell_type": "code", 2455 | "execution_count": null, 2456 | "metadata": { 2457 | "collapsed": false 2458 | }, 2459 | "outputs": [], 2460 | "source": [ 2461 | "s[0], s[4], s[-1]" 2462 | ] 2463 | }, 2464 | { 2465 | "cell_type": "markdown", 2466 | "metadata": {}, 2467 | "source": [ 2468 | "
\n", 2469 | "Use `:` to indicate a range. `array[start:stop]`\n", 2470 | "\n", 2471 | "\n", 2472 | "Leaving `start` or `stop` empty will default to the beginning/end of the array." 2473 | ] 2474 | }, 2475 | { 2476 | "cell_type": "code", 2477 | "execution_count": null, 2478 | "metadata": { 2479 | "collapsed": false 2480 | }, 2481 | "outputs": [], 2482 | "source": [ 2483 | "s[1:5]" 2484 | ] 2485 | }, 2486 | { 2487 | "cell_type": "markdown", 2488 | "metadata": {}, 2489 | "source": [ 2490 | "
\n", 2491 | "Use negatives to count from the back." 2492 | ] 2493 | }, 2494 | { 2495 | "cell_type": "code", 2496 | "execution_count": null, 2497 | "metadata": { 2498 | "collapsed": false 2499 | }, 2500 | "outputs": [], 2501 | "source": [ 2502 | "s[-4:]" 2503 | ] 2504 | }, 2505 | { 2506 | "cell_type": "markdown", 2507 | "metadata": {}, 2508 | "source": [ 2509 | "
\n", 2510 | "A second `:` can be used to indicate step-size. `array[start:stop:stepsize]`\n", 2511 | "\n", 2512 | "Here we are starting 5th element from the end, and counting backwards by 2 until the beginning of the array is reached." 2513 | ] 2514 | }, 2515 | { 2516 | "cell_type": "code", 2517 | "execution_count": null, 2518 | "metadata": { 2519 | "collapsed": false 2520 | }, 2521 | "outputs": [], 2522 | "source": [ 2523 | "s[-5::-2]" 2524 | ] 2525 | }, 2526 | { 2527 | "cell_type": "markdown", 2528 | "metadata": { 2529 | "collapsed": false 2530 | }, 2531 | "source": [ 2532 | "
\n", 2533 | "Let's look at a multidimensional array." 2534 | ] 2535 | }, 2536 | { 2537 | "cell_type": "code", 2538 | "execution_count": null, 2539 | "metadata": { 2540 | "collapsed": false 2541 | }, 2542 | "outputs": [], 2543 | "source": [ 2544 | "r = np.arange(36)\n", 2545 | "r.resize((6, 6))\n", 2546 | "r" 2547 | ] 2548 | }, 2549 | { 2550 | "cell_type": "markdown", 2551 | "metadata": {}, 2552 | "source": [ 2553 | "
\n", 2554 | "Use bracket notation to slice: `array[row, column]`" 2555 | ] 2556 | }, 2557 | { 2558 | "cell_type": "code", 2559 | "execution_count": null, 2560 | "metadata": { 2561 | "collapsed": false 2562 | }, 2563 | "outputs": [], 2564 | "source": [ 2565 | "r[2, 2]" 2566 | ] 2567 | }, 2568 | { 2569 | "cell_type": "markdown", 2570 | "metadata": {}, 2571 | "source": [ 2572 | "
\n", 2573 | "And use : to select a range of rows or columns" 2574 | ] 2575 | }, 2576 | { 2577 | "cell_type": "code", 2578 | "execution_count": null, 2579 | "metadata": { 2580 | "collapsed": false 2581 | }, 2582 | "outputs": [], 2583 | "source": [ 2584 | "r[3, 3:6]" 2585 | ] 2586 | }, 2587 | { 2588 | "cell_type": "markdown", 2589 | "metadata": {}, 2590 | "source": [ 2591 | "
\n", 2592 | "Here we are selecting all the rows up to (and not including) row 2, and all the columns up to (and not including) the last column." 2593 | ] 2594 | }, 2595 | { 2596 | "cell_type": "code", 2597 | "execution_count": null, 2598 | "metadata": { 2599 | "collapsed": false 2600 | }, 2601 | "outputs": [], 2602 | "source": [ 2603 | "r[:2, :-1]" 2604 | ] 2605 | }, 2606 | { 2607 | "cell_type": "markdown", 2608 | "metadata": {}, 2609 | "source": [ 2610 | "
\n", 2611 | "This is a slice of the last row, and only every other element." 2612 | ] 2613 | }, 2614 | { 2615 | "cell_type": "code", 2616 | "execution_count": null, 2617 | "metadata": { 2618 | "collapsed": false 2619 | }, 2620 | "outputs": [], 2621 | "source": [ 2622 | "r[-1, ::2]" 2623 | ] 2624 | }, 2625 | { 2626 | "cell_type": "markdown", 2627 | "metadata": {}, 2628 | "source": [ 2629 | "
\n", 2630 | "We can also perform conditional indexing. Here we are selecting values from the array that are greater than 30. (Also see `np.where`)" 2631 | ] 2632 | }, 2633 | { 2634 | "cell_type": "code", 2635 | "execution_count": null, 2636 | "metadata": { 2637 | "collapsed": false 2638 | }, 2639 | "outputs": [], 2640 | "source": [ 2641 | "r[r > 30]" 2642 | ] 2643 | }, 2644 | { 2645 | "cell_type": "markdown", 2646 | "metadata": {}, 2647 | "source": [ 2648 | "
\n", 2649 | "Here we are assigning all values in the array that are greater than 30 to the value of 30." 2650 | ] 2651 | }, 2652 | { 2653 | "cell_type": "code", 2654 | "execution_count": null, 2655 | "metadata": { 2656 | "collapsed": false 2657 | }, 2658 | "outputs": [], 2659 | "source": [ 2660 | "r[r > 30] = 30\n", 2661 | "r" 2662 | ] 2663 | }, 2664 | { 2665 | "cell_type": "markdown", 2666 | "metadata": {}, 2667 | "source": [ 2668 | "
\n", 2669 | "## Copying Data" 2670 | ] 2671 | }, 2672 | { 2673 | "cell_type": "markdown", 2674 | "metadata": {}, 2675 | "source": [ 2676 | "Be careful with copying and modifying arrays in NumPy!\n", 2677 | "\n", 2678 | "\n", 2679 | "`r2` is a slice of `r`" 2680 | ] 2681 | }, 2682 | { 2683 | "cell_type": "code", 2684 | "execution_count": null, 2685 | "metadata": { 2686 | "collapsed": false 2687 | }, 2688 | "outputs": [], 2689 | "source": [ 2690 | "r2 = r[:3,:3]\n", 2691 | "r2" 2692 | ] 2693 | }, 2694 | { 2695 | "cell_type": "markdown", 2696 | "metadata": {}, 2697 | "source": [ 2698 | "
\n", 2699 | "Set this slice's values to zero ([:] selects the entire array)" 2700 | ] 2701 | }, 2702 | { 2703 | "cell_type": "code", 2704 | "execution_count": null, 2705 | "metadata": { 2706 | "collapsed": false 2707 | }, 2708 | "outputs": [], 2709 | "source": [ 2710 | "r2[:] = 0\n", 2711 | "r2" 2712 | ] 2713 | }, 2714 | { 2715 | "cell_type": "markdown", 2716 | "metadata": {}, 2717 | "source": [ 2718 | "
\n", 2719 | "`r` has also been changed!" 2720 | ] 2721 | }, 2722 | { 2723 | "cell_type": "code", 2724 | "execution_count": null, 2725 | "metadata": { 2726 | "collapsed": false 2727 | }, 2728 | "outputs": [], 2729 | "source": [ 2730 | "r" 2731 | ] 2732 | }, 2733 | { 2734 | "cell_type": "markdown", 2735 | "metadata": {}, 2736 | "source": [ 2737 | "
\n", 2738 | "To avoid this, use `r.copy` to create a copy that will not affect the original array" 2739 | ] 2740 | }, 2741 | { 2742 | "cell_type": "code", 2743 | "execution_count": null, 2744 | "metadata": { 2745 | "collapsed": false 2746 | }, 2747 | "outputs": [], 2748 | "source": [ 2749 | "r_copy = r.copy()\n", 2750 | "r_copy" 2751 | ] 2752 | }, 2753 | { 2754 | "cell_type": "markdown", 2755 | "metadata": {}, 2756 | "source": [ 2757 | "
\n", 2758 | "Now when r_copy is modified, r will not be changed." 2759 | ] 2760 | }, 2761 | { 2762 | "cell_type": "code", 2763 | "execution_count": null, 2764 | "metadata": { 2765 | "collapsed": false 2766 | }, 2767 | "outputs": [], 2768 | "source": [ 2769 | "r_copy[:] = 10\n", 2770 | "print(r_copy, '\\n')\n", 2771 | "print(r)" 2772 | ] 2773 | }, 2774 | { 2775 | "cell_type": "markdown", 2776 | "metadata": {}, 2777 | "source": [ 2778 | "
\n", 2779 | "### Iterating Over Arrays" 2780 | ] 2781 | }, 2782 | { 2783 | "cell_type": "markdown", 2784 | "metadata": {}, 2785 | "source": [ 2786 | "Let's create a new 4 by 3 array of random numbers 0-9." 2787 | ] 2788 | }, 2789 | { 2790 | "cell_type": "code", 2791 | "execution_count": null, 2792 | "metadata": { 2793 | "collapsed": false 2794 | }, 2795 | "outputs": [], 2796 | "source": [ 2797 | "test = np.random.randint(0, 10, (4,3))\n", 2798 | "test" 2799 | ] 2800 | }, 2801 | { 2802 | "cell_type": "markdown", 2803 | "metadata": {}, 2804 | "source": [ 2805 | "
\n", 2806 | "Iterate by row:" 2807 | ] 2808 | }, 2809 | { 2810 | "cell_type": "code", 2811 | "execution_count": null, 2812 | "metadata": { 2813 | "collapsed": false 2814 | }, 2815 | "outputs": [], 2816 | "source": [ 2817 | "for row in test:\n", 2818 | " print(row)" 2819 | ] 2820 | }, 2821 | { 2822 | "cell_type": "markdown", 2823 | "metadata": {}, 2824 | "source": [ 2825 | "
\n", 2826 | "Iterate by index:" 2827 | ] 2828 | }, 2829 | { 2830 | "cell_type": "code", 2831 | "execution_count": null, 2832 | "metadata": { 2833 | "collapsed": false 2834 | }, 2835 | "outputs": [], 2836 | "source": [ 2837 | "for i in range(len(test)):\n", 2838 | " print(test[i])" 2839 | ] 2840 | }, 2841 | { 2842 | "cell_type": "markdown", 2843 | "metadata": {}, 2844 | "source": [ 2845 | "
\n", 2846 | "Iterate by row and index:" 2847 | ] 2848 | }, 2849 | { 2850 | "cell_type": "code", 2851 | "execution_count": null, 2852 | "metadata": { 2853 | "collapsed": false 2854 | }, 2855 | "outputs": [], 2856 | "source": [ 2857 | "for i, row in enumerate(test):\n", 2858 | " print('row', i, 'is', row)" 2859 | ] 2860 | }, 2861 | { 2862 | "cell_type": "markdown", 2863 | "metadata": {}, 2864 | "source": [ 2865 | "
\n", 2866 | "Use `zip` to iterate over multiple iterables." 2867 | ] 2868 | }, 2869 | { 2870 | "cell_type": "code", 2871 | "execution_count": null, 2872 | "metadata": { 2873 | "collapsed": false 2874 | }, 2875 | "outputs": [], 2876 | "source": [ 2877 | "test2 = test**2\n", 2878 | "test2" 2879 | ] 2880 | }, 2881 | { 2882 | "cell_type": "code", 2883 | "execution_count": null, 2884 | "metadata": { 2885 | "collapsed": false 2886 | }, 2887 | "outputs": [], 2888 | "source": [ 2889 | "for i, j in zip(test, test2):\n", 2890 | " print(i,'+',j,'=',i+j)" 2891 | ] 2892 | } 2893 | ], 2894 | "metadata": { 2895 | "kernelspec": { 2896 | "display_name": "Python 3", 2897 | "language": "python", 2898 | "name": "python3" 2899 | }, 2900 | "language_info": { 2901 | "codemirror_mode": { 2902 | "name": "ipython", 2903 | "version": 3 2904 | }, 2905 | "file_extension": ".py", 2906 | "mimetype": "text/x-python", 2907 | "name": "python", 2908 | "nbconvert_exporter": "python", 2909 | "pygments_lexer": "ipython3", 2910 | "version": "3.5.2" 2911 | } 2912 | }, 2913 | "nbformat": 4, 2914 | "nbformat_minor": 0 2915 | } 2916 | --------------------------------------------------------------------------------