├── .DS_Store
├── .ipynb_checkpoints
├── A00. Course Overview 10-09-2016-checkpoint.ipynb
├── B00. Introduction-checkpoint.ipynb
├── B01. Getting Started -checkpoint.ipynb
├── B02. First Steps With Python-checkpoint.ipynb
├── B03. Functions-checkpoint.ipynb
├── B03E. Functions Exercises-checkpoint.ipynb
├── B03S. Functions Solutions-checkpoint.ipynb
├── B04. Basic Data Structures Part 1-checkpoint.ipynb
├── B04E. Basic Data Structures Part 1 Exercises-checkpoint.ipynb
├── B04S. Basic Data Structures Part 1 Solutions-checkpoint.ipynb
├── B05. Methods-checkpoint.ipynb
├── B06. Basic Data Structures Part 2-checkpoint.ipynb
├── B06E. Basic Data Structures Part 2 Exercises-checkpoint.ipynb
├── B06S. Basic Data Structures Part 2 Solutions-checkpoint.ipynb
├── B07. Exceptions, Try & Except-checkpoint.ipynb
├── B08. Loops & Iterating-checkpoint.ipynb
├── B08E. Loops & Iterating Exercises-checkpoint.ipynb
├── B08S. Loops & Iterating Solutions-checkpoint.ipynb
├── B09. Built In Functions-checkpoint.ipynb
├── B10. Understanding, Downloading, Installing and using Python Packages-checkpoint.ipynb
├── B11. Getting Help, StackOverflow and Github-checkpoint.ipynb
├── B11E. Sandbox Exercises-checkpoint.ipynb
├── B11S. Sandbox Solutions-checkpoint.ipynb
├── D00. Introduction to Python for Data Analysis-checkpoint.ipynb
├── D01. Advanced Data Structures-checkpoint.ipynb
├── D01E. Advanced Data Structures Exercises-checkpoint.ipynb
├── D01S. Advanced Data Structures Solutions-checkpoint.ipynb
├── D02. Importing & Exporting Data with Pandas-checkpoint.ipynb
├── D03. Dataframes - Handling Data-checkpoint.ipynb
├── D03E. Dataframes - Handling Data Exercises-checkpoint.ipynb
├── D03S. Dataframes - Handling Data Solutions-checkpoint.ipynb
├── D04. Dataframes - Refining and Indexing-checkpoint.ipynb
├── D04E. Dataframes - Refining and Indexing Exercises-checkpoint.ipynb
├── D04S. Dataframes - Refining and Indexing Solutions-checkpoint.ipynb
├── D05. Dataframes Merging & Concatanating-checkpoint.ipynb
├── D05E. Dataframes Merging & Concatenating Exercises-checkpoint.ipynb
├── D05S. Dataframes Merging & Concatenating Solutions-checkpoint.ipynb
├── D06. Summary Statistics & GroupBy-checkpoint.ipynb
├── D06E. Summary Statistics & GroupBy Exercises-checkpoint.ipynb
├── D06S. Summary Statistics & GroupBy Solutions-checkpoint.ipynb
├── D07. Useful Things in Pandas-checkpoint.ipynb
├── V00. Introduction to Data Visualisation-checkpoint.ipynb
├── V01. Getting Started with Matplotlib-checkpoint.ipynb
├── V02. Sexy Charting with Matplotlib-checkpoint.ipynb
├── V02E. Matplotlib Exercises-checkpoint.ipynb
├── V02S. Matplotlib Solutions-checkpoint.ipynb
├── V03. Getting Started with Seaborn-checkpoint.ipynb
├── V04. Customising your Seaborn Outputs-checkpoint.ipynb
├── V04E. Seaborn Exercises-checkpoint.ipynb
├── V04S. Seaborn Solutions-checkpoint.ipynb
├── X03. Introduction to Statistics-checkpoint.ipynb
├── X04. Introduction to Machine Learning-checkpoint.ipynb
└── X05. Introduction to Natural Language Processing-checkpoint.ipynb
├── A00. Course Overview 10-09-2016.ipynb
├── B00. Introduction.ipynb
├── B01. Getting Started .ipynb
├── B02. First Steps With Python.ipynb
├── B03. Functions.ipynb
├── B03E. Functions Exercises.ipynb
├── B03S. Functions Solutions.ipynb
├── B04. Basic Data Structures Part 1.ipynb
├── B04E. Basic Data Structures Part 1 Exercises.ipynb
├── B04S. Basic Data Structures Part 1 Solutions.ipynb
├── B05. Methods.ipynb
├── B06. Basic Data Structures Part 2.ipynb
├── B06E. Basic Data Structures Part 2 Exercises.ipynb
├── B06S. Basic Data Structures Part 2 Solutions.ipynb
├── B07. Exceptions, Try & Except.ipynb
├── B08. Loops & Iterating.ipynb
├── B08E. Loops & Iterating Exercises.ipynb
├── B08S. Loops & Iterating Solutions.ipynb
├── B09. Built In Functions.ipynb
├── B10. Understanding, Downloading, Installing and using Python Packages.ipynb
├── B11. Getting Help, StackOverflow and Github.ipynb
├── B11E. Sandbox Exercises.ipynb
├── B11S. Sandbox Solutions.ipynb
├── B12. Summary of the Basics section of the course.ipynb
├── C:\Users\Tom\Desktop\MOT File.csv
├── Chart.png
├── D00. Introduction to Python for Data Analysis.ipynb
├── D01. Advanced Data Structures.ipynb
├── D01E. Advanced Data Structures Exercises.ipynb
├── D01S. Advanced Data Structures Solutions.ipynb
├── D02. Importing & Exporting Data with Pandas.ipynb
├── D03. Dataframes - Handling Data.ipynb
├── D03E. Dataframes - Handling Data Exercises.ipynb
├── D03S. Dataframes - Handling Data Solutions.ipynb
├── D04. Dataframes - Refining and Indexing.ipynb
├── D04E. Dataframes - Refining and Indexing Exercises.ipynb
├── D04S. Dataframes - Refining and Indexing Solutions.ipynb
├── D05. Dataframes Merging & Concatanating.ipynb
├── D05E. Dataframes Merging & Concatenating Exercises.ipynb
├── D05S. Dataframes Merging & Concatenating Solutions.ipynb
├── D06. Summary Statistics & GroupBy.ipynb
├── D06E. Summary Statistics & GroupBy Exercises.ipynb
├── D06S. Summary Statistics & GroupBy Solutions.ipynb
├── D07. Useful Things in Pandas.ipynb
├── README.md
├── Training Cookbook.py
├── V00. Introduction to Data Visualisation.ipynb
├── V01. Getting Started with Matplotlib.ipynb
├── V02. Sexy Charting with Matplotlib.ipynb
├── V02E. Matplotlib Exercises.ipynb
├── V02S. Matplotlib Solutions.ipynb
├── V03. Getting Started with Seaborn.ipynb
├── V04. Customising your Seaborn Outputs.ipynb
├── V04E. Seaborn Exercises.ipynb
├── V04S. Seaborn Solutions.ipynb
├── V05. Interactive plots with Bokeh.ipynb
├── V05. Sandbox Challenge!.ipynb
├── X00. Recap & Sandbox Challenge.ipynb
├── X01. Introduction to APIs.ipynb
├── X01E.Introduction to APIs Exercies.ipynb
├── X02. Web Scraping with Python.ipynb
├── X02E. Web Scraping Exercises.ipynb
├── X03. Introduction to Statistics.ipynb
├── X04. Introduction to Machine Learning.ipynb
├── X05. Introduction to Natural Language Processing.ipynb
├── Z0. In Closing....ipynb
└── img
├── .DS_Store
├── API.png
├── Clipboard04.png
├── Icon
├── Launcher.jpg
├── Requests.png
├── bokeh.png
├── cell type.png
├── desktop.ini
├── desktop.png
├── github.jpg
├── interrupt.png
├── joins.jpg
├── joins.png
├── jupyter.jpg
├── lightning.png
├── matplot.png
├── numpy.jpg
├── pandas.jpg
├── plotly.png
├── rename.png
├── run cell.png
├── scikit.png
├── scipy.png
├── seaborn.png
├── shell.jpg
├── slack.png
├── stack.png
├── statsmodels.png
├── structure.png
└── tags.png
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/department-for-transport/Python-for-Analysts/89f4be24d0394f0269262f532cae36d937699530/.DS_Store
--------------------------------------------------------------------------------
/.ipynb_checkpoints/A00. Course Overview 10-09-2016-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Python for Analysts Training"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "Hi! And welcome to the Python for Analysts training course. This covers everything you need to know to start using Python for data analysis and visualisation as well as showcasing some more advanced and snazzy stuff, including Statistics, Machine Learning, Web Scraping / Interaction etc.\n",
15 | "\n",
16 | "The course assumes no prior knowledge of Python and will teach you everything you need to know in order to use Python for data analysis and visualisation, including interfacing with Python via the Jupyter interface, using Text Editors / Integrated Development Environments (IDEs), upgrading Python, working with the command line etc.\n",
17 | "\n",
18 | "Lastly, note that the course can only hope to give you an introduction to Python for Data Analysis over the 2 days. You'll no doubt want to continue your learning afterward, and the course provides links to relevant material with which to further your development."
19 | ]
20 | },
21 | {
22 | "cell_type": "markdown",
23 | "metadata": {},
24 | "source": [
25 | "## About me"
26 | ]
27 | },
28 | {
29 | "cell_type": "markdown",
30 | "metadata": {},
31 | "source": [
 32 |     "I'm Tom Ewing, a data scientist working for the Department for Transport. I've been using Python for just under 2 years having made the jump from SAS. I've delivered training courses in other technology before so was really keen to combine this experience with Python!"
33 | ]
34 | },
35 | {
36 | "cell_type": "markdown",
37 | "metadata": {},
38 | "source": [
39 | "## Structure of the Course"
40 | ]
41 | },
42 | {
43 | "cell_type": "markdown",
44 | "metadata": {},
45 | "source": [
46 | "This course has been designed to be both delivered as a classroom training course, but can also be taken offline as well, in your own time.\n",
47 | "\n",
 48 |     "In a classroom environment the course is designed to cover the best part of 2 days with time for exercises and consolidation in between. There is also more material for you to explore afterward.\n",
49 | "\n",
50 | "You will be expected to have a project to practice with! This will allow you to consolidate your Python training and continue to learn and develop.\n",
51 | "\n",
52 | "The structure of the course is as follows:\n",
53 | "\n",
54 | "## Day 1-2:\n",
55 | "\n",
56 | "### Basics \n",
57 | "\n",
58 | "* Interfacing with Python\n",
 59 |     "* Basic Python Syntax\n",
60 | "* Data Structures\n",
61 | "* Coding concepts\n",
62 | "* Looping\n",
63 | "* Enhancing Python with Packages\n",
64 | "\n",
65 | "### Working with data\n",
66 | "\n",
67 | "* Data Analysis Libraries\n",
68 | "* Advanced Data Structures\n",
69 | "* Importing / Exporting Data\n",
70 | "* Working with DataFrames\n",
71 | "* Summary Statistics\n",
72 | "* Tables\n",
73 | "\n",
74 | "### Visualisation\n",
75 | "\n",
76 | "* Static Visualisation\n",
77 | "* Statistical Visualisation\n",
78 | "* Interactive Visualisation\n"
79 | ]
80 | },
81 | {
82 | "cell_type": "markdown",
83 | "metadata": {},
84 | "source": [
 85 |     "Those taking the course should note that the best way to consolidate their learning is via your project. Not only will this help you embed what you've learned, but it will also get you used to solving problems and continuing your learning journey in Python!"
86 | ]
87 | },
88 | {
89 | "cell_type": "markdown",
90 | "metadata": {},
91 | "source": [
92 | "## Following along"
93 | ]
94 | },
95 | {
96 | "cell_type": "markdown",
97 | "metadata": {},
98 | "source": [
99 | "During the lectures, you might wish to just listen, follow along on your screen, or execute the code in your own blank notebook, make notes etc. All of this is fine so long as you pay attention!\n",
100 | "\n",
101 | "In most of the lectures the code is 'pre-baked' - we will explain what it does, execute it and show you and talk you through the output. This means we can give the class our full attention and not focus on finding typos or wondering why code didn't run properly!"
102 | ]
103 | },
104 | {
105 | "cell_type": "markdown",
106 | "metadata": {},
107 | "source": [
108 | "## Domestic Arrangements"
109 | ]
110 | },
111 | {
112 | "cell_type": "markdown",
113 | "metadata": {},
114 | "source": [
115 | "* Laptops\n",
116 | "* Toilets\n",
117 | "* Fire Alarm\n",
118 | "* Breaks\n",
119 | "* Lunch"
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": null,
125 | "metadata": {
126 | "collapsed": true
127 | },
128 | "outputs": [],
129 | "source": []
130 | }
131 | ],
132 | "metadata": {
133 | "anaconda-cloud": {},
134 | "kernelspec": {
135 | "display_name": "Python [default]",
136 | "language": "python",
137 | "name": "python3"
138 | },
139 | "language_info": {
140 | "codemirror_mode": {
141 | "name": "ipython",
142 | "version": 3
143 | },
144 | "file_extension": ".py",
145 | "mimetype": "text/x-python",
146 | "name": "python",
147 | "nbconvert_exporter": "python",
148 | "pygments_lexer": "ipython3",
149 | "version": "3.5.2"
150 | }
151 | },
152 | "nbformat": 4,
153 | "nbformat_minor": 0
154 | }
155 |
--------------------------------------------------------------------------------
/.ipynb_checkpoints/B03E. Functions Exercises-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# B03E. Functions Exercises"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## Introduction to Exercises"
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
 21 |     "Welcome to the first set of exercises! Whilst doing these we recommend that you:\n",
22 | "\n",
 23 |     "1) Refer to the lesson notebooks and your notes in order to answer them but don't copy and paste over from them! You'll learn better if you start committing the code to muscle memory through typing!<br>\n",
 24 |     "2) Go beyond the scope of the exercises wherever you can - feel free to try stuff and experiment. It's unlikely that you'll break anything.<br>\n",
 25 |     "3) Google anything that you want to know more about.<br>\n",
 26 |     "4) Copy code over to your Cookbook for future reference.<br>\n",
 27 |     "5) Have fun =)<br>"
28 | ]
29 | },
30 | {
31 | "cell_type": "markdown",
32 | "metadata": {},
33 | "source": [
34 | "#### Exercise 1: Create a function that prints the type and value of an object / variable and use it to evaluate variables a through e below."
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": null,
40 | "metadata": {
41 | "collapsed": true
42 | },
43 | "outputs": [],
44 | "source": [
45 | "a = 'beans'\n",
46 | "b = 12\n",
47 | "c = 34.65\n",
48 | "d = True\n",
49 | "e = None"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": null,
55 | "metadata": {
56 | "collapsed": false
57 | },
58 | "outputs": [],
59 | "source": []
60 | },
61 | {
62 | "cell_type": "markdown",
63 | "metadata": {},
64 | "source": [
65 | "#### Exercise 2: Create a function that determines how many letters are in a character string and outputs this in a meaningful statement to the user. Then use it to evaluate variables f through j below:"
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "execution_count": null,
71 | "metadata": {
72 | "collapsed": true
73 | },
74 | "outputs": [],
75 | "source": [
76 | "f = 'The Shawshank Redemption'\n",
77 | "g = 'The Godfather'\n",
78 | "h = 'The Godfather: Part II'\n",
79 | "i = 'The Dark Knight'\n",
80 | "j = \"Schindler's List\""
81 | ]
82 | },
83 | {
84 | "cell_type": "code",
85 | "execution_count": null,
86 | "metadata": {
87 | "collapsed": false
88 | },
89 | "outputs": [],
90 | "source": []
91 | },
92 | {
93 | "cell_type": "markdown",
94 | "metadata": {},
95 | "source": [
96 | "#### Exercise 3: Create a function using if/elif/else that determines what mode of transport to take based upon the number of people who want to travel:\n",
97 | " \n",
 98 |     "1-2: Walk<br>\n",
 99 |     "3-5: Car<br>\n",
 100 |     "6-50: Coach<br>\n",
 101 |     "51-100: Train<br>\n",
 102 |     "101+: Plane<br>\n",
103 | "\n",
104 | "#### Then use this function to evaluate variables k through o:"
105 | ]
106 | },
107 | {
108 | "cell_type": "code",
109 | "execution_count": null,
110 | "metadata": {
111 | "collapsed": true
112 | },
113 | "outputs": [],
114 | "source": [
115 | "k = 56\n",
116 | "l = 1\n",
117 | "m = 4\n",
118 | "n = 180\n",
119 | "o = 12"
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": null,
125 | "metadata": {
126 | "collapsed": false
127 | },
128 | "outputs": [],
129 | "source": [
130 | "\n"
131 | ]
132 | }
133 | ],
134 | "metadata": {
135 | "anaconda-cloud": {},
136 | "kernelspec": {
137 | "display_name": "Python [default]",
138 | "language": "python",
139 | "name": "python3"
140 | },
141 | "language_info": {
142 | "codemirror_mode": {
143 | "name": "ipython",
144 | "version": 3
145 | },
146 | "file_extension": ".py",
147 | "mimetype": "text/x-python",
148 | "name": "python",
149 | "nbconvert_exporter": "python",
150 | "pygments_lexer": "ipython3",
151 | "version": "3.5.2"
152 | }
153 | },
154 | "nbformat": 4,
155 | "nbformat_minor": 0
156 | }
157 |
--------------------------------------------------------------------------------
/.ipynb_checkpoints/B03S. Functions Solutions-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# B03S. Functions Solutions"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## Introduction to Exercises"
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
 21 |     "Welcome to the first set of exercises! Whilst doing these we recommend that you:\n",
22 | "\n",
 23 |     "1) Refer to the lesson notebooks and your notes in order to answer them but don't copy and paste over from them! You'll learn better if you start committing the code to muscle memory through typing!<br>\n",
 24 |     "2) Go beyond the scope of the exercises wherever you can - feel free to try stuff and experiment. It's unlikely that you'll break anything.<br>\n",
 25 |     "3) Google anything that you want to know more about.<br>\n",
 26 |     "4) Copy code over to your Cookbook for future reference.<br>\n",
 27 |     "5) Have fun =)<br>"
28 | ]
29 | },
30 | {
31 | "cell_type": "markdown",
32 | "metadata": {},
33 | "source": [
34 | "#### Exercise 1: Create a function that prints the type and value of an object / variable and use it to evaluate variables a through e below."
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": null,
40 | "metadata": {
41 | "collapsed": true
42 | },
43 | "outputs": [],
44 | "source": [
45 | "a = 'beans'\n",
46 | "b = 12\n",
47 | "c = 34.65\n",
48 | "d = True\n",
49 | "e = None\n",
50 | "f= 3"
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": null,
56 | "metadata": {
57 | "collapsed": false
58 | },
59 | "outputs": [],
60 | "source": [
61 | "def obj(var):\n",
62 | " '''Prints the type and value of an object'''\n",
63 | " print(\"The type of the variable is:\",type(var))\n",
64 | " print(\"The value of the object is:\",var)\n",
65 | " \n",
66 | "obj(a)\n",
67 | "obj(b)\n",
68 | "obj(c)\n",
69 | "obj(d)\n",
70 | "obj(e)\n",
71 | "obj(f)"
72 | ]
73 | },
74 | {
75 | "cell_type": "markdown",
76 | "metadata": {},
77 | "source": [
78 | "#### Exercise 2: Create a function that determines how many letters are in a character string and outputs this in a meaningful statement to the user. Then use it to evaluate variables f through j below:"
79 | ]
80 | },
81 | {
82 | "cell_type": "code",
83 | "execution_count": null,
84 | "metadata": {
85 | "collapsed": true
86 | },
87 | "outputs": [],
88 | "source": [
89 | "f = 'The Shawshank Redemption'\n",
90 | "g = 'The Godfather'\n",
91 | "h = 'The Godfather: Part II'\n",
92 | "i = 'The Dark Knight'\n",
93 | "j = \"Schindler's List\""
94 | ]
95 | },
96 | {
97 | "cell_type": "code",
98 | "execution_count": null,
99 | "metadata": {
100 | "collapsed": false
101 | },
102 | "outputs": [],
103 | "source": [
104 | "def char(var):\n",
105 | " '''Determines how many characters are in a string'''\n",
106 | " print(\"%s contains\" % var,len(var),\"characters\")\n",
107 | "\n",
108 | "char(f)\n",
109 | "char(g)\n",
110 | "char(h)\n",
111 | "char(i)\n",
112 | "char(j)"
113 | ]
114 | },
115 | {
116 | "cell_type": "markdown",
117 | "metadata": {},
118 | "source": [
119 | "#### Exercise 3: Create a function using if/elif/else that determines what mode of transport to take based upon the number of people who want to travel:\n",
120 | " \n",
 121 |     "1-2: Walk<br>\n",
 122 |     "3-5: Car<br>\n",
 123 |     "6-50: Coach<br>\n",
 124 |     "51-100: Train<br>\n",
 125 |     "101+: Plane<br>\n",
126 | "\n",
127 | "#### Then use this function to evaluate variables k through o:"
128 | ]
129 | },
130 | {
131 | "cell_type": "code",
132 | "execution_count": null,
133 | "metadata": {
134 | "collapsed": true
135 | },
136 | "outputs": [],
137 | "source": [
138 | "k = 56\n",
139 | "l = 1\n",
140 | "m = 4\n",
141 | "n = 180\n",
142 | "o = 12"
143 | ]
144 | },
145 | {
146 | "cell_type": "code",
147 | "execution_count": null,
148 | "metadata": {
149 | "collapsed": false
150 | },
151 | "outputs": [],
152 | "source": [
153 | "def people(var):\n",
154 | " '''Determines the best mode of transport based upon the volume of people travelling'''\n",
155 | " if var > 0 and var <= 2:\n",
156 | " return 'Walk'\n",
157 | " elif var > 2 and var <= 5:\n",
158 | " return 'Car'\n",
159 | " elif var > 5 and var <= 50:\n",
160 | " return 'Coach'\n",
161 | " elif var > 50 and var <= 100:\n",
162 | " return 'Train'\n",
163 | " elif var > 100:\n",
164 | " return 'Plane' \n",
165 | " else:\n",
166 | " return 'Error'\n",
167 | "\n",
168 | "print(\"k =\",people(k),\"l =\",people(l),\"m =\",people(m),\"n =\",people(n),\"o =\",people(o))"
169 | ]
170 | },
171 | {
172 | "cell_type": "code",
173 | "execution_count": null,
174 | "metadata": {
175 | "collapsed": true
176 | },
177 | "outputs": [],
178 | "source": []
179 | }
180 | ],
181 | "metadata": {
182 | "anaconda-cloud": {},
183 | "kernelspec": {
184 | "display_name": "Python [default]",
185 | "language": "python",
186 | "name": "python3"
187 | },
188 | "language_info": {
189 | "codemirror_mode": {
190 | "name": "ipython",
191 | "version": 3
192 | },
193 | "file_extension": ".py",
194 | "mimetype": "text/x-python",
195 | "name": "python",
196 | "nbconvert_exporter": "python",
197 | "pygments_lexer": "ipython3",
198 | "version": "3.5.2"
199 | }
200 | },
201 | "nbformat": 4,
202 | "nbformat_minor": 0
203 | }
204 |
--------------------------------------------------------------------------------
/.ipynb_checkpoints/B04E. Basic Data Structures Part 1 Exercises-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# B04E: Basic Data Structures Part 1 Exercises"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "#### Exercise 1: Create a function that returns the first and last items of a list and prints this to the user. Use the function to evaluate lists a, b and c as follows:"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 3,
20 | "metadata": {
21 | "collapsed": true
22 | },
23 | "outputs": [],
24 | "source": [
25 | "a = [1,2,3,4,5,6,7,8,9]\n",
26 | "b = ['alpha', 'beta', 'charlie', 'delta', 'echo']\n",
27 | "c = [1.1,1.2,1.3,1.4,1.5,1.6,1.7]"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 18,
33 | "metadata": {
34 | "collapsed": false
35 | },
36 | "outputs": [
37 | {
38 | "name": "stdout",
39 | "output_type": "stream",
40 | "text": [
41 | "1.1 1.7\n"
42 | ]
43 | }
44 | ],
45 | "source": [
46 | "def lister1(var):\n",
47 | " print(var[0],var[-1])\n",
48 | " \n",
49 | "lister1(c)"
50 | ]
51 | },
52 | {
53 | "cell_type": "markdown",
54 | "metadata": {},
55 | "source": [
56 | "#### Exercise 2: Modify your function so that it works on variable d below:"
57 | ]
58 | },
59 | {
60 | "cell_type": "code",
61 | "execution_count": 21,
62 | "metadata": {
63 | "collapsed": false,
64 | "scrolled": true
65 | },
66 | "outputs": [
67 | {
68 | "data": {
69 | "text/plain": [
70 | "[[1, 2, 3, 4, 5, 6, 7, 8, 9],\n",
71 | " ['alpha', 'beta', 'charlie', 'delta', 'echo'],\n",
72 | " [1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7]]"
73 | ]
74 | },
75 | "execution_count": 21,
76 | "metadata": {},
77 | "output_type": "execute_result"
78 | }
79 | ],
80 | "source": [
81 | "d = [a,b,c]\n",
82 | "d"
83 | ]
84 | },
85 | {
86 | "cell_type": "code",
87 | "execution_count": 23,
88 | "metadata": {
89 | "collapsed": false
90 | },
91 | "outputs": [
92 | {
93 | "name": "stdout",
94 | "output_type": "stream",
95 | "text": [
96 | "1 9\n",
97 | "alpha echo\n",
98 | "1.1 1.7\n"
99 | ]
100 | }
101 | ],
102 | "source": [
103 | "def lister2(var):\n",
104 | " print(var[0][0],var[0][-1])\n",
105 | " print(var[1][0],var[1][-1])\n",
106 | " print(var[2][0],var[2][-1])\n",
107 | "lister2(d)"
108 | ]
109 | },
110 | {
111 | "cell_type": "markdown",
112 | "metadata": {},
113 | "source": [
114 | "#### Exercise 3: Create 3 new variables using slicing as follows:\n",
115 | "\n",
 116 |     "f = Every 5th character of e, starting at the beginning of the string<br>\n",
 117 |     "g = Every 2nd character of e starting at the end of the string<br>\n",
 118 |     "h = Every 6th character of e starting at the middle of the string<br>\n",
119 | "\n",
120 | "Hint! You'll need to use a function for h!"
121 | ]
122 | },
123 | {
124 | "cell_type": "code",
125 | "execution_count": 3,
126 | "metadata": {
127 | "collapsed": true
128 | },
129 | "outputs": [],
130 | "source": [
131 | "e = \"Lorem Ipsum is simply dummy text of the printing and typesetting industry. It has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like PageMaker including versions of Lorem Ipsum\""
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "execution_count": null,
137 | "metadata": {
138 | "collapsed": false
139 | },
140 | "outputs": [],
141 | "source": [
142 | "f = e[0::5]\n",
143 | "f"
144 | ]
145 | },
146 | {
147 | "cell_type": "code",
148 | "execution_count": null,
149 | "metadata": {
150 | "collapsed": true
151 | },
152 | "outputs": [],
153 | "source": [
154 | "g = e[-1::-2]\n",
155 | "g"
156 | ]
157 | },
158 | {
159 | "cell_type": "code",
160 | "execution_count": null,
161 | "metadata": {
162 | "collapsed": true
163 | },
164 | "outputs": [],
165 | "source": [
166 | "h = e[(int(len(e)/2))::6]\n",
167 | "h"
168 | ]
169 | },
170 | {
171 | "cell_type": "markdown",
172 | "metadata": {},
173 | "source": [
174 | "#### Exercise 4: Find a way to create a list containing all the words in e as separate items and assign this list to variable i.\n",
175 | "\n",
176 | "Hint! It's OK to use Google!"
177 | ]
178 | },
179 | {
180 | "cell_type": "code",
181 | "execution_count": 48,
182 | "metadata": {
183 | "collapsed": true
184 | },
185 | "outputs": [],
186 | "source": []
187 | }
188 | ],
189 | "metadata": {
190 | "anaconda-cloud": {},
191 | "kernelspec": {
192 | "display_name": "Python [default]",
193 | "language": "python",
194 | "name": "python3"
195 | },
196 | "language_info": {
197 | "codemirror_mode": {
198 | "name": "ipython",
199 | "version": 3
200 | },
201 | "file_extension": ".py",
202 | "mimetype": "text/x-python",
203 | "name": "python",
204 | "nbconvert_exporter": "python",
205 | "pygments_lexer": "ipython3",
206 | "version": "3.5.2"
207 | }
208 | },
209 | "nbformat": 4,
210 | "nbformat_minor": 0
211 | }
212 |
--------------------------------------------------------------------------------
/.ipynb_checkpoints/B04S. Basic Data Structures Part 1 Solutions-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# B04S: Basic Data Structures Part 1 Solutions\n"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "#### Exercise 1: Create a function that returns the first and last items of a list and prints this to the user. Use the function to evaluate lists a, b and c as follows:"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": null,
20 | "metadata": {
21 | "collapsed": true
22 | },
23 | "outputs": [],
24 | "source": [
25 | "a = [1,2,3,4,5,6,7,8,9]\n",
26 | "b = ['alpha', 'beta', 'charlie', 'delta', 'echo']\n",
27 | "c = [1.1,1.2,1.3,1.4,1.5,1.6,1.7]"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": null,
33 | "metadata": {
34 | "collapsed": false
35 | },
36 | "outputs": [],
37 | "source": [
38 | "def fl(var):\n",
39 | " '''Prints the first and last items in a list'''\n",
40 | " print(\"The first item is:\",var[0])\n",
41 | " print(\"The last item is:\",var[-1])\n",
42 | " \n",
43 | "fl(a)\n",
44 | "fl(b)\n",
45 | "fl(c)"
46 | ]
47 | },
48 | {
49 | "cell_type": "markdown",
50 | "metadata": {},
51 | "source": [
52 | "#### Exercise 2: Modify your function so that it works on variable d below:"
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": null,
58 | "metadata": {
59 | "collapsed": false
60 | },
61 | "outputs": [],
62 | "source": [
63 | "d = [a,b,c]"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": null,
69 | "metadata": {
70 | "collapsed": false
71 | },
72 | "outputs": [],
73 | "source": [
74 | "def fl2(var):\n",
75 | " '''Prints the first last and middle items in a list'''\n",
76 | " print(\"The first item is:\",var[0][0])\n",
77 | " print(\"The last item is:\",var[0][-1])\n",
78 | " print(\"The first item is:\",var[1][0])\n",
79 | " print(\"The last item is:\",var[1][-1])\n",
80 | " print(\"The first item is:\",var[2][0])\n",
81 | " print(\"The last item is:\",var[2][-1])\n",
82 | " \n",
83 | "fl2(d)\n",
84 | "print(d)"
85 | ]
86 | },
87 | {
88 | "cell_type": "markdown",
89 | "metadata": {},
90 | "source": [
91 | "#### Exercise 3: Create 3 new variables using slicing as follows:\n",
92 | "\n",
 93 |     "f = Every 5th character of e, starting at the beginning of the string<br>\n",
 94 |     "g = Every 2nd character of e starting at the end of the string<br>\n",
 95 |     "h = Every 6th character of e starting at the middle of the string<br>\n",
96 | "\n",
97 | "Hint! You'll need to use a function for h!"
98 | ]
99 | },
100 | {
101 | "cell_type": "code",
102 | "execution_count": null,
103 | "metadata": {
104 | "collapsed": true
105 | },
106 | "outputs": [],
107 | "source": [
108 | "e = \"Lorem Ipsum is simply dummy text of the printing and typesetting industry. It has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like PageMaker including versions of Lorem Ipsum\""
109 | ]
110 | },
111 | {
112 | "cell_type": "code",
113 | "execution_count": null,
114 | "metadata": {
115 | "collapsed": false
116 | },
117 | "outputs": [],
118 | "source": [
119 | "f = e[0::5]\n",
120 | "f"
121 | ]
122 | },
123 | {
124 | "cell_type": "code",
125 | "execution_count": null,
126 | "metadata": {
127 | "collapsed": false
128 | },
129 | "outputs": [],
130 | "source": [
131 | "g = e[-1::-2]\n",
132 | "g"
133 | ]
134 | },
135 | {
136 | "cell_type": "code",
137 | "execution_count": null,
138 | "metadata": {
139 | "collapsed": false
140 | },
141 | "outputs": [],
142 | "source": [
143 | "h = e[(int(len(e)/2))::6]\n",
144 | "h"
145 | ]
146 | },
147 | {
148 | "cell_type": "markdown",
149 | "metadata": {},
150 | "source": [
151 | "#### Exercise 4: Find a way to create a list containing all the words in e as separate items and assign this list to variable i.\n",
152 | "\n",
153 | "Hint! It's OK to use Google!"
154 | ]
155 | },
156 | {
157 | "cell_type": "code",
158 | "execution_count": null,
159 | "metadata": {
160 | "collapsed": true
161 | },
162 | "outputs": [],
163 | "source": [
164 | "i = e.split()"
165 | ]
166 | },
167 | {
168 | "cell_type": "code",
169 | "execution_count": null,
170 | "metadata": {
171 | "collapsed": false
172 | },
173 | "outputs": [],
174 | "source": [
175 | "f=e.split()"
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": null,
181 | "metadata": {
182 | "collapsed": false
183 | },
184 | "outputs": [],
185 | "source": [
186 | "f[0:6:2]"
187 | ]
188 | }
189 | ],
190 | "metadata": {
191 | "kernelspec": {
192 | "display_name": "Python [default]",
193 | "language": "python",
194 | "name": "python3"
195 | },
196 | "language_info": {
197 | "codemirror_mode": {
198 | "name": "ipython",
199 | "version": 3
200 | },
201 | "file_extension": ".py",
202 | "mimetype": "text/x-python",
203 | "name": "python",
204 | "nbconvert_exporter": "python",
205 | "pygments_lexer": "ipython3",
206 | "version": "3.5.2"
207 | }
208 | },
209 | "nbformat": 4,
210 | "nbformat_minor": 0
211 | }
212 |
--------------------------------------------------------------------------------
/.ipynb_checkpoints/B05. Methods-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# B05: Methods"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "A 'Method' is a computer science term and specific to Object Oriented Programming (OOP). Methods are basically functions that are built into objects/variables. \n",
15 | "\n",
16 | "The syntax for these is simple:\n",
17 | "\n",
18 | "object.method(parameter, parameter...)\n",
19 | "\n",
20 | "Methods can take parameters (or not!) just like a function but note that they are 'coded in' to objects so not all methods are applicable for all types of object. Note that this is a simplification of what methods are. It is not important to understand their intricacies (yet!) only that they are:\n",
21 | "\n",
22 | "Something that performs an action on my objects / variables"
23 | ]
24 | },
25 | {
26 | "cell_type": "markdown",
27 | "metadata": {},
28 | "source": [
29 | "Since we're still learning about lists, lets look at some of the methods available to us with lists."
30 | ]
31 | },
32 | {
33 | "cell_type": "markdown",
34 | "metadata": {},
35 | "source": [
36 | "## List Methods"
37 | ]
38 | },
39 | {
40 | "cell_type": "code",
41 | "execution_count": null,
42 | "metadata": {
43 | "collapsed": true
44 | },
45 | "outputs": [],
46 | "source": [
47 | "mylist1 = ['Eggs','Cheese','Bread','Beer','Crisps']"
48 | ]
49 | },
50 | {
51 | "cell_type": "markdown",
52 | "metadata": {},
53 | "source": [
54 | "Adding an item to a list using append()"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": null,
60 | "metadata": {
61 | "collapsed": false
62 | },
63 | "outputs": [],
64 | "source": [
65 | "mylist1.append('Fruit') # Adding an item to the end of a list \n",
66 | "mylist1"
67 | ]
68 | },
69 | {
70 | "cell_type": "markdown",
71 | "metadata": {},
72 | "source": [
73 | "Counting the number of times a value appears within a list:"
74 | ]
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": null,
79 | "metadata": {
80 | "collapsed": false
81 | },
82 | "outputs": [],
83 | "source": [
84 | "mylist1.count('Fruit') # Counting how many times a value appears within a list"
85 | ]
86 | },
87 | {
88 | "cell_type": "markdown",
89 | "metadata": {},
90 | "source": [
91 | "Removing values from a list:"
92 | ]
93 | },
94 | {
95 | "cell_type": "code",
96 | "execution_count": null,
97 | "metadata": {
98 | "collapsed": false
99 | },
100 | "outputs": [],
101 | "source": [
102 | "mylist1.remove('Fruit') # Removing the first occurrence of a value from a list \n",
103 | "mylist1"
104 | ]
105 | },
106 | {
107 | "cell_type": "markdown",
108 | "metadata": {},
109 | "source": [
110 | "Also some methods can take 'keyword arguments'. This is a way of telling Python that you want a method to operate in a certain way or perform a certain action:"
111 | ]
112 | },
113 | {
114 | "cell_type": "code",
115 | "execution_count": null,
116 | "metadata": {
117 | "collapsed": false
118 | },
119 | "outputs": [],
120 | "source": [
121 | "mylist2 = [3,2,6,5,3,2,4,1,2,0,6]\n",
122 | "mylist2.sort(reverse=True) # Sorting a list in reverse order\n",
123 | "mylist2"
124 | ]
125 | },
126 | {
127 | "cell_type": "markdown",
128 | "metadata": {},
129 | "source": [
130 | "Not all methods need an argument or parameter:"
131 | ]
132 | },
133 | {
134 | "cell_type": "code",
135 | "execution_count": null,
136 | "metadata": {
137 | "collapsed": false
138 | },
139 | "outputs": [],
140 | "source": [
141 | "mylist1.reverse() # Reverses the order of values in the list \n",
142 | "mylist1"
143 | ]
144 | },
145 | {
146 | "cell_type": "markdown",
147 | "metadata": {},
148 | "source": [
149 | "Note that Jupyter has some awesome built in functionality to help with methods. If you type your object name followed by a . and then press the tab key it will bring up an exhaustive list of methods applicable to that object type. This is a great way to explore the methods you have available!"
150 | ]
151 | },
152 | {
153 | "cell_type": "code",
154 | "execution_count": null,
155 | "metadata": {
156 | "collapsed": false
157 | },
158 | "outputs": [],
159 | "source": [
160 | "mylist1."
161 | ]
162 | },
163 | {
164 | "cell_type": "markdown",
165 | "metadata": {},
166 | "source": [
167 | "The help function can be used with methods too:"
168 | ]
169 | },
170 | {
171 | "cell_type": "code",
172 | "execution_count": null,
173 | "metadata": {
174 | "collapsed": false
175 | },
176 | "outputs": [],
177 | "source": [
178 | "help(mylist1.sort)"
179 | ]
180 | },
181 | {
182 | "cell_type": "markdown",
183 | "metadata": {},
184 | "source": [
185 | "Lastly, for more detailed help, you can call the help() function:"
186 | ]
187 | },
188 | {
189 | "cell_type": "code",
190 | "execution_count": null,
191 | "metadata": {
192 | "collapsed": false
193 | },
194 | "outputs": [],
195 | "source": [
196 | "help()"
197 | ]
198 | },
199 | {
200 | "cell_type": "markdown",
201 | "metadata": {
202 | "collapsed": true
203 | },
204 | "source": [
205 | "## Variable Methods"
206 | ]
207 | },
208 | {
209 | "cell_type": "markdown",
210 | "metadata": {},
211 | "source": [
212 | "Methods can also be called on variables. Note that the method list will differ depending upon the type of variable"
213 | ]
214 | },
215 | {
216 | "cell_type": "code",
217 | "execution_count": null,
218 | "metadata": {
219 | "collapsed": true
220 | },
221 | "outputs": [],
222 | "source": [
223 | "a = 'Variable' # String variable"
224 | ]
225 | },
226 | {
227 | "cell_type": "markdown",
228 | "metadata": {},
229 | "source": [
230 | "Some string Variable methods are as follows:"
231 | ]
232 | },
233 | {
234 | "cell_type": "code",
235 | "execution_count": null,
236 | "metadata": {
237 | "collapsed": false
238 | },
239 | "outputs": [],
240 | "source": [
241 | "a.count('a') # Counts the number of times the supplied argument appears in the variable"
242 | ]
243 | },
244 | {
245 | "cell_type": "code",
246 | "execution_count": null,
247 | "metadata": {
248 | "collapsed": false
249 | },
250 | "outputs": [],
251 | "source": [
252 | "a.lower() # Converts the string to lower case"
253 | ]
254 | },
255 | {
256 | "cell_type": "code",
257 | "execution_count": null,
258 | "metadata": {
259 | "collapsed": false
260 | },
261 | "outputs": [],
262 | "source": [
263 | "a.upper() # Converts the string to upper case"
264 | ]
265 | },
266 | {
267 | "cell_type": "code",
268 | "execution_count": null,
269 | "metadata": {
270 | "collapsed": false
271 | },
272 | "outputs": [],
273 | "source": [
274 | "a.capitalize() # Capitalizes only the first character of the string (use title() for title case)"
275 | ]
276 | },
277 | {
278 | "cell_type": "code",
279 | "execution_count": null,
280 | "metadata": {
281 | "collapsed": false
282 | },
283 | "outputs": [],
284 | "source": [
285 | "a.find('c') # Finds and returns the index of the first occurrence of the argument. Returns -1 if not found."
286 | ]
287 | },
288 | {
289 | "cell_type": "code",
290 | "execution_count": null,
291 | "metadata": {
292 | "collapsed": false
293 | },
294 | "outputs": [],
295 | "source": [
296 | "a.index('c') # Returns the index of the argument. Raises a ValueError if not found"
297 | ]
298 | }
299 | ],
300 | "metadata": {
301 | "anaconda-cloud": {},
302 | "kernelspec": {
303 | "display_name": "Python [default]",
304 | "language": "python",
305 | "name": "python3"
306 | },
307 | "language_info": {
308 | "codemirror_mode": {
309 | "name": "ipython",
310 | "version": 3
311 | },
312 | "file_extension": ".py",
313 | "mimetype": "text/x-python",
314 | "name": "python",
315 | "nbconvert_exporter": "python",
316 | "pygments_lexer": "ipython3",
317 | "version": "3.5.2"
318 | }
319 | },
320 | "nbformat": 4,
321 | "nbformat_minor": 0
322 | }
323 |
--------------------------------------------------------------------------------
/.ipynb_checkpoints/B08E. Loops & Iterating Exercises-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# B08E: Loops & Iterating Exercises"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {
13 | "collapsed": true
14 | },
15 | "source": [
16 | "#### Exercise 1: Create a for loop that takes each item in list1, multiplies it by the multiply variable, adds the add variable and then divides by the divide variable. Then append the output to the answers list. Print the completed answers list."
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 2,
22 | "metadata": {
23 | "collapsed": true
24 | },
25 | "outputs": [],
26 | "source": [
27 | "list1 = [2,4,6,1,6,7,8,4,3,10]\n",
28 | "multiply = 3\n",
29 | "add = 4\n",
30 | "divide = 2.89\n",
31 | "\n",
32 | "answers = []"
33 | ]
34 | },
35 | {
36 | "cell_type": "code",
37 | "execution_count": 4,
38 | "metadata": {
39 | "collapsed": false
40 | },
41 | "outputs": [
42 | {
43 | "data": {
44 | "text/plain": [
45 | "[3.460207612456747,\n",
46 | " 5.536332179930795,\n",
47 | " 7.612456747404844,\n",
48 | " 2.422145328719723,\n",
49 | " 7.612456747404844,\n",
50 | " 8.650519031141869,\n",
51 | " 9.688581314878892,\n",
52 | " 5.536332179930795,\n",
53 | " 4.498269896193771,\n",
54 | " 11.76470588235294,\n",
55 | " 3.460207612456747,\n",
56 | " 5.536332179930795,\n",
57 | " 7.612456747404844,\n",
58 | " 2.422145328719723,\n",
59 | " 7.612456747404844,\n",
60 | " 8.650519031141869,\n",
61 | " 9.688581314878892,\n",
62 | " 5.536332179930795,\n",
63 | " 4.498269896193771,\n",
64 | " 11.76470588235294]"
65 | ]
66 | },
67 | "execution_count": 4,
68 | "metadata": {},
69 | "output_type": "execute_result"
70 | }
71 | ],
72 | "source": [
73 | "for number in list1:\n",
74 | " answer = ((number * multiply) + add)/ divide\n",
75 | " answers.append(answer)\n",
76 | "\n",
77 | "answers"
78 | ]
79 | },
80 | {
81 | "cell_type": "markdown",
82 | "metadata": {},
83 | "source": [
84 | "#### Exercise 2: Create a while loop that prints the value of i as long as it is less than 100 and increases i by 10 for each iteration."
85 | ]
86 | },
87 | {
88 | "cell_type": "code",
89 | "execution_count": 6,
90 | "metadata": {
91 | "collapsed": true
92 | },
93 | "outputs": [],
94 | "source": [
95 | "i = 0"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": 7,
101 | "metadata": {
102 | "collapsed": false
103 | },
104 | "outputs": [
105 | {
106 | "name": "stdout",
107 | "output_type": "stream",
108 | "text": [
109 | "0\n",
110 | "10\n",
111 | "20\n",
112 | "30\n",
113 | "40\n",
114 | "50\n",
115 | "60\n",
116 | "70\n",
117 | "80\n",
118 | "90\n"
119 | ]
120 | }
121 | ],
122 | "source": [
123 | "while i < 100:\n",
124 | " print(i)\n",
125 | " i += 10"
126 | ]
127 | },
128 | {
129 | "cell_type": "markdown",
130 | "metadata": {},
131 | "source": [
132 | "#### Exercise 3: Use a list comprehension to create a new list containing the values in list2 squared."
133 | ]
134 | },
135 | {
136 | "cell_type": "code",
137 | "execution_count": 9,
138 | "metadata": {
139 | "collapsed": true
140 | },
141 | "outputs": [],
142 | "source": [
143 | "list2 = [132,5345,63576,234234,64563,234,745,98679,344535,467568,36,3456,457,67,3456,3456,567,47,48,26]"
144 | ]
145 | },
146 | {
147 | "cell_type": "code",
148 | "execution_count": 15,
149 | "metadata": {
150 | "collapsed": false
151 | },
152 | "outputs": [
153 | {
154 | "data": {
155 | "text/plain": [
156 | "[303595776,\n",
157 | " 816189189450625,\n",
158 | " 16337018469689266176,\n",
159 | " 3010230415457092363536,\n",
160 | " 17375399902721378961,\n",
161 | " 2998219536,\n",
162 | " 308052750625,\n",
163 | " 94819783425503691681,\n",
164 | " 14090726560878920750625,\n",
165 | " 47794632091025109221376,\n",
166 | " 1679616,\n",
167 | " 142657607172096,\n",
168 | " 43617904801,\n",
169 | " 20151121,\n",
170 | " 142657607172096,\n",
171 | " 142657607172096,\n",
172 | " 103355177121,\n",
173 | " 4879681,\n",
174 | " 5308416,\n",
175 | " 456976]"
176 | ]
177 | },
178 | "execution_count": 15,
179 | "metadata": {},
180 | "output_type": "execute_result"
181 | }
182 | ],
183 | "source": [
184 | "list3 = [number**2 for number in list2]\n",
185 | "list3"
186 | ]
187 | },
188 | {
189 | "cell_type": "markdown",
190 | "metadata": {
191 | "collapsed": false
192 | },
193 | "source": [
194 | "#### Exercise 4: Create an iterator that iterates through list3, converting types or passing where appropriate."
195 | ]
196 | },
197 | {
198 | "cell_type": "code",
199 | "execution_count": null,
200 | "metadata": {
201 | "collapsed": true
202 | },
203 | "outputs": [],
204 | "source": [
205 | "list3 = [1,2,3,4,'5',6,7,'8',9,True,10,'11',False,None]\n",
206 | "var = 0"
207 | ]
208 | },
209 | {
210 | "cell_type": "code",
211 | "execution_count": null,
212 | "metadata": {
213 | "collapsed": true
214 | },
215 | "outputs": [],
216 | "source": []
217 | }
218 | ],
219 | "metadata": {
220 | "kernelspec": {
221 | "display_name": "Python [default]",
222 | "language": "python",
223 | "name": "python3"
224 | },
225 | "language_info": {
226 | "codemirror_mode": {
227 | "name": "ipython",
228 | "version": 3
229 | },
230 | "file_extension": ".py",
231 | "mimetype": "text/x-python",
232 | "name": "python",
233 | "nbconvert_exporter": "python",
234 | "pygments_lexer": "ipython3",
235 | "version": "3.5.2"
236 | }
237 | },
238 | "nbformat": 4,
239 | "nbformat_minor": 0
240 | }
241 |
--------------------------------------------------------------------------------
/.ipynb_checkpoints/B08S. Loops & Iterating Solutions-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# B08S: Loops & Iterating Solutions"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {
13 | "collapsed": true
14 | },
15 | "source": [
16 | "#### Exercise 1: Create a for loop that takes each item in list1, multiplies it by the multiply variable, adds the add variable and then divides by the divide variable. Then append the output to the answers list. Print the completed answers list."
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": null,
22 | "metadata": {
23 | "collapsed": true
24 | },
25 | "outputs": [],
26 | "source": [
27 | "list1 = [2,4,6,1,6,7,8,4,3,10]\n",
28 | "multiply = 3\n",
29 | "add = 4\n",
30 | "divide = 2.89\n",
31 | "\n",
32 | "answers = []"
33 | ]
34 | },
35 | {
36 | "cell_type": "code",
37 | "execution_count": null,
38 | "metadata": {
39 | "collapsed": false
40 | },
41 | "outputs": [],
42 | "source": [
43 | "for item in list1:\n",
44 | " answers.append((item * multiply + add)/divide)\n",
45 | " \n",
46 | "print(answers)"
47 | ]
48 | },
49 | {
50 | "cell_type": "markdown",
51 | "metadata": {},
52 | "source": [
53 | "#### Exercise 2: Create a while loop that prints the value of i as long as it is less than 100 and increases i by 10 for each iteration."
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": null,
59 | "metadata": {
60 | "collapsed": true
61 | },
62 | "outputs": [],
63 | "source": [
64 | "i = 0"
65 | ]
66 | },
67 | {
68 | "cell_type": "code",
69 | "execution_count": null,
70 | "metadata": {
71 | "collapsed": false
72 | },
73 | "outputs": [],
74 | "source": [
75 | "while i < 100:\n",
76 | " print(i)\n",
77 | " i+=10"
78 | ]
79 | },
80 | {
81 | "cell_type": "markdown",
82 | "metadata": {},
83 | "source": [
84 | "#### Exercise 3: Use a list comprehension to create a new list containing the values in list2 squared."
85 | ]
86 | },
87 | {
88 | "cell_type": "code",
89 | "execution_count": null,
90 | "metadata": {
91 | "collapsed": true
92 | },
93 | "outputs": [],
94 | "source": [
95 | "list2 = [132,5345,63576,234234,64563,234,745,98679,344535,467568,36,3456,457,67,3456,3456,567,47,48,26]"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": null,
101 | "metadata": {
102 | "collapsed": false
103 | },
104 | "outputs": [],
105 | "source": [
106 | "new_list = [item**2 for item in list2]\n",
107 | "new_list"
108 | ]
109 | },
110 | {
111 | "cell_type": "markdown",
112 | "metadata": {},
113 | "source": [
114 | "#### Exercise 4: Create an iterator that iterates through list3, converting or passing where appropriate."
115 | ]
116 | },
117 | {
118 | "cell_type": "code",
119 | "execution_count": 5,
120 | "metadata": {
121 | "collapsed": false
122 | },
123 | "outputs": [],
124 | "source": [
125 | "list3 = [1,2,3,4,'5',6,7,'8',9,True,10,'11',False,None]\n",
126 | "var = 0"
127 | ]
128 | },
129 | {
130 | "cell_type": "code",
131 | "execution_count": 6,
132 | "metadata": {
133 | "collapsed": false
134 | },
135 | "outputs": [
136 | {
137 | "name": "stdout",
138 | "output_type": "stream",
139 | "text": [
140 | "66\n"
141 | ]
142 | }
143 | ],
144 | "source": [
145 | "for item in list3:\n",
147 | "    try: \n",
148 | " var = var + item\n",
149 | " except TypeError:\n",
150 | " try:\n",
151 | " var = var + int(item)\n",
152 | " except TypeError:\n",
153 | " pass\n",
154 | " \n",
155 | "print(var)"
156 | ]
157 | },
158 | {
159 | "cell_type": "code",
160 | "execution_count": null,
161 | "metadata": {
162 | "collapsed": true
163 | },
164 | "outputs": [],
165 | "source": []
166 | }
167 | ],
168 | "metadata": {
169 | "anaconda-cloud": {},
170 | "kernelspec": {
171 | "display_name": "Python [default]",
172 | "language": "python",
173 | "name": "python3"
174 | },
175 | "language_info": {
176 | "codemirror_mode": {
177 | "name": "ipython",
178 | "version": 3
179 | },
180 | "file_extension": ".py",
181 | "mimetype": "text/x-python",
182 | "name": "python",
183 | "nbconvert_exporter": "python",
184 | "pygments_lexer": "ipython3",
185 | "version": "3.5.2"
186 | }
187 | },
188 | "nbformat": 4,
189 | "nbformat_minor": 0
190 | }
191 |
--------------------------------------------------------------------------------
/.ipynb_checkpoints/D00. Introduction to Python for Data Analysis-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# D00: Introduction to Python for Data Analysis"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "Welcome to the second section of this course: Python for Data Analysis!\n",
15 | "\n",
16 | "In this section of the course we'll explore how you can use Python to read, clean, manage, structure, wrangle, aggregate, display and output data. This kind of work is critical to being able to glean insight from, and visualise data.\n",
17 | "\n",
18 | "In this lesson we'll start by meeting a few of the useful data analysis libraries that we'll be using."
19 | ]
20 | },
21 | {
22 | "cell_type": "markdown",
23 | "metadata": {},
24 | "source": [
25 | "
"
26 | ]
27 | },
28 | {
29 | "cell_type": "markdown",
30 | "metadata": {},
31 | "source": [
32 | "Numpy is short for 'Numerical Python' and it is a library focused around mathematical computing. Numpy provides data structures that allow us to create multi-dimensional arrays and matrices and also provides a large number of mathematical functions.\n",
33 | "\n",
34 | "Numpy comes as part of the Anaconda installation so you don't need to download anything further."
35 | ]
36 | },
37 | {
38 | "cell_type": "markdown",
39 | "metadata": {},
40 | "source": [
41 | "
"
42 | ]
43 | },
44 | {
45 | "cell_type": "markdown",
46 | "metadata": {},
47 | "source": [
48 | "Scipy is short for 'Scientific Python' and is generally used for analytics, statistics and engineering.\n",
49 | "\n",
50 | "SciPy contains a wide variety of modules including algebra, interpolation, statistics and many more. We'll not be delving too deeply into scipy as part of this course, however it is an important library with a lot of useful functions that you should be aware of.\n",
51 | "\n",
52 | "As with Numpy, Scipy comes as part of Anaconda."
53 | ]
54 | },
55 | {
56 | "cell_type": "markdown",
57 | "metadata": {
58 | "collapsed": true
59 | },
60 | "source": [
61 | "
"
62 | ]
63 | },
64 | {
65 | "cell_type": "markdown",
66 | "metadata": {},
67 | "source": [
68 | "Pandas is built on top of Numpy and allows us to create more complex data structures than we've previously met, the likes of which we're probably more comfortable with as analysts, including series and dataframes. It also provides us with a wide range of tools for reading, dealing with and transforming these data structures as well as tools to convert data from traditional Python data structures such as lists, tuples and dictionaries.\n",
69 | "\n",
70 | "Pandas is built on top of Numpy so the data structures that are created with both packages integrate well with each other and also comes as part of the Anaconda installation."
71 | ]
72 | },
73 | {
74 | "cell_type": "markdown",
75 | "metadata": {
76 | "collapsed": true
77 | },
78 | "source": [
79 | "## Further Reading"
80 | ]
81 | },
82 | {
83 | "cell_type": "markdown",
84 | "metadata": {},
85 | "source": [
86 | "10 Minutes to Pandas
\n",
87 | "Pandas comparison with SQL
\n",
88 | "Pandas comparison with R
\n",
89 | "Pandas comparison with SAS
\n",
90 | "Excellent selection of Pandas tutorials
"
91 | ]
92 | }
93 | ],
94 | "metadata": {
95 | "anaconda-cloud": {},
96 | "kernelspec": {
97 | "display_name": "Python [default]",
98 | "language": "python",
99 | "name": "python3"
100 | },
101 | "language_info": {
102 | "codemirror_mode": {
103 | "name": "ipython",
104 | "version": 3
105 | },
106 | "file_extension": ".py",
107 | "mimetype": "text/x-python",
108 | "name": "python",
109 | "nbconvert_exporter": "python",
110 | "pygments_lexer": "ipython3",
111 | "version": "3.5.2"
112 | }
113 | },
114 | "nbformat": 4,
115 | "nbformat_minor": 0
116 | }
117 |
--------------------------------------------------------------------------------
/.ipynb_checkpoints/D01E. Advanced Data Structures Exercises-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# D01E: Advanced Data Structures Exercises"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "#### Exercise 1: Create the following Numpy arrays with random numbers for the following dimensions:\n",
15 | "\n",
16 | "* 5 x 5\n",
17 | "* 1 x 20\n",
18 | "* 90 x 90\n",
19 | "\n",
20 | "Also round the arrays to 3 decimal places."
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 5,
26 | "metadata": {
27 | "collapsed": false
28 | },
29 | "outputs": [
30 | {
31 | "data": {
32 | "text/plain": [
33 | "array([[ 0.6, 0.5, 0.8, 0.6, 1. ],\n",
34 | " [ 0.1, 0.6, 0.3, 0.2, 0.1],\n",
35 | " [ 0.7, 0.6, 0.9, 0.1, 0.6],\n",
36 | " [ 0.2, 0.8, 0.6, 0. , 0.6],\n",
37 | " [ 0.4, 1. , 0.9, 0.2, 0.2]])"
38 | ]
39 | },
40 | "execution_count": 5,
41 | "metadata": {},
42 | "output_type": "execute_result"
43 | }
44 | ],
45 | "source": [
46 | "import numpy as np\n",
47 | "\n",
48 | "arr1 = np.random.random((5,5)).round(3)\n",
49 | "arr2 = np.random.random((1,20)).round(3)\n",
50 | "arr3 = np.random.random((90,90)).round(3)\n",
51 | "\n",
52 | "\n",
53 | "arr1"
54 | ]
55 | },
56 | {
57 | "cell_type": "markdown",
58 | "metadata": {},
59 | "source": [
60 | "#### Exercise 2: Create a 1 dimensional array containing 10 records and convert this to a pandas series with the custom index:"
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": null,
66 | "metadata": {
67 | "collapsed": true
68 | },
69 | "outputs": [],
70 | "source": [
71 | "custom_index = ['A','B','C','D','E','F','G','H','I','J']"
72 | ]
73 | },
74 | {
75 | "cell_type": "code",
76 | "execution_count": null,
77 | "metadata": {
78 | "collapsed": false
79 | },
80 | "outputs": [],
81 | "source": [
82 | "import pandas as pd\n",
83 | "\n"
84 | ]
85 | },
86 | {
87 | "cell_type": "markdown",
88 | "metadata": {},
89 | "source": [
90 | "#### Exercise 3: Create a Pandas dataframe from the following data, and name the columns col1 - col5. Then...\n",
91 | "\n",
92 | "1) Convert col1 to an array.\n",
93 | "2) Convert col2 to a list.\n",
94 | "3) Convert col3 to a dictionary."
95 | ]
96 | },
97 | {
98 | "cell_type": "code",
99 | "execution_count": null,
100 | "metadata": {
101 | "collapsed": false
102 | },
103 | "outputs": [],
104 | "source": [
105 | "index = np.arange(0,5)\n",
106 | "data1 = np.random.random(5)\n",
107 | "data2 = np.random.random(5)\n",
108 | "data3 = np.random.random(5)\n",
109 | "data4 = np.random.random(5)\n",
110 | "data5 = np.random.random(5)"
111 | ]
112 | },
113 | {
114 | "cell_type": "code",
115 | "execution_count": null,
116 | "metadata": {
117 | "collapsed": false
118 | },
119 | "outputs": [],
120 | "source": []
121 | }
122 | ],
123 | "metadata": {
124 | "kernelspec": {
125 | "display_name": "Python [default]",
126 | "language": "python",
127 | "name": "python3"
128 | },
129 | "language_info": {
130 | "codemirror_mode": {
131 | "name": "ipython",
132 | "version": 3
133 | },
134 | "file_extension": ".py",
135 | "mimetype": "text/x-python",
136 | "name": "python",
137 | "nbconvert_exporter": "python",
138 | "pygments_lexer": "ipython3",
139 | "version": "3.5.2"
140 | }
141 | },
142 | "nbformat": 4,
143 | "nbformat_minor": 0
144 | }
145 |
--------------------------------------------------------------------------------
/.ipynb_checkpoints/D01S. Advanced Data Structures Solutions-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# D01S: Advanced Data Structures Solutions"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "#### Exercise 1: Create the following Numpy arrays with random numbers for the following dimensions:\n",
15 | "\n",
16 | "* 5 x 5\n",
17 | "* 1 x 20\n",
18 | "* 90 x 90\n",
19 | "\n",
20 | "Also round the arrays to 3 decimal places."
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": null,
26 | "metadata": {
27 | "collapsed": false
28 | },
29 | "outputs": [],
30 | "source": [
31 | "import numpy as np\n",
32 | "\n",
33 | "arr1 = np.random.random((5,5))\n",
34 | "arr1 = np.round(arr1,3) \n",
35 | "arr2 = np.random.random((1,20))\n",
36 | "arr2 = np.round(arr2,3) \n",
37 | "arr3 = np.random.random((90,90))\n",
38 | "arr3 = np.round(arr3,3)\n",
39 | "arr3"
40 | ]
41 | },
42 | {
43 | "cell_type": "markdown",
44 | "metadata": {},
45 | "source": [
46 | "#### Exercise 2: Create a 1 dimensional array containing 10 records and convert this to a pandas series with the custom index:"
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": null,
52 | "metadata": {
53 | "collapsed": true
54 | },
55 | "outputs": [],
56 | "source": [
57 | "custom_index = ['A','B','C','D','E','F','G','H','I','J']"
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": null,
63 | "metadata": {
64 | "collapsed": false
65 | },
66 | "outputs": [],
67 | "source": [
68 | "import pandas as pd\n",
69 | "\n",
70 | "arr4 = np.random.random(10)\n",
71 | "ser1 = pd.Series(data=arr4,index=custom_index)\n",
72 | "ser1"
73 | ]
74 | },
75 | {
76 | "cell_type": "markdown",
77 | "metadata": {},
78 | "source": [
79 | "#### Exercise 3: Create a Pandas dataframe from the following data, and name the columns col1 - col5. Then...\n",
80 | "\n",
81 | "1) Convert col1 to an array.\n",
82 | "2) Convert col2 to a list.\n",
83 | "3) Convert col3 to a dictionary."
84 | ]
85 | },
86 | {
87 | "cell_type": "code",
88 | "execution_count": null,
89 | "metadata": {
90 | "collapsed": false
91 | },
92 | "outputs": [],
93 | "source": [
94 | "index = np.arange(0,5)\n",
95 | "data1 = np.random.random(5)\n",
96 | "data2 = np.random.random(5)\n",
97 | "data3 = np.random.random(5)\n",
98 | "data4 = np.random.random(5)\n",
99 | "data5 = np.random.random(5)\n",
100 | "\n",
101 | "data1"
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "execution_count": null,
107 | "metadata": {
108 | "collapsed": false
109 | },
110 | "outputs": [],
111 | "source": [
112 | "cols = ['col1','col2','col3','col4','col5']\n",
113 | "df1 = pd.DataFrame(data=[data1,data2,data3,data4,data5], \n",
114 | " index=index, \n",
115 | " columns=cols) \n",
116 | "df1\n",
117 | "\n",
118 | "\n",
119 | "array = df1['col1'].values\n",
120 | "list = df1['col2'].tolist()\n",
121 | "dict = df1['col3'].to_dict()\n",
122 | "\n",
123 | "print(array,list,dict)"
124 | ]
125 | },
126 | {
127 | "cell_type": "code",
128 | "execution_count": null,
129 | "metadata": {
130 | "collapsed": true
131 | },
132 | "outputs": [],
133 | "source": []
134 | }
135 | ],
136 | "metadata": {
137 | "kernelspec": {
138 | "display_name": "Python [default]",
139 | "language": "python",
140 | "name": "python3"
141 | },
142 | "language_info": {
143 | "codemirror_mode": {
144 | "name": "ipython",
145 | "version": 3
146 | },
147 | "file_extension": ".py",
148 | "mimetype": "text/x-python",
149 | "name": "python",
150 | "nbconvert_exporter": "python",
151 | "pygments_lexer": "ipython3",
152 | "version": "3.5.2"
153 | }
154 | },
155 | "nbformat": 4,
156 | "nbformat_minor": 0
157 | }
158 |
--------------------------------------------------------------------------------
/.ipynb_checkpoints/D03E. Dataframes - Handling Data Exercises-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# D03E: Dataframes - Handling Data Exercises"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "#### Exercise 1: Import the file below and then:\n",
15 | "\n",
16 | "1) Remove the Unnamed:0 column
\n",
17 | "2) Rename the wl1-3 columns to say 'Weight Loss Week...' instead of wl
\n",
18 | "3) Rename the se1-3 columns to say 'Self Esteem Week...' instead of se
\n",
19 | "4) Create a function that changes data in the group column from 'DietEx' to 'Diet & Exercise'
\n",
20 | "5) Sort the dataframe by the wl1 column in descending order.\n"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": null,
26 | "metadata": {
27 | "collapsed": true
28 | },
29 | "outputs": [],
30 | "source": [
31 | "path = \"https://vincentarelbundock.github.io/Rdatasets/csv/car/WeightLoss.csv\""
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": null,
37 | "metadata": {
38 | "collapsed": false
39 | },
40 | "outputs": [],
41 | "source": []
42 | },
43 | {
44 | "cell_type": "code",
45 | "execution_count": null,
46 | "metadata": {
47 | "collapsed": false
48 | },
49 | "outputs": [],
50 | "source": []
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": null,
55 | "metadata": {
56 | "collapsed": true
57 | },
58 | "outputs": [],
59 | "source": []
60 | }
61 | ],
62 | "metadata": {
63 | "kernelspec": {
64 | "display_name": "Python [default]",
65 | "language": "python",
66 | "name": "python3"
67 | },
68 | "language_info": {
69 | "codemirror_mode": {
70 | "name": "ipython",
71 | "version": 3
72 | },
73 | "file_extension": ".py",
74 | "mimetype": "text/x-python",
75 | "name": "python",
76 | "nbconvert_exporter": "python",
77 | "pygments_lexer": "ipython3",
78 | "version": "3.5.2"
79 | }
80 | },
81 | "nbformat": 4,
82 | "nbformat_minor": 0
83 | }
84 |
--------------------------------------------------------------------------------
/.ipynb_checkpoints/D04E. Dataframes - Refining and Indexing Exercises-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# D04E: Dataframes - Refining and Indexing Exercises"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "#### Exercise 1: Import the file below and then:\n",
15 | "\n",
16 | "1) Drop the Unnamed:0 column\n",
17 | "2) Return the columns list as a new variable\n",
18 | "3) Keep only the price, ram, cd and trend columns\n",
19 | "4) Create new datasets for each of the following where statements:\n",
20 | "\n",
21 | "* Where trend is greater than 12 and less than or equal to 24\n",
22 | "* Where ram is either 16 or 32\n",
23 | "* Where price is greater than 2500 and cd is yes"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": null,
29 | "metadata": {
30 | "collapsed": true
31 | },
32 | "outputs": [],
33 | "source": [
34 | "path = \"https://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Computers.csv\""
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": null,
40 | "metadata": {
41 | "collapsed": false
42 | },
43 | "outputs": [],
44 | "source": [
45 | "import pandas as pd\n",
46 | "\n"
47 | ]
48 | },
49 | {
50 | "cell_type": "markdown",
51 | "metadata": {},
52 | "source": [
53 | "#### Exercise 2: Retrieve the records at the following item locations from the base dataset:\n",
54 | "\n",
55 | "* 300\n",
56 | "* 1000-1010\n",
57 | "* Last 10 records\n",
58 | "* The middle record"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": null,
64 | "metadata": {
65 | "collapsed": false
66 | },
67 | "outputs": [],
68 | "source": []
69 | },
70 | {
71 | "cell_type": "markdown",
72 | "metadata": {},
73 | "source": [
74 | "#### Exercise 3: From the base dataset you imported:\n",
75 | "* Set the index as the trend column\n",
76 | "* Drop the name of the index\n",
77 | "* Create a new dataframe for the trend values 1-6\n",
78 | "* reset the index of this new dataframe\n",
79 | "* drop any unwanted variables"
80 | ]
81 | },
82 | {
83 | "cell_type": "code",
84 | "execution_count": null,
85 | "metadata": {
86 | "collapsed": false
87 | },
88 | "outputs": [],
89 | "source": []
90 | },
91 | {
92 | "cell_type": "code",
93 | "execution_count": null,
94 | "metadata": {
95 | "collapsed": true
96 | },
97 | "outputs": [],
98 | "source": []
99 | }
100 | ],
101 | "metadata": {
102 | "kernelspec": {
103 | "display_name": "Python [default]",
104 | "language": "python",
105 | "name": "python3"
106 | },
107 | "language_info": {
108 | "codemirror_mode": {
109 | "name": "ipython",
110 | "version": 3
111 | },
112 | "file_extension": ".py",
113 | "mimetype": "text/x-python",
114 | "name": "python",
115 | "nbconvert_exporter": "python",
116 | "pygments_lexer": "ipython3",
117 | "version": "3.5.2"
118 | }
119 | },
120 | "nbformat": 4,
121 | "nbformat_minor": 0
122 | }
123 |
--------------------------------------------------------------------------------
/.ipynb_checkpoints/D04S. Dataframes - Refining and Indexing Solutions-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# D04S: Dataframes - Refining and Indexing Solutions"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "#### Exercise 1: Import the file below and then:\n",
15 | "\n",
16 | "1) Drop the Unnamed:0 column\n",
17 | "2) Return the columns list as a new variable\n",
18 | "3) Keep only the price, ram, cd and trend columns\n",
19 | "4) Create new datasets for each of the following where statements:\n",
20 | "\n",
21 | "* Where trend is greater than 12 and less than or equal to 24\n",
22 | "* Where ram is either 16 or 32\n",
23 | "* Where price is greater than 2500 and cd is yes"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": null,
29 | "metadata": {
30 | "collapsed": true
31 | },
32 | "outputs": [],
33 | "source": [
34 | "path = \"https://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Computers.csv\""
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": null,
40 | "metadata": {
41 | "collapsed": false
42 | },
43 | "outputs": [],
44 | "source": [
45 | "import pandas as pd\n",
46 | "\n",
47 | "df = pd.read_csv(path)\n",
48 | "\n",
49 | "df = df.drop('Unnamed: 0',axis=1)\n",
50 | "cols = df.columns\n",
51 | "df = df[['price','ram','cd','trend']]\n",
52 | "\n",
53 | "df1 = df[(df['trend'] > 12) & (df['trend'] <= 24)]\n",
54 | "df2 = df[df['ram'].isin([16,32])]\n",
55 | "df3 = df[(df['price'] > 2500) & (df['cd'] == 'yes')]"
56 | ]
57 | },
58 | {
59 | "cell_type": "markdown",
60 | "metadata": {},
61 | "source": [
62 | "#### Exercise 2: Retrieve the records at the following item locations from the base dataset:\n",
63 | "\n",
64 | "* 300\n",
65 | "* 1000-1010\n",
66 | "* Last 10 records\n",
67 | "* The middle record"
68 | ]
69 | },
70 | {
71 | "cell_type": "code",
72 | "execution_count": null,
73 | "metadata": {
74 | "collapsed": false
75 | },
76 | "outputs": [],
77 | "source": [
78 | "df.iloc[300]\n",
79 | "df.iloc[1000:1010]\n",
80 | "df.iloc[-10:-1]\n",
81 | "df.iloc[int((len(df)/2))]"
82 | ]
83 | },
84 | {
85 | "cell_type": "markdown",
86 | "metadata": {},
87 | "source": [
88 | "#### Exercise 3: From the base dataset you imported:\n",
89 | "* Set the index as the trend column\n",
90 | "* Drop the name of the index\n",
91 | "* Create a new dataframe for the trend values 1-6\n",
92 | "* reset the index of this new dataframe\n",
93 | "* drop any unwanted variables"
94 | ]
95 | },
96 | {
97 | "cell_type": "code",
98 | "execution_count": null,
99 | "metadata": {
100 | "collapsed": false
101 | },
102 | "outputs": [],
103 | "source": [
104 | "df = pd.read_csv(path)\n",
105 | "\n",
106 | "df = df.set_index('trend')\n",
107 | "df.index.name = None\n",
108 | "new = df.ix[[1,2,3,4,5,6]].reset_index().drop(['index','Unnamed: 0'], axis=1)\n",
109 | "new.head(5)"
110 | ]
111 | },
112 | {
113 | "cell_type": "code",
114 | "execution_count": null,
115 | "metadata": {
116 | "collapsed": true
117 | },
118 | "outputs": [],
119 | "source": []
120 | }
121 | ],
122 | "metadata": {
123 | "kernelspec": {
124 | "display_name": "Python [default]",
125 | "language": "python",
126 | "name": "python3"
127 | },
128 | "language_info": {
129 | "codemirror_mode": {
130 | "name": "ipython",
131 | "version": 3
132 | },
133 | "file_extension": ".py",
134 | "mimetype": "text/x-python",
135 | "name": "python",
136 | "nbconvert_exporter": "python",
137 | "pygments_lexer": "ipython3",
138 | "version": "3.5.2"
139 | }
140 | },
141 | "nbformat": 4,
142 | "nbformat_minor": 0
143 | }
144 |
--------------------------------------------------------------------------------
/.ipynb_checkpoints/D05E. Dataframes Merging & Concatenating Exercises-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# D05E: Dataframes Merging & Concatenating Exercises"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "#### Exercise 1: For the dataframes below:\n",
15 | "\n",
16 | "1) Concatenate df1 and df2\n",
17 | "2) Merge df1 and df2 using their index\n",
18 | "3) Merge df3 and df4 using the key & UID values\n",
19 | "4) Merge df3 and df4 by setting the key / UID value as the index on both datasets and then merging on that.\n",
20 | "5) What happens when you change the name of 'data 2' in df4 to 'data 1'?\n",
21 | "\n"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": null,
27 | "metadata": {
28 | "collapsed": false
29 | },
30 | "outputs": [],
31 | "source": [
32 | "import pandas as pd\n",
33 | "import numpy as np\n",
34 | "\n",
35 | "df1 = pd.DataFrame({'key':np.arange(10),\n",
36 | " 'data 1': np.random.random(10)})\n",
37 | "\n",
38 | "df2 = pd.DataFrame({'key':np.arange(10)+10,\n",
39 | " 'data 1': np.random.random(10)})\n",
40 | "\n",
41 | "\n",
42 | "df3 = pd.DataFrame({'key':['A1','A2','A3','A3','A4','A5','A6','A7','A8','A8'],\n",
43 | " 'data 1': np.arange(10)})\n",
44 | "\n",
45 | "df4 = pd.DataFrame({'UID':['A1','A2','A3','A4','A5','A6','A7','A8','A9'],\n",
46 | " 'data 2': np.arange(9)+10})"
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": null,
52 | "metadata": {
53 | "collapsed": false
54 | },
55 | "outputs": [],
56 | "source": []
57 | },
58 | {
59 | "cell_type": "code",
60 | "execution_count": null,
61 | "metadata": {
62 | "collapsed": false
63 | },
64 | "outputs": [],
65 | "source": []
66 | },
67 | {
68 | "cell_type": "code",
69 | "execution_count": null,
70 | "metadata": {
71 | "collapsed": false
72 | },
73 | "outputs": [],
74 | "source": []
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": null,
79 | "metadata": {
80 | "collapsed": false
81 | },
82 | "outputs": [],
83 | "source": []
84 | },
85 | {
86 | "cell_type": "code",
87 | "execution_count": null,
88 | "metadata": {
89 | "collapsed": false
90 | },
91 | "outputs": [],
92 | "source": []
93 | }
94 | ],
95 | "metadata": {
96 | "kernelspec": {
97 | "display_name": "Python [default]",
98 | "language": "python",
99 | "name": "python3"
100 | },
101 | "language_info": {
102 | "codemirror_mode": {
103 | "name": "ipython",
104 | "version": 3
105 | },
106 | "file_extension": ".py",
107 | "mimetype": "text/x-python",
108 | "name": "python",
109 | "nbconvert_exporter": "python",
110 | "pygments_lexer": "ipython3",
111 | "version": "3.5.2"
112 | }
113 | },
114 | "nbformat": 4,
115 | "nbformat_minor": 0
116 | }
117 |
--------------------------------------------------------------------------------
/.ipynb_checkpoints/D05S. Dataframes Merging & Concatenating Solutions-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# D05S: Dataframes Merging & Concatenating Solutions"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "#### Exercise 1: For the dataframes below:\n",
15 | "\n",
16 | "1) Concatenate df1 and df2\n",
17 | "2) Merge df1 and df2 using their index\n",
18 | "3) Merge df3 and df4 using the key & UID values\n",
19 | "4) Merge df3 and df4 by setting the key / UID value as the index on both datasets and then merging on that.\n",
20 | "5) What happens when you change the name of 'data 2' in df4 to 'data 1'?\n",
21 | "\n"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 1,
27 | "metadata": {
28 | "collapsed": false
29 | },
30 | "outputs": [],
31 | "source": [
32 | "import pandas as pd\n",
33 | "import numpy as np\n",
34 | "\n",
35 | "df1 = pd.DataFrame({'key':np.arange(10),\n",
36 | " 'data 1': np.random.random(10)})\n",
37 | "\n",
38 | "df2 = pd.DataFrame({'key':np.arange(10)+10,\n",
39 | " 'data 1': np.random.random(10)})\n",
40 | "\n",
41 | "\n",
42 | "df3 = pd.DataFrame({'key':['A1','A2','A3','A3','A4','A5','A6','A7','A8','A8'],\n",
43 | " 'data 1': np.arange(10)})\n",
44 | "\n",
45 | "df4 = pd.DataFrame({'UID':['A1','A2','A3','A4','A5','A6','A7','A8','A9'],\n",
46 | " 'data 2': np.arange(9)+10})"
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": 3,
52 | "metadata": {
53 | "collapsed": false
54 | },
55 | "outputs": [],
56 | "source": [
57 | "# 1)\n",
58 | "\n",
59 | "Q1 = pd.concat([df1,df2]) # Concatenating the datasets\n",
60 | "Q1 = Q1.reset_index().drop('index',axis=1) # Resetting the index and dropping the column\n",
61 | "Q1 = Q1[['key','data 1']] # Reordering the variables \n",
62 | "Q1"
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": null,
68 | "metadata": {
69 | "collapsed": false
70 | },
71 | "outputs": [],
72 | "source": [
73 | "# 2)\n",
74 | "\n",
75 | "Q2 = pd.merge(left=df3,\n",
76 | " right=df4,\n",
77 | " left_index=True, \n",
78 | " right_index=True) # Merging the data\n",
79 | "\n",
80 | "Q2"
81 | ]
82 | },
83 | {
84 | "cell_type": "code",
85 | "execution_count": null,
86 | "metadata": {
87 | "collapsed": false
88 | },
89 | "outputs": [],
90 | "source": [
91 | "# 3)\n",
92 | "\n",
93 | "Q3 = pd.merge(left=df3,\n",
94 | " right=df4,\n",
95 | " left_on='key', \n",
96 | " right_on='UID') # Merging the data\n",
97 | "Q3 = Q3.drop('UID',axis=1) # Dropping the repeated variable\n",
98 | "Q3 = Q3.set_index('key') # Setting the index as the key value\n",
99 | "Q3.index.name = None # Renaming the index to None \n",
100 | "Q3"
101 | ]
102 | },
103 | {
104 | "cell_type": "code",
105 | "execution_count": null,
106 | "metadata": {
107 | "collapsed": false
108 | },
109 | "outputs": [],
110 | "source": [
111 | "# 4)\n",
112 | "\n",
113 | "df3 = df3.set_index('key')\n",
114 | "df4 = df4.set_index('UID')\n",
115 | "\n",
116 | "Q4 = pd.merge(left=df3,\n",
117 | " right=df4,\n",
118 | " left_index=True, \n",
119 | " right_index=True) # Merging the data\n",
120 | "\n",
121 | "Q4\n"
122 | ]
123 | },
124 | {
125 | "cell_type": "code",
126 | "execution_count": null,
127 | "metadata": {
128 | "collapsed": false
129 | },
130 | "outputs": [],
131 | "source": [
132 | "df4 = df4.rename(columns={'data 2':'data 1'})\n",
133 | "\n",
134 | "Q5 = pd.merge(left=df3,\n",
135 | " right=df4,\n",
136 | " left_index=True, \n",
137 | " right_index=True) # Merging the data\n",
138 | "\n",
139 | "Q5"
140 | ]
141 | }
142 | ],
143 | "metadata": {
144 | "kernelspec": {
145 | "display_name": "Python [default]",
146 | "language": "python",
147 | "name": "python3"
148 | },
149 | "language_info": {
150 | "codemirror_mode": {
151 | "name": "ipython",
152 | "version": 3
153 | },
154 | "file_extension": ".py",
155 | "mimetype": "text/x-python",
156 | "name": "python",
157 | "nbconvert_exporter": "python",
158 | "pygments_lexer": "ipython3",
159 | "version": "3.5.2"
160 | }
161 | },
162 | "nbformat": 4,
163 | "nbformat_minor": 0
164 | }
165 |
--------------------------------------------------------------------------------
/.ipynb_checkpoints/D06E. Summary Statistics & GroupBy Exercises-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# D06E: Summary Statistics & GroupBy Exercises"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {
13 | "collapsed": true
14 | },
15 | "source": [
16 | "#### Exercise 1: Using the titanic survivors csv dataset :\n",
17 | "\n",
18 | "1) Import the data and create a pandas dataframe\n",
19 | "2) Create a summary table of this data. (Hint - this is easier if you create a numeric variable and use sum())"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 56,
25 | "metadata": {
26 | "collapsed": true
27 | },
28 | "outputs": [],
29 | "source": [
30 | "import pandas as pd\n",
31 | "\n",
32 | "path = \"https://vincentarelbundock.github.io/Rdatasets/csv/COUNT/titanic.csv\""
33 | ]
34 | },
35 | {
36 | "cell_type": "code",
37 | "execution_count": null,
38 | "metadata": {
39 | "collapsed": false
40 | },
41 | "outputs": [],
42 | "source": []
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": null,
47 | "metadata": {
48 | "collapsed": false
49 | },
50 | "outputs": [],
51 | "source": []
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": null,
56 | "metadata": {
57 | "collapsed": false
58 | },
59 | "outputs": [],
60 | "source": []
61 | },
62 | {
63 | "cell_type": "code",
64 | "execution_count": null,
65 | "metadata": {
66 | "collapsed": false
67 | },
68 | "outputs": [],
69 | "source": []
70 | },
71 | {
72 | "cell_type": "code",
73 | "execution_count": null,
74 | "metadata": {
75 | "collapsed": true
76 | },
77 | "outputs": [],
78 | "source": []
79 | },
80 | {
81 | "cell_type": "code",
82 | "execution_count": null,
83 | "metadata": {
84 | "collapsed": true
85 | },
86 | "outputs": [],
87 | "source": []
88 | }
89 | ],
90 | "metadata": {
91 | "kernelspec": {
92 | "display_name": "Python [default]",
93 | "language": "python",
94 | "name": "python3"
95 | },
96 | "language_info": {
97 | "codemirror_mode": {
98 | "name": "ipython",
99 | "version": 3
100 | },
101 | "file_extension": ".py",
102 | "mimetype": "text/x-python",
103 | "name": "python",
104 | "nbconvert_exporter": "python",
105 | "pygments_lexer": "ipython3",
106 | "version": "3.5.2"
107 | }
108 | },
109 | "nbformat": 4,
110 | "nbformat_minor": 0
111 | }
112 |
--------------------------------------------------------------------------------
/.ipynb_checkpoints/D06S. Summary Statistics & GroupBy Solutions-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# D06S: Summary Statistics & GroupBy Solutions"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {
13 | "collapsed": true
14 | },
15 | "source": [
16 | "#### Exercise 1: Using the titanic survivors csv dataset :\n",
17 | "\n",
18 | "1) Import the data and create a pandas dataframe\n",
19 | "2) Create a summary table of this data. (Hint - this is easier if you create a numeric variable and use sum())\n",
20 | "3) How could you create a percentage variable? (Hint - Google / StackOverflow!)\n",
21 | "4) Merge the new Percentage variable onto your summary table\n"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": null,
27 | "metadata": {
28 | "collapsed": true
29 | },
30 | "outputs": [],
31 | "source": [
32 | "import pandas as pd\n",
33 | "\n",
34 | "path = \"https://vincentarelbundock.github.io/Rdatasets/csv/COUNT/titanic.csv\""
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": null,
40 | "metadata": {
41 | "collapsed": false
42 | },
43 | "outputs": [],
44 | "source": [
45 | "# Q1)\n",
46 | "\n",
47 | "tt = pd.read_csv(path)\n",
48 | "tt = tt.drop('Unnamed: 0', axis=1)\n",
49 | "tt['passengers'] = 1 # Creating a numeric variable\n",
50 | "tt.head(5)"
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": null,
56 | "metadata": {
57 | "collapsed": false
58 | },
59 | "outputs": [],
60 | "source": [
61 | "# Q2)\n",
62 | "\n",
63 | "gp1 = tt.groupby(['class','survived']).sum() # NB. This only works because we created the passengers variable\n",
64 | "gp1"
65 | ]
66 | },
67 | {
68 | "cell_type": "code",
69 | "execution_count": null,
70 | "metadata": {
71 | "collapsed": false
72 | },
73 | "outputs": [],
74 | "source": [
75 | "# Q3) Excellent example here: http://stackoverflow.com/questions/23377108/pandas-percentage-of-total-with-groupby\n",
76 | "\n",
77 | "# state_pcts = state_office.groupby(level=0).apply(lambda x: 100*x/float(x.sum()))\n",
78 | "\n",
79 | "gp2 = gp1.groupby(level=0).apply(lambda x: 100*x/float(x.sum()))\n",
80 | "gp2\n"
81 | ]
82 | },
83 | {
84 | "cell_type": "code",
85 | "execution_count": null,
86 | "metadata": {
87 | "collapsed": false
88 | },
89 | "outputs": [],
90 | "source": [
91 | "# Q4)\n",
92 | "\n",
93 | "gp2 = gp2.rename(columns={'passengers':'passengers %'})\n",
94 | "gp2\n",
95 | "gp_merge = pd.merge(left = gp1,\n",
96 | " right = gp2,\n",
97 | " left_index=True, \n",
98 | " right_index=True)\n",
99 | "gp_merge"
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "execution_count": null,
105 | "metadata": {
106 | "collapsed": true
107 | },
108 | "outputs": [],
109 | "source": []
110 | }
111 | ],
112 | "metadata": {
113 | "kernelspec": {
114 | "display_name": "Python [default]",
115 | "language": "python",
116 | "name": "python3"
117 | },
118 | "language_info": {
119 | "codemirror_mode": {
120 | "name": "ipython",
121 | "version": 3
122 | },
123 | "file_extension": ".py",
124 | "mimetype": "text/x-python",
125 | "name": "python",
126 | "nbconvert_exporter": "python",
127 | "pygments_lexer": "ipython3",
128 | "version": "3.5.2"
129 | }
130 | },
131 | "nbformat": 4,
132 | "nbformat_minor": 0
133 | }
134 |
--------------------------------------------------------------------------------
/.ipynb_checkpoints/V00. Introduction to Data Visualisation-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "from IPython.display import Image"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {},
17 | "source": [
18 | "# V00: Introduction to Data Visualisation"
19 | ]
20 | },
21 | {
22 | "cell_type": "markdown",
23 | "metadata": {},
24 | "source": [
25 | "You'll likely have heard the term 'data visualisation' (commonly abbreviated to 'data vis') before. It's a general term that describes helping users understand the data by placing it in a visual context. Patterns, trends and correlations that might go undetected in text-based data can be gleaned and highlighted easier with data visualization software and languages, such as R and of course Python.\n",
26 | "\n",
27 | "More recently, data vis has grown beyond Excel spreadsheets and charts and become more sophisticated allowing data to be displayed in ways such as GIS maps, infographics, sparklines, heatmaps etc."
28 | ]
29 | },
30 | {
31 | "cell_type": "markdown",
32 | "metadata": {},
33 | "source": [
34 | "## Data Vis in Python"
35 | ]
36 | },
37 | {
38 | "cell_type": "markdown",
39 | "metadata": {},
40 | "source": [
41 | "Python has some excellent packages for data visualisation and we'll be giving an overview of some of these in this chapter.\n"
42 | ]
43 | },
44 | {
45 | "cell_type": "markdown",
46 | "metadata": {},
47 | "source": [
48 | ""
49 | ]
50 | },
51 | {
52 | "cell_type": "markdown",
53 | "metadata": {},
54 | "source": [
55 | "Matplotlib is probably the most popular data vis library in Python. It was originally created in 2002 making it one of the oldest Python libraries still in use and is based upon the MATLAB visualisation suite.\n",
56 | "\n",
57 | "Matplotlib can be used in Python scripts, Jupyter, web application servers, and graphical user interface toolkits."
58 | ]
59 | },
60 | {
61 | "cell_type": "markdown",
62 | "metadata": {},
63 | "source": [
64 | ""
65 | ]
66 | },
67 | {
68 | "cell_type": "markdown",
69 | "metadata": {},
70 | "source": [
71 | "Seaborn is a library for making attractive and informative statistical graphics in Python. It is built on top of matplotlib and tightly integrated with Anaconda, including support for numpy and pandas data structures and statistical routines from scipy and statsmodels.\n",
72 | "\n",
73 | "Some of the features that seaborn offers are:\n",
74 | "\n",
75 | "* Several built-in themes that improve on the default matplotlib aesthetics\n",
76 | "* Tools for choosing color palettes to make beautiful plots that reveal patterns in your data\n",
77 | "* Functions for visualizing univariate and bivariate distributions or for comparing them between subsets of data\n",
78 | "* Tools that fit and visualize linear regression models for different kinds of independent and dependent variables\n",
79 | "* Functions that visualize matrices of data and use clustering algorithms to discover structure in those matrices\n",
80 | "* A function to plot statistical timeseries data with flexible estimation and representation of uncertainty around the estimate\n",
81 | "* High-level abstractions for structuring grids of plots that let you easily build complex visualizations\n",
82 | "\n",
83 | "You can install it as follows:"
84 | ]
85 | },
86 | {
87 | "cell_type": "raw",
88 | "metadata": {},
89 | "source": [
90 | "pip install seaborn"
91 | ]
92 | },
93 | {
94 | "cell_type": "markdown",
95 | "metadata": {},
96 | "source": [
97 | ""
98 | ]
99 | },
100 | {
101 | "cell_type": "markdown",
102 | "metadata": {},
103 | "source": [
104 | "Bokeh is a Python interactive visualization library that targets modern web browsers for presentation. Its goal is to provide elegant, concise construction of novel graphics in the style of D3.js, and to extend this capability with high-performance interactivity over very large or streaming datasets. Bokeh can help anyone who would like to quickly and easily create interactive plots, dashboards, and data applications.\n",
105 | "\n",
106 | "Also (if you needed any more incentive to use it!), Bokeh is made by Continuum Analytics, the very same people responsible for putting Anaconda together and comes as part of the standard installation."
107 | ]
108 | },
109 | {
110 | "cell_type": "markdown",
111 | "metadata": {},
112 | "source": [
113 | ""
114 | ]
115 | },
116 | {
117 | "cell_type": "markdown",
118 | "metadata": {},
119 | "source": [
120 | "Plotly is an online analytics and data visualization tool, and provides online graphing, analytics, and stats tools for individuals and collaboration. It can also be integrated with other software and languages such as Python, R, MATLAB, Perl, Julia, Arduino, and REST. Up until very recently Plotly was a 'paid' service (and still is if you want to host files online), however they've recently taken the decision to go open source.\n",
121 | "\n",
122 | "Plotly isn't a 'typical' Python library in that whilst you can use it offline, much of the content is posted to the web instead of output in Jupyter. This can make it difficult to use sensitive data and is an added layer of complexity.\n",
123 | "\n",
124 | "You can install it as follows:"
125 | ]
126 | },
127 | {
128 | "cell_type": "raw",
129 | "metadata": {
130 | "collapsed": true
131 | },
132 | "source": [
133 | "pip install plotly"
134 | ]
135 | },
136 | {
137 | "cell_type": "markdown",
138 | "metadata": {},
139 | "source": [
140 | "We won't be going through Plotly as part of this course, however there are some excellent tutorials available here."
141 | ]
142 | },
143 | {
144 | "cell_type": "markdown",
145 | "metadata": {},
146 | "source": [
147 | ""
148 | ]
149 | },
150 | {
151 | "cell_type": "markdown",
152 | "metadata": {
153 | "collapsed": true
154 | },
155 | "source": [
156 | "Similar to Plotly, Lightning integrates with a number of software languages and produces some quite swanky looking graphs. Note that whilst the graphs are interactive to an extent, they don't appear to have tooltips that pop up which is a shame.\n",
157 | "\n",
158 | "You can install it as follows:\n"
159 | ]
160 | },
161 | {
162 | "cell_type": "raw",
163 | "metadata": {},
164 | "source": [
165 | "pip install lightning-python"
166 | ]
167 | },
168 | {
169 | "cell_type": "markdown",
170 | "metadata": {},
171 | "source": [
172 | "## Structure of this Section"
173 | ]
174 | },
175 | {
176 | "cell_type": "markdown",
177 | "metadata": {},
178 | "source": [
179 | "Data Vis in Python is a massive area and you could quite easily fill a training course with examples and exercises for each of the libraries listed. As such the training here will show the basics for a few libraries and signpost you to more information and material to enable you to learn more after the course."
180 | ]
181 | }
182 | ],
183 | "metadata": {
184 | "anaconda-cloud": {},
185 | "kernelspec": {
186 | "display_name": "Python [default]",
187 | "language": "python",
188 | "name": "python3"
189 | },
190 | "language_info": {
191 | "codemirror_mode": {
192 | "name": "ipython",
193 | "version": 3
194 | },
195 | "file_extension": ".py",
196 | "mimetype": "text/x-python",
197 | "name": "python",
198 | "nbconvert_exporter": "python",
199 | "pygments_lexer": "ipython3",
200 | "version": "3.5.2"
201 | }
202 | },
203 | "nbformat": 4,
204 | "nbformat_minor": 0
205 | }
206 |
--------------------------------------------------------------------------------
/.ipynb_checkpoints/V02E. Matplotlib Exercises-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# V02E. Matplotlib Exercises"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "#### Exercise 1. For the following dataframe:\n",
15 | "1) Plot a simple bar chart.\n",
16 | "2) Set the title and axis labels of the graph.\n",
17 | "3) In the plot method, remove the legend, change the colour, and increase the width of the bars.\n",
18 | "4) Remove the ticks (It's OK to C + P this!)\n",
19 | "5) Remove the top and right borders"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 1,
25 | "metadata": {
26 | "collapsed": true
27 | },
28 | "outputs": [],
29 | "source": [
30 | "import pandas as pd\n",
31 | "import numpy as np\n",
32 | "import matplotlib.pyplot as plt\n",
33 | "%matplotlib inline"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": 2,
39 | "metadata": {
40 | "collapsed": false
41 | },
42 | "outputs": [
43 | {
44 | "data": {
45 | "text/html": [
46 | "<div>\n",
47 | "<table border=\"1\" class=\"dataframe\">\n",
48 | "  <thead>\n",
49 | "    <tr style=\"text-align: right;\">\n",
50 | "      <th></th>\n",
51 | "      <th>data</th>\n",
52 | "    </tr>\n",
53 | "  </thead>\n",
54 | "  <tbody>\n",
55 | "    <tr>\n",
56 | "      <th>11-May-16</th>\n",
57 | "      <td>52500</td>\n",
58 | "    </tr>\n",
59 | "    <tr>\n",
60 | "      <th>18-May-16</th>\n",
61 | "      <td>68400</td>\n",
62 | "    </tr>\n",
63 | "    <tr>\n",
64 | "      <th>01-Jun-16</th>\n",
65 | "      <td>83200</td>\n",
66 | "    </tr>\n",
67 | "    <tr>\n",
68 | "      <th>08-Jun-16</th>\n",
69 | "      <td>64200</td>\n",
70 | "    </tr>\n",
71 | "    <tr>\n",
72 | "      <th>15-Jun-16</th>\n",
73 | "      <td>7300</td>\n",
74 | "    </tr>\n",
75 | "  </tbody>\n",
76 | "</table>\n",
77 | "</div>"
78 | ],
79 | "text/plain": [
80 | " data\n",
81 | "11-May-16 52500\n",
82 | "18-May-16 68400\n",
83 | "01-Jun-16 83200\n",
84 | "08-Jun-16 64200\n",
85 | "15-Jun-16 7300"
86 | ]
87 | },
88 | "execution_count": 2,
89 | "metadata": {},
90 | "output_type": "execute_result"
91 | }
92 | ],
93 | "source": [
94 | "df1 = pd.DataFrame({'date':['11-May-16','18-May-16','01-Jun-16','08-Jun-16','15-Jun-16'], # Importing some dates as a string\n",
95 | " 'data':np.random.randint(0,1000,5)*100})\n",
96 | "df1 = df1.set_index('date')\n",
97 | "df1.index.name = None\n",
98 | "df1"
99 | ]
100 | },
101 | {
102 | "cell_type": "code",
103 | "execution_count": null,
104 | "metadata": {
105 | "collapsed": false
106 | },
107 | "outputs": [],
108 | "source": []
109 | },
110 | {
111 | "cell_type": "markdown",
112 | "metadata": {},
113 | "source": [
114 | "#### Exercise 2. For the following dataframe:\n",
115 | "1) Plot a basic scatter chart.\n",
116 | "2) Set the title and axis labels of the graph.\n",
117 | "3) See if you can find out how to set the scale to 0 - 1000\n",
118 | "4) See if you can find out how to change the scatter marker to a square"
119 | ]
120 | },
121 | {
122 | "cell_type": "code",
123 | "execution_count": null,
124 | "metadata": {
125 | "collapsed": false
126 | },
127 | "outputs": [],
128 | "source": [
129 | "df2 = pd.DataFrame({'data1':np.random.randint(0,1000,1000),\n",
130 | " 'data2':np.random.randint(0,1000,1000)})\n",
131 | "df2"
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "execution_count": null,
137 | "metadata": {
138 | "collapsed": true
139 | },
140 | "outputs": [],
141 | "source": []
142 | }
143 | ],
144 | "metadata": {
145 | "kernelspec": {
146 | "display_name": "Python [default]",
147 | "language": "python",
148 | "name": "python3"
149 | },
150 | "language_info": {
151 | "codemirror_mode": {
152 | "name": "ipython",
153 | "version": 3
154 | },
155 | "file_extension": ".py",
156 | "mimetype": "text/x-python",
157 | "name": "python",
158 | "nbconvert_exporter": "python",
159 | "pygments_lexer": "ipython3",
160 | "version": "3.5.2"
161 | }
162 | },
163 | "nbformat": 4,
164 | "nbformat_minor": 0
165 | }
166 |
--------------------------------------------------------------------------------
/.ipynb_checkpoints/V04E. Seaborn Exercises-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# V04E. Seaborn Exercises"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "#### Exercise 1: Using the dataframe below:\n",
15 | "\n",
16 | "1) Create a jointplot to show the distribution of the data.
\n",
17 | "2) Change the size of the plot to make it larger.
\n",
18 | "3) Add a title to the plot (NB You'll have to adjust the plot down).
"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 1,
24 | "metadata": {
25 | "collapsed": true
26 | },
27 | "outputs": [],
28 | "source": [
29 | "import pandas as pd\n",
30 | "import numpy as np\n",
31 | "from numpy.random import randn\n",
32 | "import matplotlib.pyplot as plt \n",
33 | "import seaborn as sns\n",
34 | "%matplotlib inline"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": 2,
40 | "metadata": {
41 | "collapsed": false
42 | },
43 | "outputs": [
44 | {
45 | "data": {
46 | "text/html": [
47 | "\n",
48 | "
\n",
49 | " \n",
50 | " \n",
51 | " | \n",
52 | " data1 | \n",
53 | " data2 | \n",
54 | "
\n",
55 | " \n",
56 | " \n",
57 | " \n",
58 | " 0 | \n",
59 | " 0 | \n",
60 | " 304 | \n",
61 | "
\n",
62 | " \n",
63 | " 1 | \n",
64 | " 1 | \n",
65 | " 232 | \n",
66 | "
\n",
67 | " \n",
68 | " 2 | \n",
69 | " 2 | \n",
70 | " 83 | \n",
71 | "
\n",
72 | " \n",
73 | " 3 | \n",
74 | " 3 | \n",
75 | " 471 | \n",
76 | "
\n",
77 | " \n",
78 | " 4 | \n",
79 | " 4 | \n",
80 | " 41 | \n",
81 | "
\n",
82 | " \n",
83 | "
\n",
84 | "
"
85 | ],
86 | "text/plain": [
87 | " data1 data2\n",
88 | "0 0 304\n",
89 | "1 1 232\n",
90 | "2 2 83\n",
91 | "3 3 471\n",
92 | "4 4 41"
93 | ]
94 | },
95 | "execution_count": 2,
96 | "metadata": {},
97 | "output_type": "execute_result"
98 | }
99 | ],
100 | "source": [
101 | "df1a = pd.DataFrame({'data1':np.arange(0,400),\n",
102 | " 'data2':np.random.randint(0,500,400)})\n",
103 | "df1b = pd.DataFrame({'data1':np.arange(400,1000),\n",
104 | " 'data2':np.random.randint(0,1000,600)})\n",
105 | "df1 = pd.concat([df1a,df1b]) \n",
106 | "df1.head(5)"
107 | ]
108 | },
109 | {
110 | "cell_type": "code",
111 | "execution_count": null,
112 | "metadata": {
113 | "collapsed": false
114 | },
115 | "outputs": [],
116 | "source": []
117 | },
118 | {
119 | "cell_type": "markdown",
120 | "metadata": {},
121 | "source": [
122 | "#### Exercise 2: Using the Numpy Array below:\n",
123 | "\n",
124 | "1) Plot a heatmap.
\n",
125 | "2) Increase the size of the heatmap.
\n",
126 | "3) Add values to the heatmap via annotation.
\n",
127 | "4) Change the colours to a nicer palette.
\n",
128 | "5) Set a title for the heatmap."
129 | ]
130 | },
131 | {
132 | "cell_type": "code",
133 | "execution_count": null,
134 | "metadata": {
135 | "collapsed": false
136 | },
137 | "outputs": [],
138 | "source": [
139 | "data = np.random.rand(10,12)\n",
140 | "data"
141 | ]
142 | },
143 | {
144 | "cell_type": "code",
145 | "execution_count": null,
146 | "metadata": {
147 | "collapsed": false
148 | },
149 | "outputs": [],
150 | "source": []
151 | },
152 | {
153 | "cell_type": "code",
154 | "execution_count": null,
155 | "metadata": {
156 | "collapsed": true
157 | },
158 | "outputs": [],
159 | "source": []
160 | }
161 | ],
162 | "metadata": {
163 | "kernelspec": {
164 | "display_name": "Python [default]",
165 | "language": "python",
166 | "name": "python3"
167 | },
168 | "language_info": {
169 | "codemirror_mode": {
170 | "name": "ipython",
171 | "version": 3
172 | },
173 | "file_extension": ".py",
174 | "mimetype": "text/x-python",
175 | "name": "python",
176 | "nbconvert_exporter": "python",
177 | "pygments_lexer": "ipython3",
178 | "version": "3.5.2"
179 | }
180 | },
181 | "nbformat": 4,
182 | "nbformat_minor": 0
183 | }
184 |
--------------------------------------------------------------------------------
/A00. Course Overview 10-09-2016.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Python for Analysts Training"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "Hi! And welcome to the Python for Analysts training course. This covers everything you need to know to start using Python for data analysis and visualisation as well as showcasing some more advanced and snazzy stuff, including Statistics, Machine Learning, Web Scraping / Interaction etc.\n",
15 | "\n",
16 | "The course assumes no prior knowledge of Python and will teach you everything you need to know in order to use Python for data analysis and visualisation, including interfacing with Python via the Jupyter interface, using Text Editors / Integrated Development Environments (IDEs), upgrading Python, working with the command line etc.\n",
17 | "\n",
18 | "Lastly, note that the course can only hope to give you an introduction to Python for Data Analysis over the 2 days. You'll no doubt want to continue your learning afterward, and the course provides links to relevant material with which to further your development."
19 | ]
20 | },
21 | {
22 | "cell_type": "markdown",
23 | "metadata": {},
24 | "source": [
25 | "## About me"
26 | ]
27 | },
28 | {
29 | "cell_type": "markdown",
30 | "metadata": {},
31 | "source": [
32 | "I'm Tom Ewing, a data scientist working for the Department for Transport. I've been using Python for just under 2 years, having made the jump from SAS. \n",
33 | "\n",
34 | "One of the first things I noticed about Python was that much of the training out there is geared towards programmers rather than analysts and this made learning more difficult. Additionally much of this assumes knowledge of things like packages, the command line etc. which analysts are less likely to have experience with.\n",
35 | "\n",
36 | "Having delivered training courses in other technology before, I was really keen to use this experience to develop my own Python for Analysts training course."
37 | ]
38 | },
39 | {
40 | "cell_type": "markdown",
41 | "metadata": {},
42 | "source": [
43 | "## Structure of the Course"
44 | ]
45 | },
46 | {
47 | "cell_type": "markdown",
48 | "metadata": {},
49 | "source": [
50 | "This course has been designed to be both delivered as a classroom training course, but can also be taken offline as well, in your own time.\n",
51 | "\n",
52 | "In a classroom environment the course is designed to cover the best part of 2 days, with time for exercises and consolidation in between. There is also more material for you to explore afterward.\n",
53 | "\n",
54 | "You will be expected to have a project to practice with! This will allow you to consolidate your Python training and continue to learn and develop.\n",
55 | "\n",
56 | "The structure of the course is as follows:\n",
57 | "\n",
58 | "## Day 1-2:\n",
59 | "\n",
60 | "### Basics \n",
61 | "\n",
62 | "* Interfacing with Python\n",
63 | "* Basic Python Syntax\n",
64 | "* Data Structures\n",
65 | "* Coding concepts\n",
66 | "* Looping\n",
67 | "* Enhancing Python with Packages\n",
68 | "\n",
69 | "### Working with data\n",
70 | "\n",
71 | "* Data Analysis Libraries\n",
72 | "* Advanced Data Structures\n",
73 | "* Importing / Exporting Data\n",
74 | "* Working with DataFrames\n",
75 | "* Summary Statistics\n",
76 | "* Tables\n",
77 | "\n",
78 | "### Visualisation\n",
79 | "\n",
80 | "* Static Visualisation\n",
81 | "* Statistical Visualisation\n",
82 | "* Interactive Visualisation\n"
83 | ]
84 | },
85 | {
86 | "cell_type": "markdown",
87 | "metadata": {},
88 | "source": [
89 | "Those taking the course should note that the best way to consolidate your learning is via your project. Not only will this help you embed what you've learned, but it will also get you used to solving problems and continuing your learning journey in Python!"
90 | ]
91 | },
92 | {
93 | "cell_type": "markdown",
94 | "metadata": {},
95 | "source": [
96 | "## Following along"
97 | ]
98 | },
99 | {
100 | "cell_type": "markdown",
101 | "metadata": {},
102 | "source": [
103 | "During the lectures, you might wish to just listen, follow along on your screen, or execute the code in your own blank notebook, make notes etc. All of this is fine so long as you pay attention!\n",
104 | "\n",
105 | "In most of the lectures the code is 'pre-baked' - we will explain what it does, execute it and show you and talk you through the output. This means we can give the class our full attention and not focus on finding typos or wondering why code didn't run properly!"
106 | ]
107 | },
108 | {
109 | "cell_type": "markdown",
110 | "metadata": {},
111 | "source": [
112 | "## Domestic Arrangements"
113 | ]
114 | },
115 | {
116 | "cell_type": "markdown",
117 | "metadata": {},
118 | "source": [
119 | "* Laptops\n",
120 | "* Toilets\n",
121 | "* Fire Alarm\n",
122 | "* Breaks & Lunch\n",
123 | "* Phones"
124 | ]
125 | },
126 | {
127 | "cell_type": "code",
128 | "execution_count": null,
129 | "metadata": {
130 | "collapsed": true
131 | },
132 | "outputs": [],
133 | "source": []
134 | }
135 | ],
136 | "metadata": {
137 | "anaconda-cloud": {},
138 | "kernelspec": {
139 | "display_name": "Python [default]",
140 | "language": "python",
141 | "name": "python3"
142 | },
143 | "language_info": {
144 | "codemirror_mode": {
145 | "name": "ipython",
146 | "version": 3
147 | },
148 | "file_extension": ".py",
149 | "mimetype": "text/x-python",
150 | "name": "python",
151 | "nbconvert_exporter": "python",
152 | "pygments_lexer": "ipython3",
153 | "version": "3.5.2"
154 | }
155 | },
156 | "nbformat": 4,
157 | "nbformat_minor": 0
158 | }
159 |
--------------------------------------------------------------------------------
/B03E. Functions Exercises.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# B03E. Functions Exercises"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## Introduction to Exercises"
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "Welcome to the first set of exercises! Whilst doing these we recommend that you:\n",
22 | "\n",
23 | "1) Refer to the lesson notebooks and your notes in order to answer them but don't copy and paste over from them! You'll learn better if you start committing the code to muscle memory through typing!
\n",
24 | "2) Go beyond the scope of the exercises wherever you can - feel free to try stuff and experiment. It's unlikely that you'll break anything.
\n",
25 | "3) Google anything that you want to know more about.
\n",
26 | "4) Copy code over to your Cookbook for future reference.
\n",
27 | "5) Have fun =)
"
28 | ]
29 | },
30 | {
31 | "cell_type": "markdown",
32 | "metadata": {},
33 | "source": [
34 | "#### Exercise 1: Create a function that prints the type and value of an object / variable and use it to evaluate variables a through e below."
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": 1,
40 | "metadata": {
41 | "collapsed": true
42 | },
43 | "outputs": [],
44 | "source": [
45 | "a = 'beans'\n",
46 | "b = 12\n",
47 | "c = 34.65\n",
48 | "d = True\n",
49 | "e = None"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": 15,
55 | "metadata": {
56 | "collapsed": false
57 | },
58 | "outputs": [
59 | {
60 | "name": "stdout",
61 | "output_type": "stream",
62 | "text": [
63 | "beans\n",
64 | "\n",
65 | "12\n",
66 | "\n",
67 | "34.65\n",
68 | "\n"
69 | ]
70 | }
71 | ],
72 | "source": [
73 | "def printer1(anything):\n",
74 | " print(anything)\n",
75 | " print(type(anything))\n",
76 | " \n",
77 | "printer1(a)\n",
78 | "printer1(b)\n",
79 | "printer1(c)"
80 | ]
81 | },
82 | {
83 | "cell_type": "markdown",
84 | "metadata": {},
85 | "source": [
86 | "#### Exercise 2: Create a function that determines how many letters are in a character string and outputs this in a meaningful statement to the user. Then use it to evaluate variables f through j below:"
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": 3,
92 | "metadata": {
93 | "collapsed": true
94 | },
95 | "outputs": [],
96 | "source": [
97 | "f = 'The Shawshank Redemption'\n",
98 | "g = 'The Godfather'\n",
99 | "h = 'The Godfather: Part II'\n",
100 | "i = 'The Dark Knight'\n",
101 | "j = \"Schindler's List\""
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "execution_count": 23,
107 | "metadata": {
108 | "collapsed": false
109 | },
110 | "outputs": [
111 | {
112 | "name": "stdout",
113 | "output_type": "stream",
114 | "text": [
115 | "The Shawshank Redemption contains 24 characters\n"
116 | ]
117 | }
118 | ],
119 | "source": [
120 | "def char(var):\n",
121 | " '''Determines how many characters are in a string'''\n",
122 | " print(var + \" contains \" + str(len(var)) + \" characters\")\n",
123 | " \n",
124 | "char(f)"
125 | ]
126 | },
127 | {
128 | "cell_type": "markdown",
129 | "metadata": {},
130 | "source": [
131 | "#### Exercise 3: Create a function using if/elif/else that determines what mode of transport to take based upon the number of people who want to travel:\n",
132 | " \n",
133 | "1-2: Walk
\n",
134 | "3-5: Car
\n",
135 | "6-50: Coach
\n",
136 | "51-100: Train
\n",
137 | "101+: Plane
\n",
138 | "\n",
139 | "#### Then use this function to evaluate variables k through o:"
140 | ]
141 | },
142 | {
143 | "cell_type": "code",
144 | "execution_count": null,
145 | "metadata": {
146 | "collapsed": true
147 | },
148 | "outputs": [],
149 | "source": [
150 | "k = 56\n",
151 | "l = 1\n",
152 | "m = 4\n",
153 | "n = 180\n",
154 | "o = 12"
155 | ]
156 | },
157 | {
158 | "cell_type": "code",
159 | "execution_count": null,
160 | "metadata": {
161 | "collapsed": false
162 | },
163 | "outputs": [],
164 | "source": [
165 | "\n"
166 | ]
167 | }
168 | ],
169 | "metadata": {
170 | "anaconda-cloud": {},
171 | "kernelspec": {
172 | "display_name": "Python [default]",
173 | "language": "python",
174 | "name": "python3"
175 | },
176 | "language_info": {
177 | "codemirror_mode": {
178 | "name": "ipython",
179 | "version": 3
180 | },
181 | "file_extension": ".py",
182 | "mimetype": "text/x-python",
183 | "name": "python",
184 | "nbconvert_exporter": "python",
185 | "pygments_lexer": "ipython3",
186 | "version": "3.5.2"
187 | }
188 | },
189 | "nbformat": 4,
190 | "nbformat_minor": 0
191 | }
192 |
--------------------------------------------------------------------------------
/B03S. Functions Solutions.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# B03S. Functions Solutions"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## Introduction to Exercises"
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "Welcome to the first set of exercises! Whilst doing these we recommend that you:\n",
22 | "\n",
23 | "1) Refer to the lesson notebooks and your notes in order to answer them but don't copy and paste over from them! You'll learn better if you start committing the code to muscle memory through typing!
\n",
24 | "2) Go beyond the scope of the exercises wherever you can - feel free to try stuff and experiment. It's unlikely that you'll break anything.
\n",
25 | "3) Google anything that you want to know more about.
\n",
26 | "4) Copy code over to your Cookbook for future reference.
\n",
27 | "5) Have fun =)
"
28 | ]
29 | },
30 | {
31 | "cell_type": "markdown",
32 | "metadata": {},
33 | "source": [
34 | "#### Exercise 1: Create a function that prints the type and value of an object / variable and use it to evaluate variables a through e below."
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": null,
40 | "metadata": {
41 | "collapsed": true
42 | },
43 | "outputs": [],
44 | "source": [
45 | "a = 'beans'\n",
46 | "b = 12\n",
47 | "c = 34.65\n",
48 | "d = True\n",
49 | "e = None\n",
50 | "f= 3"
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": null,
56 | "metadata": {
57 | "collapsed": false
58 | },
59 | "outputs": [],
60 | "source": [
61 | "def obj(var):\n",
62 | " '''Prints the type and value of an object'''\n",
63 | " print(\"The type of the variable is:\",type(var))\n",
64 | " print(\"The value of the object is:\",var)\n",
65 | " \n",
66 | "obj(a)\n",
67 | "obj(b)\n",
68 | "obj(c)\n",
69 | "obj(d)\n",
70 | "obj(e)\n",
71 | "obj(f)"
72 | ]
73 | },
74 | {
75 | "cell_type": "markdown",
76 | "metadata": {},
77 | "source": [
78 | "#### Exercise 2: Create a function that determines how many letters are in a character string and outputs this in a meaningful statement to the user. Then use it to evaluate variables f through j below:"
79 | ]
80 | },
81 | {
82 | "cell_type": "code",
83 | "execution_count": null,
84 | "metadata": {
85 | "collapsed": true
86 | },
87 | "outputs": [],
88 | "source": [
89 | "f = 'The Shawshank Redemption'\n",
90 | "g = 'The Godfather'\n",
91 | "h = 'The Godfather: Part II'\n",
92 | "i = 'The Dark Knight'\n",
93 | "j = \"Schindler's List\""
94 | ]
95 | },
96 | {
97 | "cell_type": "code",
98 | "execution_count": null,
99 | "metadata": {
100 | "collapsed": false
101 | },
102 | "outputs": [],
103 | "source": [
104 | "def char(var):\n",
105 | " '''Determines how many characters are in a string'''\n",
106 | " print(\"%s contains\" % var,len(var),\"characters\")\n",
107 | "\n",
108 | "char(f)\n",
109 | "char(g)\n",
110 | "char(h)\n",
111 | "char(i)\n",
112 | "char(j)"
113 | ]
114 | },
115 | {
116 | "cell_type": "markdown",
117 | "metadata": {},
118 | "source": [
119 | "#### Exercise 3: Create a function using if/elif/else that determines what mode of transport to take based upon the number of people who want to travel:\n",
120 | " \n",
121 | "1-2: Walk
\n",
122 | "3-5: Car
\n",
123 | "6-50: Coach
\n",
124 | "51-100: Train
\n",
125 | "101+: Plane
\n",
126 | "\n",
127 | "#### Then use this function to evaluate variables k through o:"
128 | ]
129 | },
130 | {
131 | "cell_type": "code",
132 | "execution_count": 2,
133 | "metadata": {
134 | "collapsed": true
135 | },
136 | "outputs": [],
137 | "source": [
138 | "k = 56\n",
139 | "l = 1\n",
140 | "m = 4\n",
141 | "n = 180\n",
142 | "o = 12"
143 | ]
144 | },
145 | {
146 | "cell_type": "code",
147 | "execution_count": 3,
148 | "metadata": {
149 | "collapsed": false
150 | },
151 | "outputs": [
152 | {
153 | "name": "stdout",
154 | "output_type": "stream",
155 | "text": [
156 | "k = Train l = Walk m = Car n = Plane o = Coach\n"
157 | ]
158 | }
159 | ],
160 | "source": [
161 | "def people(var):\n",
162 | " '''Determines the best mode of transport based upon the volume of people travelling'''\n",
163 | " if var > 0 and var <= 2:\n",
164 | " return 'Walk'\n",
165 | " elif var > 2 and var <= 5:\n",
166 | " return 'Car'\n",
167 | " elif var > 5 and var <= 50:\n",
168 | " return 'Coach'\n",
169 | " elif var > 50 and var <= 100:\n",
170 | " return 'Train'\n",
171 | " elif var > 100:\n",
172 | " return 'Plane' \n",
173 | " else:\n",
174 | " return 'Error'\n",
175 | "\n",
176 | "print(\"k =\",people(k),\"l =\",people(l),\"m =\",people(m),\"n =\",people(n),\"o =\",people(o))\n"
177 | ]
178 | },
179 | {
180 | "cell_type": "code",
181 | "execution_count": null,
182 | "metadata": {
183 | "collapsed": true
184 | },
185 | "outputs": [],
186 | "source": []
187 | }
188 | ],
189 | "metadata": {
190 | "anaconda-cloud": {},
191 | "kernelspec": {
192 | "display_name": "Python [default]",
193 | "language": "python",
194 | "name": "python3"
195 | },
196 | "language_info": {
197 | "codemirror_mode": {
198 | "name": "ipython",
199 | "version": 3
200 | },
201 | "file_extension": ".py",
202 | "mimetype": "text/x-python",
203 | "name": "python",
204 | "nbconvert_exporter": "python",
205 | "pygments_lexer": "ipython3",
206 | "version": "3.5.2"
207 | }
208 | },
209 | "nbformat": 4,
210 | "nbformat_minor": 0
211 | }
212 |
--------------------------------------------------------------------------------
/B04E. Basic Data Structures Part 1 Exercises.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# B04E: Basic Data Structures Part 1 Exercises"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "#### Exercise 1: Create a function that returns the first and last items of a list and prints this to the user. Use the function to evaluate lists a, b and c as follows:"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 3,
20 | "metadata": {
21 | "collapsed": true
22 | },
23 | "outputs": [],
24 | "source": [
25 | "a = [1,2,3,4,5,6,7,8,9]\n",
26 | "b = ['alpha', 'beta', 'charlie', 'delta', 'echo']\n",
27 | "c = [1.1,1.2,1.3,1.4,1.5,1.6,1.7]"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 18,
33 | "metadata": {
34 | "collapsed": false
35 | },
36 | "outputs": [
37 | {
38 | "name": "stdout",
39 | "output_type": "stream",
40 | "text": [
41 | "1.1 1.7\n"
42 | ]
43 | }
44 | ],
45 | "source": [
46 | "def lister1(var):\n",
47 | " print(var[0],var[-1])\n",
48 | " \n",
49 | "lister1(c)"
50 | ]
51 | },
52 | {
53 | "cell_type": "markdown",
54 | "metadata": {},
55 | "source": [
56 | "#### Exercise 2: Modify your function so that it works on variable d below:"
57 | ]
58 | },
59 | {
60 | "cell_type": "code",
61 | "execution_count": 21,
62 | "metadata": {
63 | "collapsed": false,
64 | "scrolled": true
65 | },
66 | "outputs": [
67 | {
68 | "data": {
69 | "text/plain": [
70 | "[[1, 2, 3, 4, 5, 6, 7, 8, 9],\n",
71 | " ['alpha', 'beta', 'charlie', 'delta', 'echo'],\n",
72 | " [1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7]]"
73 | ]
74 | },
75 | "execution_count": 21,
76 | "metadata": {},
77 | "output_type": "execute_result"
78 | }
79 | ],
80 | "source": [
81 | "d = [a,b,c]\n",
82 | "d"
83 | ]
84 | },
85 | {
86 | "cell_type": "code",
87 | "execution_count": 23,
88 | "metadata": {
89 | "collapsed": false
90 | },
91 | "outputs": [
92 | {
93 | "name": "stdout",
94 | "output_type": "stream",
95 | "text": [
96 | "1 9\n",
97 | "alpha echo\n",
98 | "1.1 1.7\n"
99 | ]
100 | }
101 | ],
102 | "source": [
103 | "def lister2(var):\n",
104 | " print(var[0][0],var[0][-1])\n",
105 | " print(var[1][0],var[1][-1])\n",
106 | " print(var[2][0],var[2][-1])\n",
107 | "lister2(d)"
108 | ]
109 | },
110 | {
111 | "cell_type": "markdown",
112 | "metadata": {},
113 | "source": [
114 | "#### Exercise 3: Create 3 new variables using slicing as follows:\n",
115 | "\n",
116 | "f = Every 5th character of e, starting at the beginning of the string
\n",
117 | "g = Every 2nd character of e starting at the end of the string
\n",
118 | "h = Every 6th character of e starting at the middle of the string
\n",
119 | "\n",
120 | "Hint! You'll need to use a function for h!"
121 | ]
122 | },
123 | {
124 | "cell_type": "code",
125 | "execution_count": 24,
126 | "metadata": {
127 | "collapsed": true
128 | },
129 | "outputs": [],
130 | "source": [
131 | "e = \"Lorem Ipsum is simply dummy text of the printing and typesetting industry. It has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like PageMaker including versions of Lorem Ipsum\""
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "execution_count": null,
137 | "metadata": {
138 | "collapsed": false
139 | },
140 | "outputs": [],
141 | "source": [
142 | "f = e[0::5]\n",
143 | "f"
144 | ]
145 | },
146 | {
147 | "cell_type": "code",
148 | "execution_count": null,
149 | "metadata": {
150 | "collapsed": true
151 | },
152 | "outputs": [],
153 | "source": [
154 | "g = e[-1::-2]\n",
155 | "g"
156 | ]
157 | },
158 | {
159 | "cell_type": "code",
160 | "execution_count": null,
161 | "metadata": {
162 | "collapsed": true
163 | },
164 | "outputs": [],
165 | "source": [
166 | "h = e[(int(len(e)/2))::6]\n",
167 | "h"
168 | ]
169 | },
170 | {
171 | "cell_type": "markdown",
172 | "metadata": {},
173 | "source": [
174 | "#### Exercise 4: Find a way to create a list containing all the words in e as separate items and assign this list to variable i.\n",
175 | "\n",
176 | "Hint! It's OK to use Google!"
177 | ]
178 | },
179 | {
180 | "cell_type": "code",
181 | "execution_count": 25,
182 | "metadata": {
183 | "collapsed": true
184 | },
185 | "outputs": [],
186 | "source": [
187 | "i = e.split()"
188 | ]
189 | }
190 | ],
191 | "metadata": {
192 | "anaconda-cloud": {},
193 | "kernelspec": {
194 | "display_name": "Python [default]",
195 | "language": "python",
196 | "name": "python3"
197 | },
198 | "language_info": {
199 | "codemirror_mode": {
200 | "name": "ipython",
201 | "version": 3
202 | },
203 | "file_extension": ".py",
204 | "mimetype": "text/x-python",
205 | "name": "python",
206 | "nbconvert_exporter": "python",
207 | "pygments_lexer": "ipython3",
208 | "version": "3.5.2"
209 | }
210 | },
211 | "nbformat": 4,
212 | "nbformat_minor": 0
213 | }
214 |
--------------------------------------------------------------------------------
/B04S. Basic Data Structures Part 1 Solutions.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# B04S: Basic Data Structures Part 1 Solutions\n"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "#### Exercise 1: Create a function that returns the first and last items of a list and prints this to the user. Use the function to evaluate lists a, b and c as follows:"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": null,
20 | "metadata": {
21 | "collapsed": true
22 | },
23 | "outputs": [],
24 | "source": [
25 | "a = [1,2,3,4,5,6,7,8,9]\n",
26 | "b = ['alpha', 'beta', 'charlie', 'delta', 'echo']\n",
27 | "c = [1.1,1.2,1.3,1.4,1.5,1.6,1.7]"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": null,
33 | "metadata": {
34 | "collapsed": false
35 | },
36 | "outputs": [],
37 | "source": [
38 | "def fl(var):\n",
39 | " '''Prints the first and last items in a list'''\n",
40 | " print(\"The first item is:\",var[0])\n",
41 | " print(\"The last item is:\",var[-1])\n",
42 | " \n",
43 | "fl(a)\n",
44 | "fl(b)\n",
45 | "fl(c)"
46 | ]
47 | },
48 | {
49 | "cell_type": "markdown",
50 | "metadata": {},
51 | "source": [
52 | "#### Exercise 2: Modify your function so that it works on variable d below:"
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": null,
58 | "metadata": {
59 | "collapsed": false
60 | },
61 | "outputs": [],
62 | "source": [
63 | "d = [a,b,c]"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": null,
69 | "metadata": {
70 | "collapsed": false
71 | },
72 | "outputs": [],
73 | "source": [
74 | "def fl2(var):\n",
75 | " '''Prints the first last and middle items in a list'''\n",
76 | " print(\"The first item is:\",var[0][0])\n",
77 | " print(\"The last item is:\",var[0][-1])\n",
78 | " print(\"The first item is:\",var[1][0])\n",
79 | " print(\"The last item is:\",var[1][-1])\n",
80 | " print(\"The first item is:\",var[2][0])\n",
81 | " print(\"The last item is:\",var[2][-1])\n",
82 | " \n",
83 | "fl2(d)\n",
84 | "print(d)"
85 | ]
86 | },
87 | {
88 | "cell_type": "markdown",
89 | "metadata": {},
90 | "source": [
91 | "#### Exercise 3: Create 3 new variables using slicing as follows:\n",
92 | "\n",
93 | "f = Every 5th character of e, starting at the beginning of the string
\n",
94 | "g = Every 2nd character of e starting at the end of the string
\n",
95 | "h = Every 6th character of e starting at the middle of the string
\n",
96 | "\n",
97 | "Hint! You'll need to use a function for h!"
98 | ]
99 | },
100 | {
101 | "cell_type": "code",
102 | "execution_count": 2,
103 | "metadata": {
104 | "collapsed": true
105 | },
106 | "outputs": [],
107 | "source": [
108 | "e = \"Lorem Ipsum is simply dummy text of the printing and typesetting industry. It has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like PageMaker including versions of Lorem Ipsum\""
109 | ]
110 | },
111 | {
112 | "cell_type": "code",
113 | "execution_count": 3,
114 | "metadata": {
115 | "collapsed": false
116 | },
117 | "outputs": [
118 | {
119 | "data": {
120 | "text/plain": [
121 | "'L msymx pinptitIsn ssn ytrce0hnnpeogytacltmaec .huetyetsto tceirnetyhd padt9wteeLsh agesaenrcyhkps wlPaidvofes'"
122 | ]
123 | },
124 | "execution_count": 3,
125 | "metadata": {},
126 | "output_type": "execute_result"
127 | }
128 | ],
129 | "source": [
130 | "f = e[0::5]\n",
131 | "f"
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "execution_count": 4,
137 | "metadata": {
138 | "collapsed": false
139 | },
140 | "outputs": [
141 | {
142 | "data": {
143 | "text/plain": [
144 | "\"msImrLf nirvgiucirkMgPei rwfsgislu oke twytee rmda,easpmsImrLgiito testsre oealrethi 09 h idsrlppswt dgacuylinsegiimr,nteey ioteeon aletol u siuncei lotndvvu a I.obnmcp ptaea tt ebac n ptf elgako enr wnn anh s01etensrv xtymddant 'rsdietne a I.rsdigitsptdaginr h ote mu lmss up eo\""
145 | ]
146 | },
147 | "execution_count": 4,
148 | "metadata": {},
149 | "output_type": "execute_result"
150 | }
151 | ],
152 | "source": [
153 | "g = e[-1::-2]\n",
154 | "g"
155 | ]
156 | },
157 | {
158 | "cell_type": "code",
159 | "execution_count": 5,
160 | "metadata": {
161 | "collapsed": false
162 | },
163 | "outputs": [
164 | {
165 | "data": {
166 | "text/plain": [
167 | "'uolneienmgnya sldh0hreetttgmmaarewe ssreMiir rs'"
168 | ]
169 | },
170 | "execution_count": 5,
171 | "metadata": {},
172 | "output_type": "execute_result"
173 | }
174 | ],
175 | "source": [
176 | "h = e[(int(len(e)/2))::6]\n",
177 | "h"
178 | ]
179 | },
180 | {
181 | "cell_type": "markdown",
182 | "metadata": {},
183 | "source": [
184 | "#### Exercise 4: Find a way to create a list containing all the words in e as separate items and assign this list to variable i.\n",
185 | "\n",
186 | "Hint! It's OK to use Google!"
187 | ]
188 | },
189 | {
190 | "cell_type": "code",
191 | "execution_count": null,
192 | "metadata": {
193 | "collapsed": true
194 | },
195 | "outputs": [],
196 | "source": [
197 | "i = e.split()"
198 | ]
199 | },
200 | {
201 | "cell_type": "code",
202 | "execution_count": null,
203 | "metadata": {
204 | "collapsed": false
205 | },
206 | "outputs": [],
207 | "source": [
208 | "f= e.split()"
209 | ]
210 | },
211 | {
212 | "cell_type": "code",
213 | "execution_count": null,
214 | "metadata": {
215 | "collapsed": false
216 | },
217 | "outputs": [],
218 | "source": [
219 | "f[0:6:2]"
220 | ]
221 | }
222 | ],
223 | "metadata": {
224 | "kernelspec": {
225 | "display_name": "Python [default]",
226 | "language": "python",
227 | "name": "python3"
228 | },
229 | "language_info": {
230 | "codemirror_mode": {
231 | "name": "ipython",
232 | "version": 3
233 | },
234 | "file_extension": ".py",
235 | "mimetype": "text/x-python",
236 | "name": "python",
237 | "nbconvert_exporter": "python",
238 | "pygments_lexer": "ipython3",
239 | "version": "3.5.2"
240 | }
241 | },
242 | "nbformat": 4,
243 | "nbformat_minor": 0
244 | }
245 |
--------------------------------------------------------------------------------
/B06E. Basic Data Structures Part 2 Exercises.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# B06E: Basic Data Structures Part 2 Exercises"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "#### Exercise 1: For the following list:\n",
15 | "\n",
16 | "1) Return the count of the number of items in it.
\n",
17 | "2) Remove the 'Tom' and 'Behnom' values from the list
\n",
18 | "3) Sort the list into alphabetical order
\n",
19 | "4) Append a new value to the list of 'Brian'.
\n",
20 | "5) Sort the list in reverse order and convert this to a Tuple.
\n",
21 | "6) Why can't we sort the tuple?
"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 64,
27 | "metadata": {
28 | "collapsed": true
29 | },
30 | "outputs": [],
31 | "source": [
32 | "beatles = ['John','Paul','George','Ringo','Tom','Behnom']"
33 | ]
34 | },
35 | {
36 | "cell_type": "code",
37 | "execution_count": 65,
38 | "metadata": {
39 | "collapsed": false
40 | },
41 | "outputs": [
42 | {
43 | "data": {
44 | "text/plain": [
45 | "6"
46 | ]
47 | },
48 | "execution_count": 65,
49 | "metadata": {},
50 | "output_type": "execute_result"
51 | }
52 | ],
53 | "source": [
54 | "len(beatles)"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": 66,
60 | "metadata": {
61 | "collapsed": false
62 | },
63 | "outputs": [
64 | {
65 | "data": {
66 | "text/plain": [
67 | "['John', 'Paul', 'George', 'Ringo']"
68 | ]
69 | },
70 | "execution_count": 66,
71 | "metadata": {},
72 | "output_type": "execute_result"
73 | }
74 | ],
75 | "source": [
76 | "beatles.remove('Tom')\n",
77 | "beatles.remove('Behnom')\n",
78 | "beatles"
79 | ]
80 | },
81 | {
82 | "cell_type": "code",
83 | "execution_count": 67,
84 | "metadata": {
85 | "collapsed": false
86 | },
87 | "outputs": [
88 | {
89 | "data": {
90 | "text/plain": [
91 | "['George', 'John', 'Paul', 'Ringo']"
92 | ]
93 | },
94 | "execution_count": 67,
95 | "metadata": {},
96 | "output_type": "execute_result"
97 | }
98 | ],
99 | "source": [
100 | "beatles.sort()\n",
101 | "beatles"
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "execution_count": 68,
107 | "metadata": {
108 | "collapsed": false
109 | },
110 | "outputs": [
111 | {
112 | "data": {
113 | "text/plain": [
114 | "['George', 'John', 'Paul', 'Ringo', 'Brian']"
115 | ]
116 | },
117 | "execution_count": 68,
118 | "metadata": {},
119 | "output_type": "execute_result"
120 | }
121 | ],
122 | "source": [
123 | "beatles.append('Brian')\n",
124 | "beatles"
125 | ]
126 | },
127 | {
128 | "cell_type": "code",
129 | "execution_count": 72,
130 | "metadata": {
131 | "collapsed": false
132 | },
133 | "outputs": [
134 | {
135 | "data": {
136 | "text/plain": [
137 | "('Ringo', 'Paul', 'John', 'George', 'Brian')"
138 | ]
139 | },
140 | "execution_count": 72,
141 | "metadata": {},
142 | "output_type": "execute_result"
143 | }
144 | ],
145 | "source": [
146 | "beatles.sort(reverse=True)\n",
147 | "beatles_tuple = tuple(beatles)\n",
148 | "beatles_tuple"
149 | ]
150 | },
151 | {
152 | "cell_type": "markdown",
153 | "metadata": {
154 | "collapsed": true
155 | },
156 | "source": [
157 | "#### Exercise 2: On the following dictionary...\n",
158 | "\n",
159 | "1) Use key indexing to print the values for a, d and h in the dictionary 'mydict1'
\n",
160 | "2) Update the 'mydict1' dictionary with the key 'k' with the value 11
\n",
161 | "3) Create separate lists for the keys and values (Move on if you get stuck!)
"
162 | ]
163 | },
164 | {
165 | "cell_type": "code",
166 | "execution_count": 44,
167 | "metadata": {
168 | "collapsed": false
169 | },
170 | "outputs": [],
171 | "source": [
172 | "mydict1 = {'a':1,\n",
173 | " 'b':2,\n",
174 | " 'c':3,\n",
175 | " 'd':4,\n",
176 | " 'e':5,\n",
177 | " 'f':6,\n",
178 | " 'g':7,\n",
179 | " 'h':8,\n",
180 | " 'i':9,\n",
181 | " 'j':10}"
182 | ]
183 | },
184 | {
185 | "cell_type": "code",
186 | "execution_count": 45,
187 | "metadata": {
188 | "collapsed": false
189 | },
190 | "outputs": [
191 | {
192 | "name": "stdout",
193 | "output_type": "stream",
194 | "text": [
195 | "1 4 8\n"
196 | ]
197 | }
198 | ],
199 | "source": [
200 | "print(\n",
201 | " mydict1['a'],\n",
202 | " mydict1['d'],\n",
203 | " mydict1['h']\n",
204 | ")"
205 | ]
206 | },
207 | {
208 | "cell_type": "code",
209 | "execution_count": 47,
210 | "metadata": {
211 | "collapsed": false
212 | },
213 | "outputs": [
214 | {
215 | "data": {
216 | "text/plain": [
217 | "{'a': 1,\n",
218 | " 'b': 2,\n",
219 | " 'c': 3,\n",
220 | " 'd': 4,\n",
221 | " 'e': 5,\n",
222 | " 'f': 6,\n",
223 | " 'g': 7,\n",
224 | " 'h': 8,\n",
225 | " 'i': 9,\n",
226 | " 'j': 10,\n",
227 | " 'k': 11}"
228 | ]
229 | },
230 | "execution_count": 47,
231 | "metadata": {},
232 | "output_type": "execute_result"
233 | }
234 | ],
235 | "source": [
236 | "mydict1['k'] = 11\n",
237 | "mydict1"
238 | ]
239 | },
240 | {
241 | "cell_type": "code",
242 | "execution_count": null,
243 | "metadata": {
244 | "collapsed": false
245 | },
246 | "outputs": [],
247 | "source": [
248 | "keys = list(mydict1.keys())\n",
249 | "vals = list(mydict1.values())"
250 | ]
251 | },
252 | {
253 | "cell_type": "markdown",
254 | "metadata": {},
255 | "source": [
256 | "#### Exercise 3: Below are 4 data structures...\n",
257 | "\n",
258 | "1) Create a nested dictionary of lists containing these with the keys as follows: Cheese, Biscuits, Vegetables, Milk
\n",
259 | "2) From the dictionary, call the:
\n",
260 | "* third value in cheese
\n",
261 | "* first value in biscuits
\n",
262 | "* last value in vegetables
\n",
263 | "* second value in milk
"
264 | ]
265 | },
266 | {
267 | "cell_type": "code",
268 | "execution_count": 79,
269 | "metadata": {
270 | "collapsed": false
271 | },
272 | "outputs": [],
273 | "source": [
274 | "cheese = ['Stilton','Wensleydale','Cheddar','Brie','Edam','Feta','Halloumi']\n",
275 | "biscuits = ['Digestive','Rich Tea','Hob Nob','Custard Creme','Shortbread']\n",
276 | "vegetables = ('Peas','Carrots','Green Bean','Sweetcorn')\n",
277 | "milk = set()\n",
278 | "milk.add('Semi-Skimmed')\n",
279 | "milk.add('Skimmed')\n",
280 | "milk.add('Whole')\n",
281 | "milk.add('Soy')"
282 | ]
283 | },
284 | {
285 | "cell_type": "code",
286 | "execution_count": null,
287 | "metadata": {
288 | "collapsed": false
289 | },
290 | "outputs": [],
291 | "source": [
292 | "vegetables_list = list(vegetables)\n",
293 | "milk_list = list(milk)\n",
294 | "\n",
295 | "print(type(vegetables_list),type(milk_list)) # Making sure they've converted correctly\n",
296 | "\n",
297 | "\n",
298 | "dict1 = {'Cheese':cheese,\n",
299 | " 'Biscuits':biscuits,\n",
300 | " 'Vegetables':vegetables_list,\n",
301 | " 'Milk':milk_list}\n",
302 | "dict1"
303 | ]
304 | },
305 | {
306 | "cell_type": "code",
307 | "execution_count": null,
308 | "metadata": {
309 | "collapsed": false
310 | },
311 | "outputs": [],
312 | "source": [
313 | "print(\n",
314 | "dict1['Cheese'][2],\n",
315 | "dict1['Biscuits'][0],\n",
316 | "dict1['Vegetables'][-1],\n",
317 | "dict1['Milk'][1]\n",
318 | ")"
319 | ]
320 | },
321 | {
322 | "cell_type": "code",
323 | "execution_count": null,
324 | "metadata": {
325 | "collapsed": true
326 | },
327 | "outputs": [],
328 | "source": []
329 | }
330 | ],
331 | "metadata": {
332 | "kernelspec": {
333 | "display_name": "Python [default]",
334 | "language": "python",
335 | "name": "python3"
336 | },
337 | "language_info": {
338 | "codemirror_mode": {
339 | "name": "ipython",
340 | "version": 3
341 | },
342 | "file_extension": ".py",
343 | "mimetype": "text/x-python",
344 | "name": "python",
345 | "nbconvert_exporter": "python",
346 | "pygments_lexer": "ipython3",
347 | "version": "3.5.2"
348 | }
349 | },
350 | "nbformat": 4,
351 | "nbformat_minor": 0
352 | }
353 |
--------------------------------------------------------------------------------
/B08E. Loops & Iterating Exercises.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# B08E: Loops & Iterating Exercises"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {
13 | "collapsed": true
14 | },
15 | "source": [
16 | "#### Exercise 1: Create a for loop that takes each item in list1, multiplies it by the multiply variable, adds the add variable and then divides by the divide variable. Then append the output to the answers list. Print the completed answers list."
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 16,
22 | "metadata": {
23 | "collapsed": true
24 | },
25 | "outputs": [],
26 | "source": [
27 | "list1 = [2,4,6,1,6,7,8,4,3,10]\n",
28 | "multiply = 3\n",
29 | "add = 4\n",
30 | "divide = 2.89\n",
31 | "\n"
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": 23,
37 | "metadata": {
38 | "collapsed": false
39 | },
40 | "outputs": [
41 | {
42 | "data": {
43 | "text/plain": [
44 | "[3.460207612456747,\n",
45 | " 5.536332179930795,\n",
46 | " 7.612456747404844,\n",
47 | " 2.422145328719723,\n",
48 | " 7.612456747404844,\n",
49 | " 8.650519031141869,\n",
50 | " 9.688581314878892,\n",
51 | " 5.536332179930795,\n",
52 | " 4.498269896193771,\n",
53 | " 11.76470588235294]"
54 | ]
55 | },
56 | "execution_count": 23,
57 | "metadata": {},
58 | "output_type": "execute_result"
59 | }
60 | ],
61 | "source": [
62 | "answers = []\n",
63 | "\n",
64 | "for number in list1:\n",
65 | " answer = ((number * multiply) + add)/ divide\n",
66 | " answers.append(answer)\n",
67 | "\n",
68 | "answers"
69 | ]
70 | },
71 | {
72 | "cell_type": "markdown",
73 | "metadata": {},
74 | "source": [
75 | "#### Exercise 2: Create a while loop that prints the value of i as long as it is less than 100 and increases i by 10 for each iteration."
76 | ]
77 | },
78 | {
79 | "cell_type": "code",
80 | "execution_count": 24,
81 | "metadata": {
82 | "collapsed": true
83 | },
84 | "outputs": [],
85 | "source": []
86 | },
87 | {
88 | "cell_type": "code",
89 | "execution_count": 27,
90 | "metadata": {
91 | "collapsed": false
92 | },
93 | "outputs": [
94 | {
95 | "data": {
96 | "text/plain": [
97 | "100"
98 | ]
99 | },
100 | "execution_count": 27,
101 | "metadata": {},
102 | "output_type": "execute_result"
103 | }
104 | ],
105 | "source": [
106 | "i = 0\n",
107 | "\n",
108 | "while i < 100:\n",
109 | " print(i)\n",
110 | " i += 10"
111 | ]
112 | },
113 | {
114 | "cell_type": "markdown",
115 | "metadata": {},
116 | "source": [
117 | "#### Exercise 3: Use a list comprehension to create a new list containing the values in list2 squared."
118 | ]
119 | },
120 | {
121 | "cell_type": "code",
122 | "execution_count": 9,
123 | "metadata": {
124 | "collapsed": true
125 | },
126 | "outputs": [],
127 | "source": [
128 | "list2 = [132,5345,63576,234234,64563,234,745,98679,344535,467568,36,3456,457,67,3456,3456,567,47,48,26]"
129 | ]
130 | },
131 | {
132 | "cell_type": "code",
133 | "execution_count": 29,
134 | "metadata": {
135 | "collapsed": false
136 | },
137 | "outputs": [
138 | {
139 | "data": {
140 | "text/plain": [
141 | "[1605976966052654874624,\n",
142 | " 19031678624653117601418574705087890625,\n",
143 | " 1078777798742930814383587776072731454343401701376,\n",
144 | " 497163628364268070563569844081052925558505634137547776,\n",
145 | " 1258453063040672570127639252430058258473408875849,\n",
146 | " 492219227058666339787776,\n",
147 | " 52669928340462973740244140625,\n",
148 | " 87548235519000890227459496093724547513024849581201,\n",
149 | " 23568582761849485535596445998913148108046310041494140625,\n",
150 | " 499399159642511431459758945818903329471905680756387610624,\n",
151 | " 3656158440062976,\n",
152 | " 243073345330964281680845098425778176,\n",
153 | " 397339737654378065640319249,\n",
154 | " 1822837804551761449,\n",
155 | " 243073345330964281680845098425778176,\n",
156 | " 243073345330964281680845098425778176,\n",
157 | " 3434239577805805268237746449,\n",
158 | " 52599132235830049,\n",
159 | " 64925062108545024,\n",
160 | " 141167095653376]"
161 | ]
162 | },
163 | "execution_count": 29,
164 | "metadata": {},
165 | "output_type": "execute_result"
166 | }
167 | ],
168 | "source": [
169 | "list3 = [number**2 for number in list2]\n",
170 | "list3"
171 | ]
172 | },
173 | {
174 | "cell_type": "markdown",
175 | "metadata": {
176 | "collapsed": false
177 | },
178 | "source": [
179 | "#### Exercise 4: Create an iterator that iterates through list3, converting types or passing where appropriate."
180 | ]
181 | },
182 | {
183 | "cell_type": "code",
184 | "execution_count": null,
185 | "metadata": {
186 | "collapsed": true
187 | },
188 | "outputs": [],
189 | "source": [
190 | "list3 = [1,2,3,4,'5',6,7,'8',9,True,10,'11',False,None]\n",
191 | "var = 0"
192 | ]
193 | },
194 | {
195 | "cell_type": "code",
196 | "execution_count": null,
197 | "metadata": {
198 | "collapsed": true
199 | },
200 | "outputs": [],
201 | "source": []
202 | }
203 | ],
204 | "metadata": {
205 | "kernelspec": {
206 | "display_name": "Python [default]",
207 | "language": "python",
208 | "name": "python3"
209 | },
210 | "language_info": {
211 | "codemirror_mode": {
212 | "name": "ipython",
213 | "version": 3
214 | },
215 | "file_extension": ".py",
216 | "mimetype": "text/x-python",
217 | "name": "python",
218 | "nbconvert_exporter": "python",
219 | "pygments_lexer": "ipython3",
220 | "version": "3.5.2"
221 | }
222 | },
223 | "nbformat": 4,
224 | "nbformat_minor": 0
225 | }
226 |
--------------------------------------------------------------------------------
/B08S. Loops & Iterating Solutions.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# B08S: Loops & Iterating Solutions"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {
13 | "collapsed": true
14 | },
15 | "source": [
16 | "#### Exercise 1: Create a for loop that takes each item in list1, multiplies it by the multiply variable, adds the add variable and then divides by the divide variable. Then append the output to the answers list. Print the completed answers list."
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": null,
22 | "metadata": {
23 | "collapsed": true
24 | },
25 | "outputs": [],
26 | "source": [
27 | "list1 = [2,4,6,1,6,7,8,4,3,10]\n",
28 | "multiply = 3\n",
29 | "add = 4\n",
30 | "divide = 2.89\n",
31 | "\n",
32 | "answers = []"
33 | ]
34 | },
35 | {
36 | "cell_type": "code",
37 | "execution_count": null,
38 | "metadata": {
39 | "collapsed": false
40 | },
41 | "outputs": [],
42 | "source": [
43 | "for item in list1:\n",
44 | " answers.append((item * multiply + add)/divide)\n",
45 | " \n",
46 | "print(answers)"
47 | ]
48 | },
49 | {
50 | "cell_type": "markdown",
51 | "metadata": {},
52 | "source": [
53 | "#### Exercise 2: Create a while loop that prints the value of i as long as it is less than 100 and increases i by 10 for each iteration."
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": null,
59 | "metadata": {
60 | "collapsed": true
61 | },
62 | "outputs": [],
63 | "source": [
64 | "i = 0"
65 | ]
66 | },
67 | {
68 | "cell_type": "code",
69 | "execution_count": null,
70 | "metadata": {
71 | "collapsed": false
72 | },
73 | "outputs": [],
74 | "source": [
75 | "while i < 100:\n",
76 | " print(i)\n",
77 | " i+=10"
78 | ]
79 | },
80 | {
81 | "cell_type": "markdown",
82 | "metadata": {},
83 | "source": [
84 | "#### Exercise 3: Use a list comprehension to create a new list containing the values in list2 squared."
85 | ]
86 | },
87 | {
88 | "cell_type": "code",
89 | "execution_count": null,
90 | "metadata": {
91 | "collapsed": true
92 | },
93 | "outputs": [],
94 | "source": [
95 | "list2 = [132,5345,63576,234234,64563,234,745,98679,344535,467568,36,3456,457,67,3456,3456,567,47,48,26]"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": null,
101 | "metadata": {
102 | "collapsed": false
103 | },
104 | "outputs": [],
105 | "source": [
106 | "new_list = [item**2 for item in list2]\n",
107 | "new_list"
108 | ]
109 | },
110 | {
111 | "cell_type": "markdown",
112 | "metadata": {},
113 | "source": [
114 | "#### Exercise 4: Create an iterator that iterates through list3, converting or passing where appropriate."
115 | ]
116 | },
117 | {
118 | "cell_type": "code",
119 | "execution_count": 11,
120 | "metadata": {
121 | "collapsed": false
122 | },
123 | "outputs": [],
124 | "source": [
125 | "list3 = [1,2,3,4,'5',6,7,'8',9,True,10,'11',False,None]\n",
126 | "var = 0"
127 | ]
128 | },
129 | {
130 | "cell_type": "code",
131 | "execution_count": 12,
132 | "metadata": {
133 | "collapsed": false
134 | },
135 | "outputs": [
136 | {
137 | "name": "stdout",
138 | "output_type": "stream",
139 | "text": [
140 | "66\n"
141 | ]
142 | }
143 | ],
144 | "source": [
145 | "for item in list3:\n",
146 | " if type(item) == bool:\n",
147 | " item = 0\n",
148 | " else: \n",
149 | " try: \n",
150 | " var = var + item\n",
151 | " except TypeError:\n",
152 | " try:\n",
153 | " var = var + int(item)\n",
154 | " except TypeError:\n",
155 | " pass\n",
156 | " \n",
157 | "print(var)"
158 | ]
159 | },
160 | {
161 | "cell_type": "code",
162 | "execution_count": 9,
163 | "metadata": {
164 | "collapsed": false
165 | },
166 | "outputs": [
167 | {
168 | "data": {
169 | "text/plain": [
170 | "bool"
171 | ]
172 | },
173 | "execution_count": 9,
174 | "metadata": {},
175 | "output_type": "execute_result"
176 | }
177 | ],
178 | "source": [
179 | "type(True)"
180 | ]
181 | },
182 | {
183 | "cell_type": "code",
184 | "execution_count": null,
185 | "metadata": {
186 | "collapsed": true
187 | },
188 | "outputs": [],
189 | "source": []
190 | }
191 | ],
192 | "metadata": {
193 | "anaconda-cloud": {},
194 | "kernelspec": {
195 | "display_name": "Python [default]",
196 | "language": "python",
197 | "name": "python3"
198 | },
199 | "language_info": {
200 | "codemirror_mode": {
201 | "name": "ipython",
202 | "version": 3
203 | },
204 | "file_extension": ".py",
205 | "mimetype": "text/x-python",
206 | "name": "python",
207 | "nbconvert_exporter": "python",
208 | "pygments_lexer": "ipython3",
209 | "version": "3.5.2"
210 | }
211 | },
212 | "nbformat": 4,
213 | "nbformat_minor": 0
214 | }
215 |
--------------------------------------------------------------------------------
/B12. Summary of the Basics section of the course.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# B12: Summary of the Basics Section of the course"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "Well done on completing the Basics section of the course!\n",
15 | "\n",
16 | "We learnt a ton of cool stuff about base Python:\n",
17 | "\n",
18 | "* Python, Anaconda & Jupyter\n",
19 | "* Data Structures\n",
20 | "* Objects, Functions and Methods\n",
21 | "* If / Then / Else \n",
22 | "* Try / Except\n",
23 | "* Loops & Iterators\n",
24 | "* Built in Functions\n",
25 | "* Packages"
26 | ]
27 | },
28 | {
29 | "cell_type": "markdown",
30 | "metadata": {},
31 | "source": [
32 | "However!!! There is a lot that we haven't covered and we haven't scratched particularly deeply into some of the things that Python is capable of. Remember that this training is just the start of your learning - we're teaching you the basics and only what you need to know to get up and running."
33 | ]
34 | }
35 | ],
36 | "metadata": {
37 | "kernelspec": {
38 | "display_name": "Python 3",
39 | "language": "python",
40 | "name": "python3"
41 | },
42 | "language_info": {
43 | "codemirror_mode": {
44 | "name": "ipython",
45 | "version": 3
46 | },
47 | "file_extension": ".py",
48 | "mimetype": "text/x-python",
49 | "name": "python",
50 | "nbconvert_exporter": "python",
51 | "pygments_lexer": "ipython3",
52 | "version": "3.5.1"
53 | }
54 | },
55 | "nbformat": 4,
56 | "nbformat_minor": 0
57 | }
58 |
--------------------------------------------------------------------------------
/Chart.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/department-for-transport/Python-for-Analysts/89f4be24d0394f0269262f532cae36d937699530/Chart.png
--------------------------------------------------------------------------------
/D00. Introduction to Python for Data Analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# D00: Introduction to Python for Data Analysis"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "Welcome to the second section of this course: Python for Data Analysis!\n",
15 | "\n",
16 | "In this section of the course we'll explore how you can use Python to read, clean, manage, structure, wrangle, aggregate, display and output data. This kind of work is critical to being able to glean insight from, and visualise data.\n",
17 | "\n",
18 | "In this lesson we'll start by meeting a few of the useful data analysis libraries that we'll be using."
19 | ]
20 | },
21 | {
22 | "cell_type": "markdown",
23 | "metadata": {},
24 | "source": [
25 | "
"
26 | ]
27 | },
28 | {
29 | "cell_type": "markdown",
30 | "metadata": {},
31 | "source": [
32 | "Numpy is short for 'Numerical Python' and it is a library focused around mathematical computing. Numpy provides data structures that allow us to create multi-dimensional arrays and matrices and also provides a large number of mathematical functions.\n",
33 | "\n",
34 | "Numpy comes as part of the Anaconda installation so you don't need to download anything further."
35 | ]
36 | },
37 | {
38 | "cell_type": "markdown",
39 | "metadata": {},
40 | "source": [
41 | "
"
42 | ]
43 | },
44 | {
45 | "cell_type": "markdown",
46 | "metadata": {},
47 | "source": [
48 | "Scipy is short for 'Scientific Python' and is generally used for analytics, statistics and engineering.\n",
49 | "\n",
50 | "SciPy contains a wide variety of modules including algebra, interpolation, statistics and many more. We'll not be delving too deeply into scipy as part of this course, however it is an important library with a lot of useful functions that you should be aware of.\n",
51 | "\n",
52 | "As with Numpy, Scipy comes as part of Anaconda."
53 | ]
54 | },
55 | {
56 | "cell_type": "markdown",
57 | "metadata": {
58 | "collapsed": true
59 | },
60 | "source": [
61 | "
"
62 | ]
63 | },
64 | {
65 | "cell_type": "markdown",
66 | "metadata": {},
67 | "source": [
68 | "Pandas is built on top of Numpy and allows us to create more complex data structures than we've previously met, the likes of which we're probably more comfortable with as analysts, including series and dataframes. It also provides us with a wide range of tools for reading, dealing with and transforming these data structures as well as tools to convert data from traditional Python data structures such as lists, tuples and dictionaries.\n",
69 | "\n",
70 | "Pandas is built on top of Numpy so the data structures that are created with both packages integrate well with each other and also comes as part of the Anaconda installation."
71 | ]
72 | },
73 | {
74 | "cell_type": "markdown",
75 | "metadata": {
76 | "collapsed": true
77 | },
78 | "source": [
79 | "## Further Reading"
80 | ]
81 | },
82 | {
83 | "cell_type": "markdown",
84 | "metadata": {},
85 | "source": [
86 | "10 Minutes to Pandas
\n",
87 | "Pandas comparison with SQL
\n",
88 | "Pandas comparison with R
\n",
89 | "Pandas comparison with SAS
\n",
90 | "Excellent selection of Pandas tutorials
"
91 | ]
92 | }
93 | ],
94 | "metadata": {
95 | "anaconda-cloud": {},
96 | "kernelspec": {
97 | "display_name": "Python [default]",
98 | "language": "python",
99 | "name": "python3"
100 | },
101 | "language_info": {
102 | "codemirror_mode": {
103 | "name": "ipython",
104 | "version": 3
105 | },
106 | "file_extension": ".py",
107 | "mimetype": "text/x-python",
108 | "name": "python",
109 | "nbconvert_exporter": "python",
110 | "pygments_lexer": "ipython3",
111 | "version": "3.5.2"
112 | }
113 | },
114 | "nbformat": 4,
115 | "nbformat_minor": 0
116 | }
117 |
--------------------------------------------------------------------------------
/D01E. Advanced Data Structures Exercises.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# D01E: Advanced Data Structures Exercises"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "#### Exercise 1: Create the following Numpy arrays with random numbers for the following dimensions:\n",
15 | "\n",
16 | "* 5 x 5\n",
17 | "* 1 x 20\n",
18 | "* 90 x 90\n",
19 | "\n",
20 | "Also round the arrays to 3 decimal places."
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 5,
26 | "metadata": {
27 | "collapsed": false
28 | },
29 | "outputs": [
30 | {
31 | "data": {
32 | "text/plain": [
33 | "array([[ 0.6, 0.5, 0.8, 0.6, 1. ],\n",
34 | " [ 0.1, 0.6, 0.3, 0.2, 0.1],\n",
35 | " [ 0.7, 0.6, 0.9, 0.1, 0.6],\n",
36 | " [ 0.2, 0.8, 0.6, 0. , 0.6],\n",
37 | " [ 0.4, 1. , 0.9, 0.2, 0.2]])"
38 | ]
39 | },
40 | "execution_count": 5,
41 | "metadata": {},
42 | "output_type": "execute_result"
43 | }
44 | ],
45 | "source": [
46 | "import numpy as np\n",
47 | "\n",
48 | "arr1 = np.random.random((5,5)).round(3)\n",
49 | "arr2 = np.random.random((1,20)).round(3)\n",
50 | "arr3 = np.random.random((90,90)).round(3)\n",
51 | "\n",
52 | "\n",
53 | "arr1"
54 | ]
55 | },
56 | {
57 | "cell_type": "markdown",
58 | "metadata": {},
59 | "source": [
60 | "#### Exercise 2: Create a 1 dimensional array containing 10 records and convert this to a pandas series with the custom index:"
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": 7,
66 | "metadata": {
67 | "collapsed": true
68 | },
69 | "outputs": [],
70 | "source": [
71 | "custom_index = ['A','B','C','D','E','F','G','H','I','J']"
72 | ]
73 | },
74 | {
75 | "cell_type": "code",
76 | "execution_count": 23,
77 | "metadata": {
78 | "collapsed": false
79 | },
80 | "outputs": [
81 | {
82 | "data": {
83 | "text/plain": [
84 | "A 0\n",
85 | "B 1\n",
86 | "C 2\n",
87 | "D 3\n",
88 | "E 4\n",
89 | "F 5\n",
90 | "G 6\n",
91 | "H 7\n",
92 | "I 8\n",
93 | "J 9\n",
94 | "dtype: int64"
95 | ]
96 | },
97 | "execution_count": 23,
98 | "metadata": {},
99 | "output_type": "execute_result"
100 | }
101 | ],
102 | "source": [
103 | "import pandas as pd\n",
104 | "\n",
105 | "data1 = np.arange(10)\n",
106 | "\n",
107 | "ser1 = pd.Series(data1,index=custom_index)\n",
108 | "ser1"
109 | ]
110 | },
111 | {
112 | "cell_type": "markdown",
113 | "metadata": {},
114 | "source": [
115 | "#### Exercise 3: Create a Pandas dataframe from the following data, and name the columns col1 - col5. Then...\n",
116 | "\n",
117 | "1) Convert col1 to an array.\n",
118 | "2) Convert col2 to a list.\n",
119 | "3) Convert col3 to a dictionary."
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": 25,
125 | "metadata": {
126 | "collapsed": false
127 | },
128 | "outputs": [],
129 | "source": [
130 | "index = np.arange(0,5)\n",
131 | "data1 = np.random.random(5).round(2)\n",
132 | "data2 = np.random.random(5).round(2)\n",
133 | "data3 = np.random.random(5).round(2)\n",
134 | "data4 = np.random.random(5).round(2)\n",
135 | "data5 = np.random.random(5).round(2)"
136 | ]
137 | },
138 | {
139 | "cell_type": "code",
140 | "execution_count": 26,
141 | "metadata": {
142 | "collapsed": false
143 | },
144 | "outputs": [
145 | {
146 | "data": {
147 | "text/html": [
148 | "\n",
149 | "
\n",
150 | " \n",
151 | " \n",
152 | " | \n",
153 | " col1 | \n",
154 | " col2 | \n",
155 | " col3 | \n",
156 | " col4 | \n",
157 | " col5 | \n",
158 | "
\n",
159 | " \n",
160 | " \n",
161 | " \n",
162 | " 0 | \n",
163 | " 0.80 | \n",
164 | " 0.75 | \n",
165 | " 0.81 | \n",
166 | " 0.58 | \n",
167 | " 0.78 | \n",
168 | "
\n",
169 | " \n",
170 | " 1 | \n",
171 | " 0.89 | \n",
172 | " 0.08 | \n",
173 | " 0.78 | \n",
174 | " 0.87 | \n",
175 | " 0.18 | \n",
176 | "
\n",
177 | " \n",
178 | " 2 | \n",
179 | " 0.99 | \n",
180 | " 0.70 | \n",
181 | " 0.72 | \n",
182 | " 0.15 | \n",
183 | " 0.05 | \n",
184 | "
\n",
185 | " \n",
186 | " 3 | \n",
187 | " 0.29 | \n",
188 | " 0.29 | \n",
189 | " 0.06 | \n",
190 | " 0.34 | \n",
191 | " 0.33 | \n",
192 | "
\n",
193 | " \n",
194 | " 4 | \n",
195 | " 0.25 | \n",
196 | " 0.40 | \n",
197 | " 0.99 | \n",
198 | " 0.33 | \n",
199 | " 0.77 | \n",
200 | "
\n",
201 | " \n",
202 | "
\n",
203 | "
"
204 | ],
205 | "text/plain": [
206 | " col1 col2 col3 col4 col5\n",
207 | "0 0.80 0.75 0.81 0.58 0.78\n",
208 | "1 0.89 0.08 0.78 0.87 0.18\n",
209 | "2 0.99 0.70 0.72 0.15 0.05\n",
210 | "3 0.29 0.29 0.06 0.34 0.33\n",
211 | "4 0.25 0.40 0.99 0.33 0.77"
212 | ]
213 | },
214 | "execution_count": 26,
215 | "metadata": {},
216 | "output_type": "execute_result"
217 | }
218 | ],
219 | "source": [
220 | "df = pd.DataFrame(data= [data1,data2,data3,data4,data5],\n",
221 | " index = index,\n",
222 | " columns = ['col1','col2','col3','col4','col5'])\n",
223 | "\n",
224 | "df\n",
225 | "\n"
226 | ]
227 | },
228 | {
229 | "cell_type": "code",
230 | "execution_count": 18,
231 | "metadata": {
232 | "collapsed": false
233 | },
234 | "outputs": [
235 | {
236 | "data": {
237 | "text/plain": [
238 | "[0.50822657285376105,\n",
239 | " 0.64089422379526018,\n",
240 | " 0.14118480994904115,\n",
241 | " 0.6585429006086484,\n",
242 | " 0.89851069371997294]"
243 | ]
244 | },
245 | "execution_count": 18,
246 | "metadata": {},
247 | "output_type": "execute_result"
248 | }
249 | ],
250 | "source": [
251 | "list1"
252 | ]
253 | },
254 | {
255 | "cell_type": "code",
256 | "execution_count": null,
257 | "metadata": {
258 | "collapsed": true
259 | },
260 | "outputs": [],
261 | "source": []
262 | }
263 | ],
264 | "metadata": {
265 | "kernelspec": {
266 | "display_name": "Python [default]",
267 | "language": "python",
268 | "name": "python3"
269 | },
270 | "language_info": {
271 | "codemirror_mode": {
272 | "name": "ipython",
273 | "version": 3
274 | },
275 | "file_extension": ".py",
276 | "mimetype": "text/x-python",
277 | "name": "python",
278 | "nbconvert_exporter": "python",
279 | "pygments_lexer": "ipython3",
280 | "version": "3.5.2"
281 | }
282 | },
283 | "nbformat": 4,
284 | "nbformat_minor": 0
285 | }
286 |
--------------------------------------------------------------------------------
/D01S. Advanced Data Structures Solutions.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# D01S: Advanced Data Structures Solutions"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "#### Exercise 1: Create the following Numpy arrays with random numbers for the following dimensions:\n",
15 | "\n",
16 | "* 5 x 5\n",
17 | "* 1 x 20\n",
18 | "* 90 x 90\n",
19 | "\n",
20 | "Also round the arrays to 3 decimal places."
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": null,
26 | "metadata": {
27 | "collapsed": false
28 | },
29 | "outputs": [],
30 | "source": [
31 | "import numpy as np\n",
32 | "\n",
33 | "arr1 = np.random.random((5,5))\n",
34 | "arr1 = np.round(arr1,3) \n",
35 | "arr2 = np.random.random((1,20))\n",
36 | "arr2 = np.round(arr2,3) \n",
37 | "arr3 = np.random.random((90,90))\n",
38 | "arr3 = np.round(arr3,3)\n",
39 | "arr3"
40 | ]
41 | },
42 | {
43 | "cell_type": "markdown",
44 | "metadata": {},
45 | "source": [
46 | "#### Exercise 2: Create a 1 dimensional array containing 10 records and convert this to a pandas series with the custom index:"
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": null,
52 | "metadata": {
53 | "collapsed": true
54 | },
55 | "outputs": [],
56 | "source": [
57 | "custom_index = ['A','B','C','D','E','F','G','H','I','J']"
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": null,
63 | "metadata": {
64 | "collapsed": false
65 | },
66 | "outputs": [],
67 | "source": [
68 | "import pandas as pd\n",
69 | "\n",
70 | "arr4 = np.random.random(10)\n",
71 | "ser1 = pd.Series(data=arr4,index=custom_index)\n",
72 | "ser1"
73 | ]
74 | },
75 | {
76 | "cell_type": "markdown",
77 | "metadata": {},
78 | "source": [
79 | "#### Exercise 3: Create a Pandas dataframe from the following data, and name the columns col1 - col5. Then...\n",
80 | "\n",
81 | "1) Convert col1 to an array.\n",
82 | "2) Convert col2 to a list.\n",
83 | "3) Convert col3 to a dictionary."
84 | ]
85 | },
86 | {
87 | "cell_type": "code",
88 | "execution_count": null,
89 | "metadata": {
90 | "collapsed": false
91 | },
92 | "outputs": [],
93 | "source": [
94 | "index = np.arange(0,5)\n",
95 | "data1 = np.random.random(5)\n",
96 | "data2 = np.random.random(5)\n",
97 | "data3 = np.random.random(5)\n",
98 | "data4 = np.random.random(5)\n",
99 | "data5 = np.random.random(5)\n",
100 | "\n",
101 | "data1"
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "execution_count": null,
107 | "metadata": {
108 | "collapsed": false
109 | },
110 | "outputs": [],
111 | "source": [
112 | "cols = ['col1','col2','col3','col4','col5']\n",
113 | "df1 = pd.DataFrame(data=[data1,data2,data3,data4,data5], \n",
114 | " index=index, \n",
115 | " columns=cols) \n",
116 | "df1\n",
117 | "\n",
118 | "\n",
119 | "array = df1['col1'].values\n",
120 |     "col2_list = df1['col2'].tolist()\n",
121 |     "col3_dict = df1['col3'].to_dict()\n",
122 |     "\n",
123 |     "print(array,col2_list,col3_dict)"
124 | ]
125 | },
126 | {
127 | "cell_type": "code",
128 | "execution_count": null,
129 | "metadata": {
130 | "collapsed": true
131 | },
132 | "outputs": [],
133 | "source": []
134 | }
135 | ],
136 | "metadata": {
137 | "kernelspec": {
138 | "display_name": "Python [default]",
139 | "language": "python",
140 | "name": "python3"
141 | },
142 | "language_info": {
143 | "codemirror_mode": {
144 | "name": "ipython",
145 | "version": 3
146 | },
147 | "file_extension": ".py",
148 | "mimetype": "text/x-python",
149 | "name": "python",
150 | "nbconvert_exporter": "python",
151 | "pygments_lexer": "ipython3",
152 | "version": "3.5.2"
153 | }
154 | },
155 | "nbformat": 4,
156 | "nbformat_minor": 0
157 | }
158 |
--------------------------------------------------------------------------------
/D03E. Dataframes - Handling Data Exercises.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# D03E: Dataframes - Handling Data Exercises"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "#### Exercise 1: Import the file below and then:\n",
15 | "\n",
16 | "1) Remove the Unnamed:0 column
\n",
17 |     "2) Rename the wl1-3 columns to say 'Weight Loss Week...' instead of wl
\n",
18 | "3) Rename the se1-3 columns to say 'Self Esteem Week...' instead of se
\n",
19 | "4) Create a function that changes data in the group column from 'DietEx' to 'Diet & Exercise'
\n",
20 | "5) Sort the dataframe by the wl1 column in descending order.\n"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": null,
26 | "metadata": {
27 | "collapsed": true
28 | },
29 | "outputs": [],
30 | "source": [
31 | "path = \"https://vincentarelbundock.github.io/Rdatasets/csv/car/WeightLoss.csv\""
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": null,
37 | "metadata": {
38 | "collapsed": false
39 | },
40 | "outputs": [],
41 | "source": []
42 | },
43 | {
44 | "cell_type": "code",
45 | "execution_count": null,
46 | "metadata": {
47 | "collapsed": false
48 | },
49 | "outputs": [],
50 | "source": []
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": null,
55 | "metadata": {
56 | "collapsed": true
57 | },
58 | "outputs": [],
59 | "source": []
60 | }
61 | ],
62 | "metadata": {
63 | "kernelspec": {
64 | "display_name": "Python [default]",
65 | "language": "python",
66 | "name": "python3"
67 | },
68 | "language_info": {
69 | "codemirror_mode": {
70 | "name": "ipython",
71 | "version": 3
72 | },
73 | "file_extension": ".py",
74 | "mimetype": "text/x-python",
75 | "name": "python",
76 | "nbconvert_exporter": "python",
77 | "pygments_lexer": "ipython3",
78 | "version": "3.5.2"
79 | }
80 | },
81 | "nbformat": 4,
82 | "nbformat_minor": 0
83 | }
84 |
--------------------------------------------------------------------------------
/D04E. Dataframes - Refining and Indexing Exercises.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# D04E: Dataframes - Refining and Indexing Exercises"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "#### Exercise 1: Import the file below and then:\n",
15 | "\n",
16 | "1) Drop the Unnamed:0 column
\n",
17 |     "2) Return the columns list as a new variable
\n",
18 | "3) Keep only the price, ram, cd and trend columns\n",
19 | "4) Create new datasets for each of the following where statements:\n",
20 | "\n",
21 |     "* Where trend greater than 12 and less than or equal to 24\n",
22 | "* Where ram is either 16 or 32\n",
23 | "* Where price is greater than 2500 and cd is yes"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": null,
29 | "metadata": {
30 | "collapsed": true
31 | },
32 | "outputs": [],
33 | "source": [
34 | "path = \"https://vincentarelbundock.github.io/Rdatasets/csv/Ecdat/Computers.csv\""
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": null,
40 | "metadata": {
41 | "collapsed": false
42 | },
43 | "outputs": [],
44 | "source": [
45 | "import pandas as pd\n",
46 | "\n"
47 | ]
48 | },
49 | {
50 | "cell_type": "markdown",
51 | "metadata": {},
52 | "source": [
53 | "#### Exercise 2: Retrieve the records at the following item locations from the base dataset:\n",
54 | "\n",
55 | "* 300\n",
56 | "* 1000-1010\n",
57 | "* Last 10 records\n",
58 | "* The middle record"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": null,
64 | "metadata": {
65 | "collapsed": false
66 | },
67 | "outputs": [],
68 | "source": []
69 | },
70 | {
71 | "cell_type": "markdown",
72 | "metadata": {},
73 | "source": [
74 | "#### Exercise 3: From the base dataset you imported:\n",
75 | "* Set the index as the trend column\n",
76 | "* Drop the name of the index\n",
77 |     "* Create a new dataframe for the trend values 1-6\n",
78 | "* reset the index of this new dataframe\n",
79 | "* drop any unwanted variables"
80 | ]
81 | },
82 | {
83 | "cell_type": "code",
84 | "execution_count": null,
85 | "metadata": {
86 | "collapsed": false
87 | },
88 | "outputs": [],
89 | "source": []
90 | }
91 | ],
92 | "metadata": {
93 | "kernelspec": {
94 | "display_name": "Python [default]",
95 | "language": "python",
96 | "name": "python3"
97 | },
98 | "language_info": {
99 | "codemirror_mode": {
100 | "name": "ipython",
101 | "version": 3
102 | },
103 | "file_extension": ".py",
104 | "mimetype": "text/x-python",
105 | "name": "python",
106 | "nbconvert_exporter": "python",
107 | "pygments_lexer": "ipython3",
108 | "version": "3.5.2"
109 | }
110 | },
111 | "nbformat": 4,
112 | "nbformat_minor": 0
113 | }
114 |
--------------------------------------------------------------------------------
/D05E. Dataframes Merging & Concatenating Exercises.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# D05E: Dataframes Merging & Concatenating Exercises"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "#### Exercise 1: For the dataframes below:\n",
15 | "\n",
16 |     "1) Concatenate df1 and df2
\n",
17 | "2) Merge df1 and df2 using their index
\n",
18 | "3) Merge df3 and df4 using the key & UID values
\n",
19 | "4) merge df3 and df4 by setting the key / UID value as the index on both datasets and then merging on that.
\n",
20 |     "5) What happens when you change the name of 'data 2' in df4 to 'data 1'?
\n",
21 | "\n"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": null,
27 | "metadata": {
28 | "collapsed": false
29 | },
30 | "outputs": [],
31 | "source": [
32 | "import pandas as pd\n",
33 | "import numpy as np\n",
34 | "\n",
35 | "df1 = pd.DataFrame({'key':np.arange(10),\n",
36 | " 'data 1': np.random.random(10)})\n",
37 | "\n",
38 |     "df2 = pd.DataFrame({'key':np.arange(10)+10,\n",
39 | " 'data 1': np.random.random(10)})\n",
40 | "\n",
41 | "\n",
42 | "df3 = pd.DataFrame({'key':['A1','A2','A3','A3','A4','A5','A6','A7','A8','A8'],\n",
43 | " 'data 1': np.arange(10)})\n",
44 | "\n",
45 | "df4 = pd.DataFrame({'UID':['A1','A2','A3','A4','A5','A6','A7','A8','A9'],\n",
46 | " 'data 2': np.arange(9)+10})"
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": null,
52 | "metadata": {
53 | "collapsed": false
54 | },
55 | "outputs": [],
56 | "source": []
57 | },
58 | {
59 | "cell_type": "code",
60 | "execution_count": null,
61 | "metadata": {
62 | "collapsed": false
63 | },
64 | "outputs": [],
65 | "source": []
66 | },
67 | {
68 | "cell_type": "code",
69 | "execution_count": null,
70 | "metadata": {
71 | "collapsed": false
72 | },
73 | "outputs": [],
74 | "source": []
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": null,
79 | "metadata": {
80 | "collapsed": false
81 | },
82 | "outputs": [],
83 | "source": []
84 | },
85 | {
86 | "cell_type": "code",
87 | "execution_count": null,
88 | "metadata": {
89 | "collapsed": false
90 | },
91 | "outputs": [],
92 | "source": []
93 | }
94 | ],
95 | "metadata": {
96 | "kernelspec": {
97 | "display_name": "Python [default]",
98 | "language": "python",
99 | "name": "python3"
100 | },
101 | "language_info": {
102 | "codemirror_mode": {
103 | "name": "ipython",
104 | "version": 3
105 | },
106 | "file_extension": ".py",
107 | "mimetype": "text/x-python",
108 | "name": "python",
109 | "nbconvert_exporter": "python",
110 | "pygments_lexer": "ipython3",
111 | "version": "3.5.2"
112 | }
113 | },
114 | "nbformat": 4,
115 | "nbformat_minor": 0
116 | }
117 |
--------------------------------------------------------------------------------
/D06E. Summary Statistics & GroupBy Exercises.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# D06E: Summary Statistics & GroupBy Exercises"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {
13 | "collapsed": true
14 | },
15 | "source": [
16 | "#### Exercise 1: Using the titanic survivors csv dataset :\n",
17 | "\n",
18 | "1) Import the data and create a pandas dataframe
\n",
19 |     "2) Create a summary table of this data. (Hint - this is easier if you create a numeric variable and use sum())
"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 56,
25 | "metadata": {
26 | "collapsed": true
27 | },
28 | "outputs": [],
29 | "source": [
30 | "import pandas as pd\n",
31 | "\n",
32 | "path = \"https://vincentarelbundock.github.io/Rdatasets/csv/COUNT/titanic.csv\""
33 | ]
34 | },
35 | {
36 | "cell_type": "code",
37 | "execution_count": null,
38 | "metadata": {
39 | "collapsed": false
40 | },
41 | "outputs": [],
42 | "source": [
43 | "tt['passengers'] = 1"
44 | ]
45 | },
46 | {
47 | "cell_type": "code",
48 | "execution_count": null,
49 | "metadata": {
50 | "collapsed": false
51 | },
52 | "outputs": [],
53 | "source": []
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": null,
58 | "metadata": {
59 | "collapsed": false
60 | },
61 | "outputs": [],
62 | "source": []
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": null,
67 | "metadata": {
68 | "collapsed": false
69 | },
70 | "outputs": [],
71 | "source": []
72 | },
73 | {
74 | "cell_type": "code",
75 | "execution_count": null,
76 | "metadata": {
77 | "collapsed": true
78 | },
79 | "outputs": [],
80 | "source": []
81 | },
82 | {
83 | "cell_type": "code",
84 | "execution_count": null,
85 | "metadata": {
86 | "collapsed": true
87 | },
88 | "outputs": [],
89 | "source": []
90 | }
91 | ],
92 | "metadata": {
93 | "kernelspec": {
94 | "display_name": "Python [default]",
95 | "language": "python",
96 | "name": "python3"
97 | },
98 | "language_info": {
99 | "codemirror_mode": {
100 | "name": "ipython",
101 | "version": 3
102 | },
103 | "file_extension": ".py",
104 | "mimetype": "text/x-python",
105 | "name": "python",
106 | "nbconvert_exporter": "python",
107 | "pygments_lexer": "ipython3",
108 | "version": "3.5.2"
109 | }
110 | },
111 | "nbformat": 4,
112 | "nbformat_minor": 0
113 | }
114 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | # Python for Analysts Training
3 |
4 | Hi! And welcome to the Python for Analysts training course. This covers everything you need to know to start using Python for data analysis and visualisation as well as showcasing some more advanced and snazzy stuff, including Statistics, Machine Learning, Web Scraping / Interaction etc.
5 |
6 | The course assumes no prior knowledge of Python and will teach you everything you need to know in order to use Python for data analysis and visualisation, including interfacing with Python via the Jupyter interface, using Text Editors / Integrated Development Environments (IDEs), upgrading Python, working with the command line etc.
7 |
8 | Lastly, note that the course can only hope to give you an introduction to Python for Data Analysis over the 3 days. You'll no doubt want to continue your learning afterward, and the course provides links to relevant material with which to further your development.
9 |
10 | ## Structure of the Course
11 |
12 | The course is designed to cover the best part of 2 days with time for exercises and consolidation in between.
13 |
14 | You will be expected to have a project to practice with ideally for days 1 and 2 but definitely for day 3! This will allow you to consolidate your Python training and continue to learn and develop.
15 |
16 | The structure of the course is as follows:
17 |
18 | ## The Basics
19 |
20 | * Interfacing with Python
21 | * Basic Python Syntax
22 | * Data Structures
23 | * Coding concepts
24 | * Looping
25 | * Enhancing Python with Packages
26 |
27 | ## Working with data
28 |
29 | * Data Analysis Libraries
30 | * Advanced Data Structures
31 | * Importing / Exporting Data
32 | * Working with DataFrames
33 | * Summary Statistics
34 | * Tables
35 |
36 | ## Visualisation
37 |
38 | * Static Visualisation
39 | * Statistical Visualisation
40 | * Interactive Visualisation
41 |
42 | ## Advanced Concepts
43 |
44 | * Using APIs
45 | * Web Scraping
46 | * Statistics
47 | * Machine Learning
48 | * Natural Language Processing
49 |
50 | It also contains the Training Cookbook.py file containing pre-baked useful code for you to pinch at your leisure.
51 |
52 | ## Credits
53 |
54 | Massive thanks to Emma Beynon for her work on the Statistics and Machine Learning notebooks and also for her help in QA'ing and delivering this.
55 |
56 | Also another massive thanks to Jose Portilla whose excellent Python For Analysis and visualisation served as much of the inspiration for this course. You can check out his course on Udemy here: https://www.udemy.com/learning-python-for-data-analysis-and-visualization/
57 |
--------------------------------------------------------------------------------
/V00. Introduction to Data Visualisation.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "from IPython.display import Image"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {},
17 | "source": [
18 | "# V00: Introduction to Data Visualisation"
19 | ]
20 | },
21 | {
22 | "cell_type": "markdown",
23 | "metadata": {},
24 | "source": [
25 |     "You'll likely have heard the term 'data visualisation' (commonly abbreviated to 'data vis') before. It's a general term that describes helping users understand the data by placing it in a visual context. Patterns, trends and correlations that might go undetected in text-based data can be gleaned and highlighted more easily with data visualization software and languages, such as R and of course Python.\n",
26 | "\n",
27 | "More recently, data vis has grown beyond Excel spreadsheets and charts and become more sophisticated allowing data to be displayed in ways such as GIS maps, infographics, sparklines, heatmaps etc."
28 | ]
29 | },
30 | {
31 | "cell_type": "markdown",
32 | "metadata": {},
33 | "source": [
34 | "## Data Vis in Python"
35 | ]
36 | },
37 | {
38 | "cell_type": "markdown",
39 | "metadata": {},
40 | "source": [
41 | "Python has some excellent packages for data visualisation and we'll be giving an overview of some of these in this chapter.\n"
42 | ]
43 | },
44 | {
45 | "cell_type": "markdown",
46 | "metadata": {},
47 | "source": [
48 | ""
49 | ]
50 | },
51 | {
52 | "cell_type": "markdown",
53 | "metadata": {},
54 | "source": [
55 | "Matplotlib is probably the most popular data vis library in Python. It was originally created in 2002 making it one of the oldest Python libraries still in use and is based upon the MATLAB visualisation suite.\n",
56 | "\n",
57 | "Matplotlib can be used in Python scripts, Jupyter, web application servers, and graphical user interface toolkits."
58 | ]
59 | },
60 | {
61 | "cell_type": "markdown",
62 | "metadata": {},
63 | "source": [
64 | ""
65 | ]
66 | },
67 | {
68 | "cell_type": "markdown",
69 | "metadata": {},
70 | "source": [
71 | "Seaborn is a library for making attractive and informative statistical graphics in Python. It is built on top of matplotlib and tightly integrated with Anaconda, including support for numpy and pandas data structures and statistical routines from scipy and statsmodels.\n",
72 | "\n",
73 | "Some of the features that seaborn offers are:\n",
74 | "\n",
75 | "* Several built-in themes that improve on the default matplotlib aesthetics\n",
76 | "* Tools for choosing color palettes to make beautiful plots that reveal patterns in your data\n",
77 | "* Functions for visualizing univariate and bivariate distributions or for comparing them between subsets of data\n",
78 | "* Tools that fit and visualize linear regression models for different kinds of independent and dependent variables\n",
79 | "* Functions that visualize matrices of data and use clustering algorithms to discover structure in those matrices\n",
80 | "* A function to plot statistical timeseries data with flexible estimation and representation of uncertainty around the estimate\n",
81 | "* High-level abstractions for structuring grids of plots that let you easily build complex visualizations\n",
82 | "\n",
83 | "You can install it as follows:"
84 | ]
85 | },
86 | {
87 | "cell_type": "raw",
88 | "metadata": {},
89 | "source": [
90 | "pip install seaborn"
91 | ]
92 | },
93 | {
94 | "cell_type": "markdown",
95 | "metadata": {},
96 | "source": [
97 | ""
98 | ]
99 | },
100 | {
101 | "cell_type": "markdown",
102 | "metadata": {},
103 | "source": [
104 | "Bokeh is a Python interactive visualization library that targets modern web browsers for presentation. Its goal is to provide elegant, concise construction of novel graphics in the style of D3.js, and to extend this capability with high-performance interactivity over very large or streaming datasets. Bokeh can help anyone who would like to quickly and easily create interactive plots, dashboards, and data applications.\n",
105 | "\n",
106 | "Also (if you needed any more incentive to use it!), Bokeh is made by Continuum Analytics, the very same people responsible for putting Anaconda together and comes as part of the standard installation."
107 | ]
108 | },
109 | {
110 | "cell_type": "markdown",
111 | "metadata": {},
112 | "source": [
113 | ""
114 | ]
115 | },
116 | {
117 | "cell_type": "markdown",
118 | "metadata": {},
119 | "source": [
120 |     "Plotly is an online analytics and data visualization tool, and provides online graphing, analytics, and stats tools for individuals and collaboration. It can also be integrated with other software and languages such as Python, R, MATLAB, Perl, Julia, Arduino, and REST. Up until very recently Plotly was a 'paid' service (and still is if you want to host files online), however they've recently taken the decision to go open source.\n",
121 | "\n",
122 | "Plotly isn't a 'typical' Python library in that whilst you can use it offline, much of the content is posted to the web instead of output in Jupyter. This can make it difficult to use sensitive data and is an added layer of complexity.\n",
123 | "\n",
124 | "You can install it as follows:"
125 | ]
126 | },
127 | {
128 | "cell_type": "raw",
129 | "metadata": {
130 | "collapsed": true
131 | },
132 | "source": [
133 | "pip install plotly"
134 | ]
135 | },
136 | {
137 | "cell_type": "markdown",
138 | "metadata": {},
139 | "source": [
140 | "We won't be going through Plotly as part of this course, however there are some excellent tutorials available here."
141 | ]
142 | },
143 | {
144 | "cell_type": "markdown",
145 | "metadata": {},
146 | "source": [
147 | ""
148 | ]
149 | },
150 | {
151 | "cell_type": "markdown",
152 | "metadata": {
153 | "collapsed": true
154 | },
155 | "source": [
156 | "Similar to Ploty, Lightning integrates with a number of software languages and produces some quite swanky looking graphs. Note that whilst the graphs are interactive to an extent, they don't appear to have tooltips that pop up which is a shame.\n",
157 | "\n",
158 | "You can install it as follows:\n"
159 | ]
160 | },
161 | {
162 | "cell_type": "raw",
163 | "metadata": {},
164 | "source": [
165 | "pip install lightning-python"
166 | ]
167 | },
168 | {
169 | "cell_type": "markdown",
170 | "metadata": {},
171 | "source": [
172 | "## Structure of this Section"
173 | ]
174 | },
175 | {
176 | "cell_type": "markdown",
177 | "metadata": {},
178 | "source": [
179 |     "Data Vis in Python is a massive area and you could quite easily fill a training course with examples and exercises for each of the libraries listed. As such the training here will show the basics for a few libraries and signpost you to more information and material to enable you to learn more after the course."
180 | ]
181 | }
182 | ],
183 | "metadata": {
184 | "anaconda-cloud": {},
185 | "kernelspec": {
186 | "display_name": "Python [default]",
187 | "language": "python",
188 | "name": "python3"
189 | },
190 | "language_info": {
191 | "codemirror_mode": {
192 | "name": "ipython",
193 | "version": 3
194 | },
195 | "file_extension": ".py",
196 | "mimetype": "text/x-python",
197 | "name": "python",
198 | "nbconvert_exporter": "python",
199 | "pygments_lexer": "ipython3",
200 | "version": "3.5.2"
201 | }
202 | },
203 | "nbformat": 4,
204 | "nbformat_minor": 0
205 | }
206 |
--------------------------------------------------------------------------------
/V02E. Matplotlib Exercises.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# V02E. Matplotlib Exercises"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "#### Exercise 1. For the following dataframe:\n",
15 | "1) Plot a simple bar chart.
\n",
16 | "2) Set the title and axis labels of the graph.
\n",
17 | "3) In the plot method, remove the legend, change the colour, and increase the width of the bars.
\n",
18 | "4) Remove the ticks (It's OK to C + P this!)
\n",
19 | "5) Remove the top and right borders
"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 1,
25 | "metadata": {
26 | "collapsed": true
27 | },
28 | "outputs": [],
29 | "source": [
30 | "import pandas as pd\n",
31 | "import numpy as np\n",
32 | "import matplotlib.pyplot as plt\n",
33 | "%matplotlib inline"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": 2,
39 | "metadata": {
40 | "collapsed": false
41 | },
42 | "outputs": [
43 | {
44 | "data": {
45 | "text/html": [
46 | "\n",
47 | "
\n",
48 | " \n",
49 | " \n",
50 | " | \n",
51 | " data | \n",
52 | "
\n",
53 | " \n",
54 | " \n",
55 | " \n",
56 | " 11-May-16 | \n",
57 | " 52500 | \n",
58 | "
\n",
59 | " \n",
60 | " 18-May-16 | \n",
61 | " 68400 | \n",
62 | "
\n",
63 | " \n",
64 | " 01-Jun-16 | \n",
65 | " 83200 | \n",
66 | "
\n",
67 | " \n",
68 | " 08-Jun-16 | \n",
69 | " 64200 | \n",
70 | "
\n",
71 | " \n",
72 | " 15-Jun-16 | \n",
73 | " 7300 | \n",
74 | "
\n",
75 | " \n",
76 | "
\n",
77 | "
"
78 | ],
79 | "text/plain": [
80 | " data\n",
81 | "11-May-16 52500\n",
82 | "18-May-16 68400\n",
83 | "01-Jun-16 83200\n",
84 | "08-Jun-16 64200\n",
85 | "15-Jun-16 7300"
86 | ]
87 | },
88 | "execution_count": 2,
89 | "metadata": {},
90 | "output_type": "execute_result"
91 | }
92 | ],
93 | "source": [
94 | "df1 = pd.DataFrame({'date':['11-May-16','18-May-16','01-Jun-16','08-Jun-16','15-Jun-16'], # Importing some dates as a string\n",
95 | " 'data':np.random.randint(0,1000,5)*100})\n",
96 | "df1 = df1.set_index('date')\n",
97 | "df1.index.name = None\n",
98 | "df1"
99 | ]
100 | },
101 | {
102 | "cell_type": "code",
103 | "execution_count": null,
104 | "metadata": {
105 | "collapsed": false
106 | },
107 | "outputs": [],
108 | "source": []
109 | },
110 | {
111 | "cell_type": "markdown",
112 | "metadata": {},
113 | "source": [
114 | "#### Exercise 2. For the following dataframe:\n",
115 | "1) Plot a basic scatter chart.
\n",
116 | "2) Set the title and axis labels of the graph.
\n",
117 | "3) See if you can find out how to set the scale to 0 - 1000
\n",
118 | "4) See if you can find out how to change the scatter marker to a square
"
119 | ]
120 | },
121 | {
122 | "cell_type": "code",
123 | "execution_count": null,
124 | "metadata": {
125 | "collapsed": false
126 | },
127 | "outputs": [],
128 | "source": [
129 | "df2 = pd.DataFrame({'data1':np.random.randint(0,1000,1000),\n",
130 | " 'data2':np.random.randint(0,1000,1000)})\n",
131 | "df2"
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "execution_count": null,
137 | "metadata": {
138 | "collapsed": true
139 | },
140 | "outputs": [],
141 | "source": []
142 | }
143 | ],
144 | "metadata": {
145 | "kernelspec": {
146 | "display_name": "Python [default]",
147 | "language": "python",
148 | "name": "python3"
149 | },
150 | "language_info": {
151 | "codemirror_mode": {
152 | "name": "ipython",
153 | "version": 3
154 | },
155 | "file_extension": ".py",
156 | "mimetype": "text/x-python",
157 | "name": "python",
158 | "nbconvert_exporter": "python",
159 | "pygments_lexer": "ipython3",
160 | "version": "3.5.2"
161 | }
162 | },
163 | "nbformat": 4,
164 | "nbformat_minor": 0
165 | }
166 |
--------------------------------------------------------------------------------
/V04E. Seaborn Exercises.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# V04E. Seaborn Exercises"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "#### Exercise 1: Using the dataframe below:\n",
15 | "\n",
16 | "1) Create a jointplot to show the distribution of the data.
\n",
17 | "2) Change the size of the plot to make it larger.
\n",
18 | "3) Add a title to the plot (NB You'll have to adjust the plot down).
"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 1,
24 | "metadata": {
25 | "collapsed": true
26 | },
27 | "outputs": [],
28 | "source": [
29 | "import pandas as pd\n",
30 | "import numpy as np\n",
31 | "from numpy.random import randn\n",
32 | "import matplotlib.pyplot as plt \n",
33 | "import seaborn as sns\n",
34 | "%matplotlib inline"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": 2,
40 | "metadata": {
41 | "collapsed": false
42 | },
43 | "outputs": [
44 | {
45 | "data": {
46 | "text/html": [
47 | "\n",
48 | "
\n",
49 | " \n",
50 | " \n",
51 | " | \n",
52 | " data1 | \n",
53 | " data2 | \n",
54 | "
\n",
55 | " \n",
56 | " \n",
57 | " \n",
58 | " 0 | \n",
59 | " 0 | \n",
60 | " 304 | \n",
61 | "
\n",
62 | " \n",
63 | " 1 | \n",
64 | " 1 | \n",
65 | " 232 | \n",
66 | "
\n",
67 | " \n",
68 | " 2 | \n",
69 | " 2 | \n",
70 | " 83 | \n",
71 | "
\n",
72 | " \n",
73 | " 3 | \n",
74 | " 3 | \n",
75 | " 471 | \n",
76 | "
\n",
77 | " \n",
78 | " 4 | \n",
79 | " 4 | \n",
80 | " 41 | \n",
81 | "
\n",
82 | " \n",
83 | "
\n",
84 | "
"
85 | ],
86 | "text/plain": [
87 | " data1 data2\n",
88 | "0 0 304\n",
89 | "1 1 232\n",
90 | "2 2 83\n",
91 | "3 3 471\n",
92 | "4 4 41"
93 | ]
94 | },
95 | "execution_count": 2,
96 | "metadata": {},
97 | "output_type": "execute_result"
98 | }
99 | ],
100 | "source": [
101 | "df1a = pd.DataFrame({'data1':np.arange(0,400),\n",
102 | " 'data2':np.random.randint(0,500,400)})\n",
103 | "df1b = pd.DataFrame({'data1':np.arange(400,1000),\n",
104 | " 'data2':np.random.randint(0,1000,600)})\n",
105 | "df1 = pd.concat([df1a,df1b]) \n",
106 | "df1.head(5)"
107 | ]
108 | },
109 | {
110 | "cell_type": "code",
111 | "execution_count": null,
112 | "metadata": {
113 | "collapsed": false
114 | },
115 | "outputs": [],
116 | "source": []
117 | },
118 | {
119 | "cell_type": "markdown",
120 | "metadata": {},
121 | "source": [
122 | "#### Exercise 2: Using the Numpy Array below:\n",
123 | "\n",
124 | "1) Plot a heatmap.
\n",
125 | "2) Increase the size of the heatmap.
\n",
126 | "3) Add values to the heatmap via annotation.
\n",
127 | "4) Change the colours to a nicer palette.
\n",
128 | "5) Set a title for the heatmap."
129 | ]
130 | },
131 | {
132 | "cell_type": "code",
133 | "execution_count": null,
134 | "metadata": {
135 | "collapsed": false
136 | },
137 | "outputs": [],
138 | "source": [
139 | "data = np.random.rand(10,12)\n",
140 | "data"
141 | ]
142 | },
143 | {
144 | "cell_type": "code",
145 | "execution_count": null,
146 | "metadata": {
147 | "collapsed": false
148 | },
149 | "outputs": [],
150 | "source": []
151 | },
152 | {
153 | "cell_type": "code",
154 | "execution_count": null,
155 | "metadata": {
156 | "collapsed": true
157 | },
158 | "outputs": [],
159 | "source": []
160 | }
161 | ],
162 | "metadata": {
163 | "kernelspec": {
164 | "display_name": "Python [default]",
165 | "language": "python",
166 | "name": "python3"
167 | },
168 | "language_info": {
169 | "codemirror_mode": {
170 | "name": "ipython",
171 | "version": 3
172 | },
173 | "file_extension": ".py",
174 | "mimetype": "text/x-python",
175 | "name": "python",
176 | "nbconvert_exporter": "python",
177 | "pygments_lexer": "ipython3",
178 | "version": "3.5.2"
179 | }
180 | },
181 | "nbformat": 4,
182 | "nbformat_minor": 0
183 | }
184 |
--------------------------------------------------------------------------------
/V05. Sandbox Challenge!.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Sandbox challenge!"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "#### 1) Import, wrangle, tabulate and visualise the Beat the Blues dataset."
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 5,
20 | "metadata": {
21 | "collapsed": false
22 | },
23 | "outputs": [],
24 | "source": [
25 | "import pandas as pd\n",
26 | "import numpy as np\n",
27 | "\n",
28 | "path = \"https://vincentarelbundock.github.io/Rdatasets/csv/HSAUR/BtheB.csv\""
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": null,
34 | "metadata": {
35 | "collapsed": true
36 | },
37 | "outputs": [],
38 | "source": [
39 | "df"
40 | ]
41 | }
42 | ],
43 | "metadata": {
44 | "kernelspec": {
45 | "display_name": "Python 3",
46 | "language": "python",
47 | "name": "python3"
48 | },
49 | "language_info": {
50 | "codemirror_mode": {
51 | "name": "ipython",
52 | "version": 3
53 | },
54 | "file_extension": ".py",
55 | "mimetype": "text/x-python",
56 | "name": "python",
57 | "nbconvert_exporter": "python",
58 | "pygments_lexer": "ipython3",
59 | "version": "3.5.1"
60 | }
61 | },
62 | "nbformat": 4,
63 | "nbformat_minor": 0
64 | }
65 |
--------------------------------------------------------------------------------
/X00. Recap & Sandbox Challenge.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# X00: Recap & Sandbox Challenge"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "Using the datasets below:\n",
15 | "\n",
16 | "* Import the data into pandas using read_csv\n",
17 | "\n",
18 | "* Clean the data1 dataframe - there will be a lot of NaN data, columns we don't need and unclear codes! (Hint: Lesson D05 deals with missing data.)\n",
19 | "\n",
20 | "* Merge the timezones and airport names from data2 onto data1 using the 3 letter airport code. You should remove any airports with NaN values first. (Hint: Lesson D05 deals with merging data.)\n",
21 | "\n",
22 | "* Convert the Date/Time fields from a string into a DateTime value (Hint: Lesson D07 deals with Datetime formats but you've not had a chance to practice them yet. You will also need a Try/Except Loop from Lesson B07)\n",
23 | "\n",
24 | "* Where possible create UTC datetime columns - these need to take account of the Timezone variable from data2 alongside the local time.\n",
25 | "\n",
26 | "* Create a numeric variable upon which to perform aggregation etc. (Hint: This is as simple as data['column'] = 1 )\n",
27 | "\n",
28 | "* Aggregate the flights by hour and by departure airport (Hint: Lesson D06 deals with Aggregating data.)\n",
29 | "\n",
30 | "* Create some charts (or maybe a heatmap!) of the aggregated data. (Hint: The Visualisation Lessons are a good place to start for this!)\n",
31 | "\n",
32 | "\n",
33 | "Note that you can also use the Training Cookbook for reference!"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": 31,
39 | "metadata": {
40 | "collapsed": true
41 | },
42 | "outputs": [],
43 | "source": [
44 | "# import pandas as pd\n",
45 | "\n",
46 | "data1 = 'https://s3-eu-west-1.amazonaws.com/imcreate/Flight+Data.csv' # 1 day of flight data for all UK airports\n",
47 | "data2 = 'https://raw.githubusercontent.com/jpatokal/openflights/master/data/airports.dat' # Airport Codes data\n",
48 | "data2_cols = ['Aiport ID','Name','City','Country','IATA/FAA Code','ICAO','Latitude',\n",
49 | " 'Longitude','Altitude','Timezone','DST','TZ database time zone'] # Columns for the data2 dataset"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": 25,
55 | "metadata": {
56 | "collapsed": false
57 | },
58 | "outputs": [
59 | {
60 | "name": "stderr",
61 | "output_type": "stream",
62 | "text": [
63 | "C:\\Users\\ITS Admin\\Anaconda3\\lib\\site-packages\\IPython\\core\\interactiveshell.py:2723: DtypeWarning: Columns (0,1,2,4,5,6,7,8,9,11,12,14) have mixed types. Specify dtype option on import or set low_memory=False.\n",
64 | " interactivity=interactivity, compiler=compiler, result=result)\n"
65 | ]
66 | }
67 | ],
68 | "source": [
69 | "df_data = pd.read_csv(data1) # Importing data1\n",
70 | "df_codes = pd.read_csv(data2,names=data2_cols,index_col=0) # Importing data2"
71 | ]
72 | }
73 | ],
74 | "metadata": {
75 | "kernelspec": {
76 | "display_name": "Python 3",
77 | "language": "python",
78 | "name": "python3"
79 | },
80 | "language_info": {
81 | "codemirror_mode": {
82 | "name": "ipython",
83 | "version": 3
84 | },
85 | "file_extension": ".py",
86 | "mimetype": "text/x-python",
87 | "name": "python",
88 | "nbconvert_exporter": "python",
89 | "pygments_lexer": "ipython3",
90 | "version": "3.5.1"
91 | }
92 | },
93 | "nbformat": 4,
94 | "nbformat_minor": 0
95 | }
96 |
--------------------------------------------------------------------------------
/X01E.Introduction to APIs Exercies.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# X01E.Introduction to APIs Exercises"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## Flightstats API"
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "Below are a User Name, API key and URL to the Flightstats API.\n",
22 | "\n",
23 | "Challenge 1: Head over to the Flightstats API Console and explore using the Interactive Documentation section to return JSON data. You can then Copy & Paste this into Python to model it into a Pandas DataFrame.\n",
24 | "\n",
25 | "Challenge 2: Instead of using the Interactive Console, see if you can use Python and Requests to get a response from the API. You'll be able to use the console for clues as to the structure of the API."
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": 2,
31 | "metadata": {
32 | "collapsed": true
33 | },
34 | "outputs": [],
35 | "source": [
36 | "import requests\n",
37 | "import pandas as pd\n",
38 | "\n",
39 | "app_id = 'b7da91e7'\n",
40 | "app_key = '485251145270dd267769ad79550c7de4'\n",
41 | "url = 'https://developer.flightstats.com/api-docs/flightstatus/v2/json/airport/status'"
42 | ]
43 | },
44 | {
45 | "cell_type": "markdown",
46 | "metadata": {},
47 | "source": [
48 | "## Hacktrain API"
49 | ]
50 | },
51 | {
52 | "cell_type": "markdown",
53 | "metadata": {},
54 | "source": [
55 | "Challenge 1: Head over to the Hacktrain API and explore how you can use the console to return data which you can then copy and paste into Python to model it into a Pandas DataFrame.\n",
56 | "\n",
57 | "Challenge 2: Instead of using the console, use Python and Requests to interface with the API and return data to model into a Pandas dataframe."
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": 3,
63 | "metadata": {
64 | "collapsed": true
65 | },
66 | "outputs": [],
67 | "source": [
68 | "import requests\n",
69 | "import pandas as pd\n",
70 | "\n",
71 | "url = 'http://darwin.hacktrain.com/api/train/' # The Location of the API\n",
72 | "values= {'apiKey':'b05cc6d2-7704-4350-a44f-062b59ba39c5','rows':'10'} # A Dictionary for our API key and limiting the rows to 10\n",
73 | "stat = 'EUS'\t\t\t\t\t\t\t\t\t # The API parameter for Station"
74 | ]
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": null,
79 | "metadata": {
80 | "collapsed": true
81 | },
82 | "outputs": [],
83 | "source": []
84 | }
85 | ],
86 | "metadata": {
87 | "kernelspec": {
88 | "display_name": "Python 3",
89 | "language": "python",
90 | "name": "python3"
91 | },
92 | "language_info": {
93 | "codemirror_mode": {
94 | "name": "ipython",
95 | "version": 3
96 | },
97 | "file_extension": ".py",
98 | "mimetype": "text/x-python",
99 | "name": "python",
100 | "nbconvert_exporter": "python",
101 | "pygments_lexer": "ipython3",
102 | "version": "3.5.1"
103 | }
104 | },
105 | "nbformat": 4,
106 | "nbformat_minor": 0
107 | }
108 |
--------------------------------------------------------------------------------
/Z0. In Closing....ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Z0: In Closing... 10-08-2016"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "Well, we've come to the end of the course! Congratulations on making it this far and I hope you've enjoyed the course and are feeling motivated and ready to go and exercise your new found Python skills!\n",
15 | "\n",
16 | "As I've said a few times, it's VITAL that you have a project to consolidate and expand your learning on. It's even MORE VITAL that you devote the necessary time and effort to not only starting, but carrying on and completing your project. To help you with this I've put together a checklist of things to do to make sure that your project is a success!"
17 | ]
18 | },
19 | {
20 | "cell_type": "markdown",
21 | "metadata": {},
22 | "source": [
23 | "### 1) Have a plan (around your day job!)\n",
24 | "### 2) Have a Deadline\n",
25 | "### 3) Set Aside Time in your Diary\n",
26 | "### 4) Avoid Interruptions!\n",
27 | "### 5) Don't be afraid of getting stuck!\n",
28 | "### 6) Beware Procrastination!\n",
29 | "### 7) Codeclubs & Show and Tell!"
30 | ]
31 | },
32 | {
33 | "cell_type": "markdown",
34 | "metadata": {},
35 | "source": [
36 | "### Further Learning..."
37 | ]
38 | },
39 | {
40 | "cell_type": "markdown",
41 | "metadata": {},
42 | "source": [
43 | "Once you've spent some time using Python, you'll probably want to start investigating other concepts, particularly some of the more exciting ones such as APIs, Web Scraping, Machine Learning etc."
44 | ]
45 | },
46 | {
47 | "cell_type": "markdown",
48 | "metadata": {},
49 | "source": [
50 | "These are massive topics worthy of training courses in their own right, but you will find some material to get you started in my Github Analysts Training repo available as follows:"
51 | ]
52 | },
53 | {
54 | "cell_type": "markdown",
55 | "metadata": {},
56 | "source": [
57 | "https://github.com/department-for-transport/Python-for-Analysts"
58 | ]
59 | },
60 | {
61 | "cell_type": "markdown",
62 | "metadata": {},
63 | "source": [
64 | "This contains both the advanced stuff as well as the lessons you've gone through as part of this training also. You can view them online here:"
65 | ]
66 | },
67 | {
68 | "cell_type": "markdown",
69 | "metadata": {},
70 | "source": [
71 | "https://nbviewer.jupyter.org/github/department-for-transport/Python-for-Analysts/"
72 | ]
73 | },
74 | {
75 | "cell_type": "code",
76 | "execution_count": null,
77 | "metadata": {
78 | "collapsed": true
79 | },
80 | "outputs": [],
81 | "source": []
82 | }
83 | ],
84 | "metadata": {
85 | "kernelspec": {
86 | "display_name": "Python 3",
87 | "language": "python",
88 | "name": "python3"
89 | },
90 | "language_info": {
91 | "codemirror_mode": {
92 | "name": "ipython",
93 | "version": 3
94 | },
95 | "file_extension": ".py",
96 | "mimetype": "text/x-python",
97 | "name": "python",
98 | "nbconvert_exporter": "python",
99 | "pygments_lexer": "ipython3",
100 | "version": "3.5.1"
101 | }
102 | },
103 | "nbformat": 4,
104 | "nbformat_minor": 0
105 | }
106 |
--------------------------------------------------------------------------------
/img/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/department-for-transport/Python-for-Analysts/89f4be24d0394f0269262f532cae36d937699530/img/.DS_Store
--------------------------------------------------------------------------------
/img/API.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/department-for-transport/Python-for-Analysts/89f4be24d0394f0269262f532cae36d937699530/img/API.png
--------------------------------------------------------------------------------
/img/Clipboard04.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/department-for-transport/Python-for-Analysts/89f4be24d0394f0269262f532cae36d937699530/img/Clipboard04.png
--------------------------------------------------------------------------------
/img/Icon
:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/department-for-transport/Python-for-Analysts/89f4be24d0394f0269262f532cae36d937699530/img/Icon
--------------------------------------------------------------------------------
/img/Launcher.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/department-for-transport/Python-for-Analysts/89f4be24d0394f0269262f532cae36d937699530/img/Launcher.jpg
--------------------------------------------------------------------------------
/img/Requests.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/department-for-transport/Python-for-Analysts/89f4be24d0394f0269262f532cae36d937699530/img/Requests.png
--------------------------------------------------------------------------------
/img/bokeh.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/department-for-transport/Python-for-Analysts/89f4be24d0394f0269262f532cae36d937699530/img/bokeh.png
--------------------------------------------------------------------------------
/img/cell type.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/department-for-transport/Python-for-Analysts/89f4be24d0394f0269262f532cae36d937699530/img/cell type.png
--------------------------------------------------------------------------------
/img/desktop.ini:
--------------------------------------------------------------------------------
1 | [.ShellClassInfo]
2 | InfoTip=This folder is shared online.
3 | IconFile=C:\Program Files (x86)\Google\Drive\googledrivesync.exe
4 | IconIndex=12
5 |
--------------------------------------------------------------------------------
/img/desktop.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/department-for-transport/Python-for-Analysts/89f4be24d0394f0269262f532cae36d937699530/img/desktop.png
--------------------------------------------------------------------------------
/img/github.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/department-for-transport/Python-for-Analysts/89f4be24d0394f0269262f532cae36d937699530/img/github.jpg
--------------------------------------------------------------------------------
/img/interrupt.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/department-for-transport/Python-for-Analysts/89f4be24d0394f0269262f532cae36d937699530/img/interrupt.png
--------------------------------------------------------------------------------
/img/joins.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/department-for-transport/Python-for-Analysts/89f4be24d0394f0269262f532cae36d937699530/img/joins.jpg
--------------------------------------------------------------------------------
/img/joins.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/department-for-transport/Python-for-Analysts/89f4be24d0394f0269262f532cae36d937699530/img/joins.png
--------------------------------------------------------------------------------
/img/jupyter.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/department-for-transport/Python-for-Analysts/89f4be24d0394f0269262f532cae36d937699530/img/jupyter.jpg
--------------------------------------------------------------------------------
/img/lightning.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/department-for-transport/Python-for-Analysts/89f4be24d0394f0269262f532cae36d937699530/img/lightning.png
--------------------------------------------------------------------------------
/img/matplot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/department-for-transport/Python-for-Analysts/89f4be24d0394f0269262f532cae36d937699530/img/matplot.png
--------------------------------------------------------------------------------
/img/numpy.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/department-for-transport/Python-for-Analysts/89f4be24d0394f0269262f532cae36d937699530/img/numpy.jpg
--------------------------------------------------------------------------------
/img/pandas.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/department-for-transport/Python-for-Analysts/89f4be24d0394f0269262f532cae36d937699530/img/pandas.jpg
--------------------------------------------------------------------------------
/img/plotly.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/department-for-transport/Python-for-Analysts/89f4be24d0394f0269262f532cae36d937699530/img/plotly.png
--------------------------------------------------------------------------------
/img/rename.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/department-for-transport/Python-for-Analysts/89f4be24d0394f0269262f532cae36d937699530/img/rename.png
--------------------------------------------------------------------------------
/img/run cell.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/department-for-transport/Python-for-Analysts/89f4be24d0394f0269262f532cae36d937699530/img/run cell.png
--------------------------------------------------------------------------------
/img/scikit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/department-for-transport/Python-for-Analysts/89f4be24d0394f0269262f532cae36d937699530/img/scikit.png
--------------------------------------------------------------------------------
/img/scipy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/department-for-transport/Python-for-Analysts/89f4be24d0394f0269262f532cae36d937699530/img/scipy.png
--------------------------------------------------------------------------------
/img/seaborn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/department-for-transport/Python-for-Analysts/89f4be24d0394f0269262f532cae36d937699530/img/seaborn.png
--------------------------------------------------------------------------------
/img/shell.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/department-for-transport/Python-for-Analysts/89f4be24d0394f0269262f532cae36d937699530/img/shell.jpg
--------------------------------------------------------------------------------
/img/slack.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/department-for-transport/Python-for-Analysts/89f4be24d0394f0269262f532cae36d937699530/img/slack.png
--------------------------------------------------------------------------------
/img/stack.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/department-for-transport/Python-for-Analysts/89f4be24d0394f0269262f532cae36d937699530/img/stack.png
--------------------------------------------------------------------------------
/img/statsmodels.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/department-for-transport/Python-for-Analysts/89f4be24d0394f0269262f532cae36d937699530/img/statsmodels.png
--------------------------------------------------------------------------------
/img/structure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/department-for-transport/Python-for-Analysts/89f4be24d0394f0269262f532cae36d937699530/img/structure.png
--------------------------------------------------------------------------------
/img/tags.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/department-for-transport/Python-for-Analysts/89f4be24d0394f0269262f532cae36d937699530/img/tags.png
--------------------------------------------------------------------------------