├── .gitattributes
├── .gitignore
├── .ipynb_checkpoints
    ├── Course FAQ-checkpoint.ipynb
    ├── Data Project - Stock Market Analysis-checkpoint.ipynb
    └── Lec 15 - DataFrames-checkpoint.ipynb
├── Course FAQ.ipynb
├── Data Project - Election Analysis.ipynb
├── Data Project - Stock Market Analysis.ipynb
├── Intro to Data Project Section.ipynb
├── Intro to Data Projects - Titanic.ipynb
├── Lec 10 - Array Transposition.ipynb
├── Lec 11 - Universal Array Function.ipynb
├── Lec 12 - Array Processing.ipynb
├── Lec 13 - Array Input and Output.ipynb
├── Lec 14 - Series.ipynb
├── Lec 15 - DataFrames.ipynb
├── Lec 16 - Index Objects.ipynb
├── Lec 17 -Reindexing.ipynb
├── Lec 18 -Drop Entry.ipynb
├── Lec 19 - Selecting Entries.ipynb
├── Lec 20 - Data Alignment.ipynb
├── Lec 21 - Rank and Sort.ipynb
├── Lec 22 - Summary Statistics.ipynb
├── Lec 23 - Missing Data.ipynb
├── Lec 24 - Index Hierarchy.ipynb
├── Lec 25 - Reading and Writing Text Files.ipynb
├── Lec 26 - JSON with Python.ipynb
├── Lec 27 - HTML with Python.ipynb
├── Lec 28 - Excel with Python.ipynb
├── Lec 29 - Merge.ipynb
├── Lec 30 -Merge on Index.ipynb
├── Lec 31 - Concatenate.ipynb
├── Lec 32 - Combining DataFrames.ipynb
├── Lec 33 - Reshaping.ipynb
├── Lec 34 - Pivoting.ipynb
├── Lec 35 - Duplicates in DataFrames.ipynb
├── Lec 36 - Mapping.ipynb
├── Lec 37 - Replace.ipynb
├── Lec 38 - Rename Index.ipynb
├── Lec 39 - Binning.ipynb
├── Lec 40 - Outliers.ipynb
├── Lec 41 - Permutation.ipynb
├── Lec 42 - GroupBy on DataFrames.ipynb
├── Lec 43 - Groupby on Dict and Series.ipynb
├── Lec 44 - Aggregation.ipynb
├── Lec 45 - Splitting, Applying and Combining.ipynb
├── Lec 46 - Cross-Tabulation.ipynb
├── Lec 47 - Installing Seaborn.ipynb
├── Lec 48 - Histograms.ipynb
├── Lec 49 - Kernel Density Estimation Plots.ipynb
├── Lec 50 - Combining Plot Styles.ipynb
├── Lec 51 - Box and Violin Plots.ipynb
├── Lec 52 - Regression Plots.ipynb
├── Lec 53 - Heatmaps and Clustered Matrices.ipynb
├── Lec 7 - Creating Arrays.ipynb
├── Lec 8 - Using arrays and scalars.ipynb
├── Lec 9 -Indexing Arrays.ipynb
├── Lec_28_test.xlsx
├── README.md
├── lec25.csv
├── lec25.txt
├── my_array.npy
├── my_test_text.txt
├── mytextdata_out.csv
├── report_summaries_form_3p.csv
├── two_arrays.npz
└── winequality_red.csv


/.gitattributes:
--------------------------------------------------------------------------------
 1 | # Auto detect text files and perform LF normalization
 2 | * text=auto
 3 | 
 4 | # Custom for Visual Studio
 5 | *.cs     diff=csharp
 6 | *.sln    merge=union
 7 | *.csproj merge=union
 8 | *.vbproj merge=union
 9 | *.fsproj merge=union
10 | *.dbproj merge=union
11 | 
12 | # Standard to msysgit
13 | *.doc	 diff=astextplain
14 | *.DOC	 diff=astextplain
15 | *.docx diff=astextplain
16 | *.DOCX diff=astextplain
17 | *.dot  diff=astextplain
18 | *.DOT  diff=astextplain
19 | *.pdf  diff=astextplain
20 | *.PDF	 diff=astextplain
21 | *.rtf	 diff=astextplain
22 | *.RTF	 diff=astextplain
23 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Windows image file caches
 2 | Thumbs.db
 3 | ehthumbs.db
 4 | 
 5 | # Folder config file
 6 | Desktop.ini
 7 | 
 8 | # Recycle Bin used on file shares
 9 | $RECYCLE.BIN/
10 | 
11 | # Windows Installer files
12 | *.cab
13 | *.msi
14 | *.msm
15 | *.msp
16 | 
17 | # =========================
18 | # Operating System Files
19 | # =========================
20 | 
21 | # OSX
22 | # =========================
23 | 
24 | .DS_Store
25 | .AppleDouble
26 | .LSOverride
27 | 
28 | # Icon must end with two \r
29 | Icon
30 | 
31 | # Thumbnails
32 | ._*
33 | 
34 | # Files that might appear on external disk
35 | .Spotlight-V100
36 | .Trashes
37 | 
38 | # Directories potentially created on remote AFP share
39 | .AppleDB
40 | .AppleDesktop
41 | Network Trash Folder
42 | Temporary Items
43 | .apdisk
44 | 


--------------------------------------------------------------------------------
/.ipynb_checkpoints/Course FAQ-checkpoint.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# FAQ FOR THE COURSE"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "## INSTALLING PYTHON\n",
 15 |     "\n",
 16 |     "**How do I install Python?**\n",
 17 |     "\n",
 18 |     "It is HIGHLY suggested you use the Anaconda distribution for this course and installing packages. You can install the Anaconda distribution here: http://continuum.io/downloads\n",
 19 |     "\n",
 20 |     "Pick the correct OS and note that the course is in Python 2 *not* Python 3. However, you can use either for this course; there will just be some minor changes you will have to make to follow along (e.g. print functions, range instead of xrange, integer division, etc.). For more info on the differences between Python 2 and Python 3, check out this resource:\n",
 21 |     "\n",
 22 |     "http://sebastianraschka.com/Articles/2014_python_2_3_key_diff.html"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "markdown",
 27 |    "metadata": {},
 28 |    "source": [
 29 |     "## INSTALLING PACKAGES\n",
 30 |     "\n",
 31 |     "**How do I install packages for Python?**\n",
 32 |     "\n",
 33 |     "To install packages for Python (such as the seaborn package for statistical plotting) you have two options, using \"conda install\" or \"pip install\". It is HIGHLY recommended you use \"conda install\". You can install a package through the following steps:\n",
 34 |     "\n",
 35 |     "    1.) Open up your command line. (Linux and OSX MAC users can open up a terminal (bash) command line interface). For Windows users, open up powershell (or Anaconda Command Prompt). For windows users, search for Anaconda Command Prompt with your search tool.\n",
 36 |     "    \n",
 37 |     "    2.) Once in your command line interface, type: conda install *package-name* (or pip install if you do not have the Anaconda distribution).\n",
 38 |     "    \n",
 39 |     "    3.) Remember to install any dependencies for the particular package you are trying to install (conda install will do this automatically)\n",
 40 |     "    \n",
 41 |     "    4.) All done! Restart python (or your iPython Notebook) and you are good to go!"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "markdown",
 46 |    "metadata": {},
 47 |    "source": [
 48 |     "## iPython Notebooks and nbviewer\n",
 49 |     "\n",
 50 |     "**How do I view the iPython Notebooks?**\n",
 51 |     "\n",
 52 |     "You have two options: go to http://nbviewer.ipython.org/github/jmportilla/ or go to https://github.com/jmportilla/Udemy-notes\n",
 53 |     "\n",
 54 |     "Then click on the lecture you are looking for. Note: Machine Learning Lectures are in a different Folder!"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "markdown",
 59 |    "metadata": {},
 60 |    "source": [
 61 |     "## iPython\n",
 62 |     "\n",
 63 |     "**Why use iPython instead of just regular Python?**\n",
 64 |     "\n",
 65 |     "ipython is an advanced interactive python shell.\n",
 66 |     "\n",
 67 |     "When you invoke it, you will get a python interactive interpreter (like running python), but with some features that make it much easier and more pleasant to use. The few ones I routinely use are:\n",
 68 |     "\n",
 69 |     "    * Tab autocompletion (on class names, functions, methods, variables)\n",
 70 |     "    * More explicit and colour-highlighted error messages\n",
 71 |     "    * Better history management\n",
 72 |     "    * Basic UNIX shell integration (you can run simple shell commands such as cp, ls, rm, etc. directly from the ipython command line)\n",
 73 |     "    * Nice integration with many common GUI modules (PyQt, PyGTK, tkinter)\n",
 74 |     "    * very nice integration with Data Science Python packages!\n"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "markdown",
 79 |    "metadata": {},
 80 |    "source": [
 81 |     "# Questions on lecture material\n",
 82 |     "\n",
 83 |     "**What do I do if I have a question?**\n",
 84 |     "\n",
 85 |     "Make sure to visit the discussion section of the particular lecture you have a question about. Many common questions have already been answered. If not, use the search box to find if a general question has already been answered.\n",
 86 |     "\n",
 87 |     "**There is so much material! Where do I start?**\n",
 88 |     "\n",
 89 |     "I suggest you start with the setup lectures. Then depending on your previous programming experience, check out Appendix A for material and resources for learning Python. Afterwards, continue on with the rest of the course in order. Check out the rest of the Appendix Resources as you see fit; for example, if you are trying to use what you've learned with a SQL database at work, check out the SQL and Python appendix."
 90 |    ]
 91 |   },
 92 |   {
 93 |    "cell_type": "markdown",
 94 |    "metadata": {},
 95 |    "source": [
 96 |     "## GIT AND GITHUB\n",
 97 |     "\n",
 98 |     "**What is the difference between Git and GitHub?**\n",
 99 |     "\n",
100 |     "Git is a version control system; think of it as a series of snapshots (commits) of your code. You see a path of these snapshots, in which order they were created. You can make branches to experiment and come back to snapshots you took.\n",
101 |     "GitHub is a website on which you can publish your Git repositories and collaborate with other people.\n",
102 |     "\n",
103 |     "**Is Git saving every repository locally (in the user's machine) and in GitHub?**\n",
104 |     "No, it's only local. You can decide to push (publish) some branches on GitHub.\n",
105 |     "\n",
106 |     "**Can you use Git without GitHub? If yes, what would be the benefit for using GitHub?**\n",
107 |     "Yes, Git runs local if you don't use GitHub. An alternative to using GitHub could be running Git on files hosted on Dropbox, but GitHub is a more streamlined service as it was made especially for Git.\n",
108 |     "\n",
109 |     "**How does Git compare to a backup system such as Time Machine?**\n",
110 |     "It's a different thing, Git lets you track changes and your development process. If you use Git with GitHub, it becomes effectively a backup. However usually you would not push all the time to GitHub, at which point you do not have a full backup if things go wrong. I use git in a folder that is synchronized with Dropbox.\n",
111 |     "\n",
112 |     "**Is this a manual process, in other words if you don't commit you won't have a new version of the changes made?**\n",
113 |     "Yes, committing and pushing are both manual.\n",
114 |     "\n",
115 |     "**If you are not collaborating and you are already using a backup system, why would you use Git?**\n",
116 |     "\n",
117 |     "If you encounter an error between commits you can use the command git diff to see the differences between the current code and the last working commit, helping you to locate your error.\n",
118 |     "\n",
119 |     "You can also just go back to the last working commit.\n",
120 |     "\n",
121 |     "If you want to try a change but are not sure that it will work, you can create a branch to test your code change. If it works fine, you merge it into the main branch. If it does not, you just throw the branch away and go back to the main branch.\n",
122 |     "\n",
123 |     "You did some debugging. Before you commit you always look at the changes from the last commit. You see your debug print statement that you forgot to delete.\n",
124 |     "For more info check out: http://gitimmersion.com/\n"
125 |    ]
126 |   },
127 |   {
128 |    "cell_type": "code",
129 |    "execution_count": null,
130 |    "metadata": {
131 |     "collapsed": true
132 |    },
133 |    "outputs": [],
134 |    "source": []
135 |   }
136 |  ],
137 |  "metadata": {
138 |   "kernelspec": {
139 |    "display_name": "Python 2",
140 |    "language": "python",
141 |    "name": "python2"
142 |   },
143 |   "language_info": {
144 |    "codemirror_mode": {
145 |     "name": "ipython",
146 |     "version": 2
147 |    },
148 |    "file_extension": ".py",
149 |    "mimetype": "text/x-python",
150 |    "name": "python",
151 |    "nbconvert_exporter": "python",
152 |    "pygments_lexer": "ipython2",
153 |    "version": "2.7.7"
154 |   }
155 |  },
156 |  "nbformat": 4,
157 |  "nbformat_minor": 0
158 | }
159 | 


--------------------------------------------------------------------------------
/Course FAQ.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# FAQ FOR THE COURSE"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "## INSTALLING PYTHON\n",
 15 |     "\n",
 16 |     "**How do I install Python?**\n",
 17 |     "\n",
 18 |     "It is HIGHLY suggested you use the Anaconda distribution for this course and installing packages. You can install the Anaconda distribution here: http://continuum.io/downloads\n",
 19 |     "\n",
 20 |     "Pick the correct OS and note that the course is in Python 2 *not* Python 3. However, you can use either for this course; there will just be some minor changes you will have to make to follow along (e.g. print functions, range instead of xrange, integer division, etc.). For more info on the differences between Python 2 and Python 3, check out this resource:\n",
 21 |     "\n",
 22 |     "http://sebastianraschka.com/Articles/2014_python_2_3_key_diff.html"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "markdown",
 27 |    "metadata": {},
 28 |    "source": [
 29 |     "## INSTALLING PACKAGES\n",
 30 |     "\n",
 31 |     "**How do I install packages for Python?**\n",
 32 |     "\n",
 33 |     "To install packages for Python (such as the seaborn package for statistical plotting) you have two options, using \"conda install\" or \"pip install\". It is HIGHLY recommended you use \"conda install\". You can install a package through the following steps:\n",
 34 |     "\n",
 35 |     "    1.) Open up your command line. (Linux and OSX MAC users can open up a terminal (bash) command line interface). For Windows users, open up powershell (or Anaconda Command Prompt). For windows users, search for Anaconda Command Prompt with your search tool.\n",
 36 |     "    \n",
 37 |     "    2.) Once in your command line interface, type: conda install *package-name* (or pip install if you do not have the Anaconda distribution).\n",
 38 |     "    \n",
 39 |     "    3.) Remember to install any dependencies for the particular package you are trying to install (conda install will do this automatically)\n",
 40 |     "    \n",
 41 |     "    4.) All done! Restart python (or your iPython Notebook) and you are good to go!"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "markdown",
 46 |    "metadata": {},
 47 |    "source": [
 48 |     "## iPython Notebooks and nbviewer\n",
 49 |     "\n",
 50 |     "**How do I view the iPython Notebooks?**\n",
 51 |     "\n",
 52 |     "You have two options: go to http://nbviewer.ipython.org/github/jmportilla/ or go to https://github.com/jmportilla/Udemy-notes\n",
 53 |     "\n",
 54 |     "Then click on the lecture you are looking for. Note: Machine Learning Lectures are in a different Folder!"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "markdown",
 59 |    "metadata": {},
 60 |    "source": [
 61 |     "## iPython\n",
 62 |     "\n",
 63 |     "**Why use iPython instead of just regular Python?**\n",
 64 |     "\n",
 65 |     "ipython is an advanced interactive python shell.\n",
 66 |     "\n",
 67 |     "When you invoke it, you will get a python interactive interpreter (like running python), but with some features that make it much easier and more pleasant to use. The few ones I routinely use are:\n",
 68 |     "\n",
 69 |     "    * Tab autocompletion (on class names, functions, methods, variables)\n",
 70 |     "    * More explicit and colour-highlighted error messages\n",
 71 |     "    * Better history management\n",
 72 |     "    * Basic UNIX shell integration (you can run simple shell commands such as cp, ls, rm, etc. directly from the ipython command line)\n",
 73 |     "    * Nice integration with many common GUI modules (PyQt, PyGTK, tkinter)\n",
 74 |     "    * very nice integration with Data Science Python packages!\n"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "markdown",
 79 |    "metadata": {},
 80 |    "source": [
 81 |     "# Questions on lecture material\n",
 82 |     "\n",
 83 |     "**What do I do if I have a question?**\n",
 84 |     "\n",
 85 |     "Make sure to visit the discussion section of the particular lecture you have a question about. Many common questions have already been answered. If not, use the search box to find if a general question has already been answered.\n",
 86 |     "\n",
 87 |     "**There is so much material! Where do I start?**\n",
 88 |     "\n",
 89 |     "I suggest you start with the setup lectures. Then depending on your previous programming experience, check out Appendix A for material and resources for learning Python. Afterwards, continue on with the rest of the course in order. Check out the rest of the Appendix Resources as you see fit; for example, if you are trying to use what you've learned with a SQL database at work, check out the SQL and Python appendix."
 90 |    ]
 91 |   },
 92 |   {
 93 |    "cell_type": "markdown",
 94 |    "metadata": {},
 95 |    "source": [
 96 |     "## GIT AND GITHUB\n",
 97 |     "\n",
 98 |     "**What is the difference between Git and GitHub?**\n",
 99 |     "\n",
100 |     "Git is a version control system; think of it as a series of snapshots (commits) of your code. You see a path of these snapshots, in which order they were created. You can make branches to experiment and come back to snapshots you took.\n",
101 |     "GitHub is a website on which you can publish your Git repositories and collaborate with other people.\n",
102 |     "\n",
103 |     "**Is Git saving every repository locally (in the user's machine) and in GitHub?**\n",
104 |     "No, it's only local. You can decide to push (publish) some branches on GitHub.\n",
105 |     "\n",
106 |     "**Can you use Git without GitHub? If yes, what would be the benefit for using GitHub?**\n",
107 |     "Yes, Git runs local if you don't use GitHub. An alternative to using GitHub could be running Git on files hosted on Dropbox, but GitHub is a more streamlined service as it was made especially for Git.\n",
108 |     "\n",
109 |     "**How does Git compare to a backup system such as Time Machine?**\n",
110 |     "It's a different thing, Git lets you track changes and your development process. If you use Git with GitHub, it becomes effectively a backup. However usually you would not push all the time to GitHub, at which point you do not have a full backup if things go wrong. I use git in a folder that is synchronized with Dropbox.\n",
111 |     "\n",
112 |     "**Is this a manual process, in other words if you don't commit you won't have a new version of the changes made?**\n",
113 |     "Yes, committing and pushing are both manual.\n",
114 |     "\n",
115 |     "**If you are not collaborating and you are already using a backup system, why would you use Git?**\n",
116 |     "\n",
117 |     "If you encounter an error between commits you can use the command git diff to see the differences between the current code and the last working commit, helping you to locate your error.\n",
118 |     "\n",
119 |     "You can also just go back to the last working commit.\n",
120 |     "\n",
121 |     "If you want to try a change but are not sure that it will work, you can create a branch to test your code change. If it works fine, you merge it into the main branch. If it does not, you just throw the branch away and go back to the main branch.\n",
122 |     "\n",
123 |     "You did some debugging. Before you commit you always look at the changes from the last commit. You see your debug print statement that you forgot to delete.\n",
124 |     "For more info check out: http://gitimmersion.com/\n"
125 |    ]
126 |   },
127 |   {
128 |    "cell_type": "code",
129 |    "execution_count": null,
130 |    "metadata": {
131 |     "collapsed": true
132 |    },
133 |    "outputs": [],
134 |    "source": []
135 |   }
136 |  ],
137 |  "metadata": {
138 |   "kernelspec": {
139 |    "display_name": "Python 2",
140 |    "language": "python",
141 |    "name": "python2"
142 |   },
143 |   "language_info": {
144 |    "codemirror_mode": {
145 |     "name": "ipython",
146 |     "version": 2
147 |    },
148 |    "file_extension": ".py",
149 |    "mimetype": "text/x-python",
150 |    "name": "python",
151 |    "nbconvert_exporter": "python",
152 |    "pygments_lexer": "ipython2",
153 |    "version": "2.7.7"
154 |   }
155 |  },
156 |  "nbformat": 4,
157 |  "nbformat_minor": 0
158 | }
159 | 


--------------------------------------------------------------------------------
/Intro to Data Project Section.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "metadata": {},
 6 |    "source": [
 7 |     "# INTRO TO DATA PROJECTS"
 8 |    ]
 9 |   },
10 |   {
11 |    "cell_type": "markdown",
12 |    "metadata": {},
13 |    "source": [
14 |     "Congrats! Let's get started.\n",
15 |     "\n",
16 |     "You have the skills you need to analyze large data sets! \n",
17 |     "\n",
18 |     "If you want, go ahead and just go for it with these free public data set resources:\n",
19 |     "\n",
20 |     "http://www.data.gov/\n",
21 |     "\n",
22 |     "http://aws.amazon.com/public-data-sets/\n",
23 |     "\n",
24 |     "http://www.google.com/publicdata/directory"
25 |    ]
26 |   },
27 |   {
28 |    "cell_type": "code",
29 |    "execution_count": null,
30 |    "metadata": {
31 |     "collapsed": true
32 |    },
33 |    "outputs": [],
34 |    "source": []
35 |   }
36 |  ],
37 |  "metadata": {
38 |   "kernelspec": {
39 |    "display_name": "Python 2",
40 |    "language": "python",
41 |    "name": "python2"
42 |   },
43 |   "language_info": {
44 |    "codemirror_mode": {
45 |     "name": "ipython",
46 |     "version": 2
47 |    },
48 |    "file_extension": ".py",
49 |    "mimetype": "text/x-python",
50 |    "name": "python",
51 |    "nbconvert_exporter": "python",
52 |    "pygments_lexer": "ipython2",
53 |    "version": "2.7.7"
54 |   }
55 |  },
56 |  "nbformat": 4,
57 |  "nbformat_minor": 0
58 | }
59 | 


--------------------------------------------------------------------------------
/Lec 10 - Array Transposition.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "metadata": {
  3 |   "name": "",
  4 |   "signature": "sha256:bb4fdb58a55209fe5ad2a5c84c1e89e7c983816d3eea16db99f1d5c8d070d2c5"
  5 |  },
  6 |  "nbformat": 3,
  7 |  "nbformat_minor": 0,
  8 |  "worksheets": [
  9 |   {
 10 |    "cells": [
 11 |     {
 12 |      "cell_type": "code",
 13 |      "collapsed": false,
 14 |      "input": [
 15 |       "import numpy as np"
 16 |      ],
 17 |      "language": "python",
 18 |      "metadata": {},
 19 |      "outputs": [],
 20 |      "prompt_number": 2
 21 |     },
 22 |     {
 23 |      "cell_type": "code",
 24 |      "collapsed": false,
 25 |      "input": [
 26 |       "#Create array\n",
 27 |       "arr = np.arange(50).reshape((10,5))\n",
 28 |       "\n",
 29 |       "#Show\n",
 30 |       "arr"
 31 |      ],
 32 |      "language": "python",
 33 |      "metadata": {},
 34 |      "outputs": [
 35 |       {
 36 |        "metadata": {},
 37 |        "output_type": "pyout",
 38 |        "prompt_number": 6,
 39 |        "text": [
 40 |         "array([[ 0,  1,  2,  3,  4],\n",
 41 |         "       [ 5,  6,  7,  8,  9],\n",
 42 |         "       [10, 11, 12, 13, 14],\n",
 43 |         "       [15, 16, 17, 18, 19],\n",
 44 |         "       [20, 21, 22, 23, 24],\n",
 45 |         "       [25, 26, 27, 28, 29],\n",
 46 |         "       [30, 31, 32, 33, 34],\n",
 47 |         "       [35, 36, 37, 38, 39],\n",
 48 |         "       [40, 41, 42, 43, 44],\n",
 49 |         "       [45, 46, 47, 48, 49]])"
 50 |        ]
 51 |       }
 52 |      ],
 53 |      "prompt_number": 6
 54 |     },
 55 |     {
 56 |      "cell_type": "code",
 57 |      "collapsed": false,
 58 |      "input": [
 59 |       "#Lets transpose\n",
 60 |       "arr.T"
 61 |      ],
 62 |      "language": "python",
 63 |      "metadata": {},
 64 |      "outputs": [
 65 |       {
 66 |        "metadata": {},
 67 |        "output_type": "pyout",
 68 |        "prompt_number": 6,
 69 |        "text": [
 70 |         "array([[ 0,  5, 10, 15, 20, 25, 30, 35, 40, 45],\n",
 71 |         "       [ 1,  6, 11, 16, 21, 26, 31, 36, 41, 46],\n",
 72 |         "       [ 2,  7, 12, 17, 22, 27, 32, 37, 42, 47],\n",
 73 |         "       [ 3,  8, 13, 18, 23, 28, 33, 38, 43, 48],\n",
 74 |         "       [ 4,  9, 14, 19, 24, 29, 34, 39, 44, 49]])"
 75 |        ]
 76 |       }
 77 |      ],
 78 |      "prompt_number": 6
 79 |     },
 80 |     {
 81 |      "cell_type": "code",
 82 |      "collapsed": false,
 83 |      "input": [
 84 |       "# Taking dot product of matrices\n",
 85 |       "np.dot(arr.T,arr)"
 86 |      ],
 87 |      "language": "python",
 88 |      "metadata": {},
 89 |      "outputs": [
 90 |       {
 91 |        "metadata": {},
 92 |        "output_type": "pyout",
 93 |        "prompt_number": 8,
 94 |        "text": [
 95 |         "array([[7125, 7350, 7575, 7800, 8025],\n",
 96 |         "       [7350, 7585, 7820, 8055, 8290],\n",
 97 |         "       [7575, 7820, 8065, 8310, 8555],\n",
 98 |         "       [7800, 8055, 8310, 8565, 8820],\n",
 99 |         "       [8025, 8290, 8555, 8820, 9085]])"
100 |        ]
101 |       }
102 |      ],
103 |      "prompt_number": 8
104 |     },
105 |     {
106 |      "cell_type": "code",
107 |      "collapsed": false,
108 |      "input": [
109 |       "# For 3D matrix\n",
110 |       "arr3d = np.arange(50).reshape((5,5,2))\n",
111 |       "\n",
112 |       "#Show\n",
113 |       "arr3d"
114 |      ],
115 |      "language": "python",
116 |      "metadata": {},
117 |      "outputs": [
118 |       {
119 |        "metadata": {},
120 |        "output_type": "pyout",
121 |        "prompt_number": 3,
122 |        "text": [
123 |         "array([[[ 0,  1],\n",
124 |         "        [ 2,  3],\n",
125 |         "        [ 4,  5],\n",
126 |         "        [ 6,  7],\n",
127 |         "        [ 8,  9]],\n",
128 |         "\n",
129 |         "       [[10, 11],\n",
130 |         "        [12, 13],\n",
131 |         "        [14, 15],\n",
132 |         "        [16, 17],\n",
133 |         "        [18, 19]],\n",
134 |         "\n",
135 |         "       [[20, 21],\n",
136 |         "        [22, 23],\n",
137 |         "        [24, 25],\n",
138 |         "        [26, 27],\n",
139 |         "        [28, 29]],\n",
140 |         "\n",
141 |         "       [[30, 31],\n",
142 |         "        [32, 33],\n",
143 |         "        [34, 35],\n",
144 |         "        [36, 37],\n",
145 |         "        [38, 39]],\n",
146 |         "\n",
147 |         "       [[40, 41],\n",
148 |         "        [42, 43],\n",
149 |         "        [44, 45],\n",
150 |         "        [46, 47],\n",
151 |         "        [48, 49]]])"
152 |        ]
153 |       }
154 |      ],
155 |      "prompt_number": 3
156 |     },
157 |     {
158 |      "cell_type": "code",
159 |      "collapsed": false,
160 |      "input": [
161 |       "#We can also transpose a 3d matrix\n",
162 |       "\n",
163 |       "arr3d.transpose((1,0,2))"
164 |      ],
165 |      "language": "python",
166 |      "metadata": {},
167 |      "outputs": [
168 |       {
169 |        "metadata": {},
170 |        "output_type": "pyout",
171 |        "prompt_number": 4,
172 |        "text": [
173 |         "array([[[ 0,  1],\n",
174 |         "        [10, 11],\n",
175 |         "        [20, 21],\n",
176 |         "        [30, 31],\n",
177 |         "        [40, 41]],\n",
178 |         "\n",
179 |         "       [[ 2,  3],\n",
180 |         "        [12, 13],\n",
181 |         "        [22, 23],\n",
182 |         "        [32, 33],\n",
183 |         "        [42, 43]],\n",
184 |         "\n",
185 |         "       [[ 4,  5],\n",
186 |         "        [14, 15],\n",
187 |         "        [24, 25],\n",
188 |         "        [34, 35],\n",
189 |         "        [44, 45]],\n",
190 |         "\n",
191 |         "       [[ 6,  7],\n",
192 |         "        [16, 17],\n",
193 |         "        [26, 27],\n",
194 |         "        [36, 37],\n",
195 |         "        [46, 47]],\n",
196 |         "\n",
197 |         "       [[ 8,  9],\n",
198 |         "        [18, 19],\n",
199 |         "        [28, 29],\n",
200 |         "        [38, 39],\n",
201 |         "        [48, 49]]])"
202 |        ]
203 |       }
204 |      ],
205 |      "prompt_number": 4
206 |     },
207 |     {
208 |      "cell_type": "code",
209 |      "collapsed": false,
210 |      "input": [
211 |       "# If you need to get more specific use swapaxes\n",
212 |       "arr = np.array([[1,2,3]])\n",
213 |       "\n",
214 |       "#Show \n",
215 |       "arr"
216 |      ],
217 |      "language": "python",
218 |      "metadata": {},
219 |      "outputs": [
220 |       {
221 |        "metadata": {},
222 |        "output_type": "pyout",
223 |        "prompt_number": 18,
224 |        "text": [
225 |         "array([[1, 2, 3]])"
226 |        ]
227 |       }
228 |      ],
229 |      "prompt_number": 18
230 |     },
231 |     {
232 |      "cell_type": "code",
233 |      "collapsed": false,
234 |      "input": [
235 |       "arr.swapaxes(0,1)"
236 |      ],
237 |      "language": "python",
238 |      "metadata": {},
239 |      "outputs": [
240 |       {
241 |        "metadata": {},
242 |        "output_type": "pyout",
243 |        "prompt_number": 19,
244 |        "text": [
245 |         "array([[1],\n",
246 |         "       [2],\n",
247 |         "       [3]])"
248 |        ]
249 |       }
250 |      ],
251 |      "prompt_number": 19
252 |     },
253 |     {
254 |      "cell_type": "code",
255 |      "collapsed": false,
256 |      "input": [],
257 |      "language": "python",
258 |      "metadata": {},
259 |      "outputs": []
260 |     }
261 |    ],
262 |    "metadata": {}
263 |   }
264 |  ]
265 | }


--------------------------------------------------------------------------------
/Lec 11 - Universal Array Function.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "metadata": {
  3 |   "name": "",
  4 |   "signature": "sha256:4dbb48cfd953559c70b10d7a06f4889f400ba1afcdf3ca64e7f89dcb5b2fdfcf"
  5 |  },
  6 |  "nbformat": 3,
  7 |  "nbformat_minor": 0,
  8 |  "worksheets": [
  9 |   {
 10 |    "cells": [
 11 |     {
 12 |      "cell_type": "code",
 13 |      "collapsed": false,
 14 |      "input": [
 15 |       "import numpy as np\n",
 16 |       "\n",
 17 |       "# Universal functions perform operations on all elements in an array"
 18 |      ],
 19 |      "language": "python",
 20 |      "metadata": {},
 21 |      "outputs": [],
 22 |      "prompt_number": 11
 23 |     },
 24 |     {
 25 |      "cell_type": "code",
 26 |      "collapsed": false,
 27 |      "input": [
 28 |       "arr = np.arange(11)\n",
 29 |       "\n",
 30 |       "arr"
 31 |      ],
 32 |      "language": "python",
 33 |      "metadata": {},
 34 |      "outputs": [
 35 |       {
 36 |        "metadata": {},
 37 |        "output_type": "pyout",
 38 |        "prompt_number": 4,
 39 |        "text": [
 40 |         "array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])"
 41 |        ]
 42 |       }
 43 |      ],
 44 |      "prompt_number": 4
 45 |     },
 46 |     {
 47 |      "cell_type": "code",
 48 |      "collapsed": false,
 49 |      "input": [
 50 |       "#Taking Square Roots\n",
 51 |       "np.sqrt(arr)"
 52 |      ],
 53 |      "language": "python",
 54 |      "metadata": {},
 55 |      "outputs": [
 56 |       {
 57 |        "metadata": {},
 58 |        "output_type": "pyout",
 59 |        "prompt_number": 6,
 60 |        "text": [
 61 |         "array([ 0.        ,  1.        ,  1.41421356,  1.73205081,  2.        ,\n",
 62 |         "        2.23606798,  2.44948974,  2.64575131,  2.82842712,  3.        ,\n",
 63 |         "        3.16227766])"
 64 |        ]
 65 |       }
 66 |      ],
 67 |      "prompt_number": 6
 68 |     },
 69 |     {
 70 |      "cell_type": "code",
 71 |      "collapsed": false,
 72 |      "input": [
 73 |       "#Calculating exponential (e^x)\n",
 74 |       "np.exp(arr)"
 75 |      ],
 76 |      "language": "python",
 77 |      "metadata": {},
 78 |      "outputs": [
 79 |       {
 80 |        "metadata": {},
 81 |        "output_type": "pyout",
 82 |        "prompt_number": 8,
 83 |        "text": [
 84 |         "array([  1.00000000e+00,   2.71828183e+00,   7.38905610e+00,\n",
 85 |         "         2.00855369e+01,   5.45981500e+01,   1.48413159e+02,\n",
 86 |         "         4.03428793e+02,   1.09663316e+03,   2.98095799e+03,\n",
 87 |         "         8.10308393e+03,   2.20264658e+04])"
 88 |        ]
 89 |       }
 90 |      ],
 91 |      "prompt_number": 8
 92 |     },
 93 |     {
 94 |      "cell_type": "code",
 95 |      "collapsed": false,
 96 |      "input": [
 97 |       "# Binary Functions require two arrays\n",
 98 |       "\n",
 99 |       "#Random array (normal dist)\n",
100 |       "A = np.random.randn(10)\n",
101 |       "\n",
102 |       "A\n"
103 |      ],
104 |      "language": "python",
105 |      "metadata": {},
106 |      "outputs": [
107 |       {
108 |        "metadata": {},
109 |        "output_type": "pyout",
110 |        "prompt_number": 17,
111 |        "text": [
112 |         "array([ 1.67337614,  0.313715  , -0.54322771, -0.62989299, -1.80379121,\n",
113 |         "       -2.14201895,  0.55044184, -0.30340582,  0.38182576,  1.61825713])"
114 |        ]
115 |       }
116 |      ],
117 |      "prompt_number": 17
118 |     },
119 |     {
120 |      "cell_type": "code",
121 |      "collapsed": false,
122 |      "input": [
123 |       "#Random array (normal dist)\n",
124 |       "B = np.random.randn(10)\n",
125 |       "B"
126 |      ],
127 |      "language": "python",
128 |      "metadata": {},
129 |      "outputs": [
130 |       {
131 |        "metadata": {},
132 |        "output_type": "pyout",
133 |        "prompt_number": 18,
134 |        "text": [
135 |         "array([-0.36856389,  0.88334254,  0.73189034,  0.39825998, -0.13333202,\n",
136 |         "       -0.04089832, -0.17904368, -0.31792306, -0.51818526,  0.17130282])"
137 |        ]
138 |       }
139 |      ],
140 |      "prompt_number": 18
141 |     },
142 |     {
143 |      "cell_type": "code",
144 |      "collapsed": false,
145 |      "input": [
146 |       "#Addition\n",
147 |       "np.add(A,B)"
148 |      ],
149 |      "language": "python",
150 |      "metadata": {},
151 |      "outputs": [
152 |       {
153 |        "metadata": {},
154 |        "output_type": "pyout",
155 |        "prompt_number": 19,
156 |        "text": [
157 |         "array([-0.26311799,  0.46621906,  0.77886172, -1.34775418,  0.84899094,\n",
158 |         "       -0.10713425,  1.12217039,  0.30454513, -0.17441401,  1.15341669])"
159 |        ]
160 |       }
161 |      ],
162 |      "prompt_number": 19
163 |     },
164 |     {
165 |      "cell_type": "code",
166 |      "collapsed": false,
167 |      "input": [
168 |       "#Finding the element-wise max of two arrays (np.minimum gives the min)\n",
169 |       "np.maximum(A,B)"
170 |      ],
171 |      "language": "python",
172 |      "metadata": {},
173 |      "outputs": [
174 |       {
175 |        "metadata": {},
176 |        "output_type": "pyout",
177 |        "prompt_number": 20,
178 |        "text": [
179 |         "array([ 0.10544591,  0.88334254,  0.73189034,  0.39825998,  0.98232296,\n",
180 |         "       -0.04089832,  1.30121407,  0.62246819,  0.34377126,  0.98211387])"
181 |        ]
182 |       }
183 |      ],
184 |      "prompt_number": 20
185 |     },
186 |     {
187 |      "cell_type": "code",
188 |      "collapsed": false,
189 |      "input": [
190 |       "#For a full list of all available universal functions, see the NumPy docs\n",
191 |       "website = \"http://docs.scipy.org/doc/numpy/reference/ufuncs.html#available-ufuncs\"\n",
192 |       "import webbrowser\n",
193 |       "webbrowser.open(website)"
194 |      ],
195 |      "language": "python",
196 |      "metadata": {},
197 |      "outputs": [
198 |       {
199 |        "metadata": {},
200 |        "output_type": "pyout",
201 |        "prompt_number": 22,
202 |        "text": [
203 |         "True"
204 |        ]
205 |       }
206 |      ],
207 |      "prompt_number": 22
208 |     },
209 |     {
210 |      "cell_type": "code",
211 |      "collapsed": false,
212 |      "input": [],
213 |      "language": "python",
214 |      "metadata": {},
215 |      "outputs": []
216 |     }
217 |    ],
218 |    "metadata": {}
219 |   }
220 |  ]
221 | }


--------------------------------------------------------------------------------
/Lec 13 - Array Input and Output.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "metadata": {
  3 |   "name": "",
  4 |   "signature": "sha256:c8c912c7d9f0d49af75f9f9b584d5a59bff10023a450f197b0412ce14ebb071e"
  5 |  },
  6 |  "nbformat": 3,
  7 |  "nbformat_minor": 0,
  8 |  "worksheets": [
  9 |   {
 10 |    "cells": [
 11 |     {
 12 |      "cell_type": "code",
 13 |      "collapsed": false,
 14 |      "input": [
 15 |       "import numpy as np\n",
 16 |       "\n"
 17 |      ],
 18 |      "language": "python",
 19 |      "metadata": {},
 20 |      "outputs": [],
 21 |      "prompt_number": 1
 22 |     },
 23 |     {
 24 |      "cell_type": "code",
 25 |      "collapsed": false,
 26 |      "input": [
 27 |       "#Create an array\n",
 28 |       "arr = np.arange(5)"
 29 |      ],
 30 |      "language": "python",
 31 |      "metadata": {},
 32 |      "outputs": [],
 33 |      "prompt_number": 2
 34 |     },
 35 |     {
 36 |      "cell_type": "code",
 37 |      "collapsed": false,
 38 |      "input": [
 39 |       "#Saving array on disk in binary format (file extension .npy)\n",
 40 |       "np.save('my_array',arr)"
 41 |      ],
 42 |      "language": "python",
 43 |      "metadata": {},
 44 |      "outputs": [],
 45 |      "prompt_number": 3
 46 |     },
 47 |     {
 48 |      "cell_type": "code",
 49 |      "collapsed": false,
 50 |      "input": [
 51 |       "#Change arr\n",
 52 |       "arr = np.arange(10)\n",
 53 |       "#Show\n",
 54 |       "arr"
 55 |      ],
 56 |      "language": "python",
 57 |      "metadata": {},
 58 |      "outputs": [
 59 |       {
 60 |        "metadata": {},
 61 |        "output_type": "pyout",
 62 |        "prompt_number": 4,
 63 |        "text": [
 64 |         "array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])"
 65 |        ]
 66 |       }
 67 |      ],
 68 |      "prompt_number": 4
 69 |     },
 70 |     {
 71 |      "cell_type": "code",
 72 |      "collapsed": false,
 73 |      "input": [
 74 |       "#Lets see the original saved copy\n",
 75 |       "np.load('my_array.npy')"
 76 |      ],
 77 |      "language": "python",
 78 |      "metadata": {},
 79 |      "outputs": [
 80 |       {
 81 |        "metadata": {},
 82 |        "output_type": "pyout",
 83 |        "prompt_number": 7,
 84 |        "text": [
 85 |         "array([0, 1, 2, 3, 4])"
 86 |        ]
 87 |       }
 88 |      ],
 89 |      "prompt_number": 7
 90 |     },
 91 |     {
 92 |      "cell_type": "code",
 93 |      "collapsed": false,
 94 |      "input": [
 95 |       "#Saving multiple arrays into a zip file\n",
 96 |       "np.savez('two_arrays.npz',x=arr,y=arr)"
 97 |      ],
 98 |      "language": "python",
 99 |      "metadata": {},
100 |      "outputs": [],
101 |      "prompt_number": 8
102 |     },
103 |     {
104 |      "cell_type": "code",
105 |      "collapsed": false,
106 |      "input": [
107 |       "#Now loading multiple arrays\n",
108 |       "archive_array = np.load('two_arrays.npz')\n",
109 |       "\n",
110 |       "#Show\n",
111 |       "archive_array['x']"
112 |      ],
113 |      "language": "python",
114 |      "metadata": {},
115 |      "outputs": [
116 |       {
117 |        "metadata": {},
118 |        "output_type": "pyout",
119 |        "prompt_number": 14,
120 |        "text": [
121 |         "array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])"
122 |        ]
123 |       }
124 |      ],
125 |      "prompt_number": 14
126 |     },
127 |     {
128 |      "cell_type": "code",
129 |      "collapsed": false,
130 |      "input": [
131 |       "#Now let's remove the saved files from disk\n",
132 |       "rm my_array.npy\n",
133 |       "rm two_arrays.npz"
134 |      ],
135 |      "language": "python",
136 |      "metadata": {},
137 |      "outputs": [
138 |       {
139 |        "ename": "SyntaxError",
140 |        "evalue": "invalid syntax (<ipython-input-16-7d53e496e836>, line 2)",
141 |        "output_type": "pyerr",
142 |        "traceback": [
143 |         "\u001b[1;36m  File \u001b[1;32m\"<ipython-input-16-7d53e496e836>\"\u001b[1;36m, line \u001b[1;32m2\u001b[0m\n\u001b[1;33m    rm my_array.npy\u001b[0m\n\u001b[1;37m              ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid syntax\n"
144 |        ]
145 |       }
146 |      ],
147 |      "prompt_number": 16
148 |     },
149 |     {
150 |      "cell_type": "code",
151 |      "collapsed": false,
152 |      "input": [
153 |       "#Now saving and loading text files\n",
154 |       "\n",
155 |       "arr = np.array([[1,2,3],[4,5,6]])\n",
156 |       "np.savetxt('my_test_text.txt',arr,delimiter=',')"
157 |      ],
158 |      "language": "python",
159 |      "metadata": {},
160 |      "outputs": [],
161 |      "prompt_number": 25
162 |     },
163 |     {
164 |      "cell_type": "code",
165 |      "collapsed": false,
166 |      "input": [
167 |       "arr = np.loadtxt('my_test_text.txt',delimiter = ',')\n",
168 |       "arr\n"
169 |      ],
170 |      "language": "python",
171 |      "metadata": {},
172 |      "outputs": [
173 |       {
174 |        "metadata": {},
175 |        "output_type": "pyout",
176 |        "prompt_number": 26,
177 |        "text": [
178 |         "array([[ 1.,  2.,  3.],\n",
179 |         "       [ 4.,  5.,  6.]])"
180 |        ]
181 |       }
182 |      ],
183 |      "prompt_number": 26
184 |     },
185 |     {
186 |      "cell_type": "code",
187 |      "collapsed": false,
188 |      "input": [],
189 |      "language": "python",
190 |      "metadata": {},
191 |      "outputs": []
192 |     }
193 |    ],
194 |    "metadata": {}
195 |   }
196 |  ]
197 | }


--------------------------------------------------------------------------------
/Lec 14 - Series.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "metadata": {
  3 |   "name": "",
  4 |   "signature": "sha256:6bd2dc1e9c59ef00c1bc71cb32cd677229e49ba23d835a03faec26c1e768c572"
  5 |  },
  6 |  "nbformat": 3,
  7 |  "nbformat_minor": 0,
  8 |  "worksheets": [
  9 |   {
 10 |    "cells": [
 11 |     {
 12 |      "cell_type": "code",
 13 |      "collapsed": false,
 14 |      "input": [
 15 |       "import numpy as np\n",
 16 |       "\n",
 17 |       "from pandas import Series,DataFrame\n",
 18 |       "import pandas as pd\n"
 19 |      ],
 20 |      "language": "python",
 21 |      "metadata": {},
 22 |      "outputs": [],
 23 |      "prompt_number": 15
 24 |     },
 25 |     {
 26 |      "cell_type": "code",
 27 |      "collapsed": false,
 28 |      "input": [
 29 |       "#Let's create a Series (an array of data plus its data labels, called the index)\n",
 30 |       "\n",
 31 |       "obj = Series([3,6,9,12])\n",
 32 |       "\n",
 33 |       "#Show\n",
 34 |       "obj"
 35 |      ],
 36 |      "language": "python",
 37 |      "metadata": {},
 38 |      "outputs": [
 39 |       {
 40 |        "metadata": {},
 41 |        "output_type": "pyout",
 42 |        "prompt_number": 16,
 43 |        "text": [
 44 |         "0     3\n",
 45 |         "1     6\n",
 46 |         "2     9\n",
 47 |         "3    12\n",
 48 |         "dtype: int64"
 49 |        ]
 50 |       }
 51 |      ],
 52 |      "prompt_number": 16
 53 |     },
 54 |     {
 55 |      "cell_type": "code",
 56 |      "collapsed": false,
 57 |      "input": [
 58 |       "#Lets show the values\n",
 59 |       "obj.values"
 60 |      ],
 61 |      "language": "python",
 62 |      "metadata": {},
 63 |      "outputs": [
 64 |       {
 65 |        "metadata": {},
 66 |        "output_type": "pyout",
 67 |        "prompt_number": 17,
 68 |        "text": [
 69 |         "array([ 3,  6,  9, 12], dtype=int64)"
 70 |        ]
 71 |       }
 72 |      ],
 73 |      "prompt_number": 17
 74 |     },
 75 |     {
 76 |      "cell_type": "code",
 77 |      "collapsed": false,
 78 |      "input": [
 79 |       "#Lets show the index\n",
 80 |       "obj.index"
 81 |      ],
 82 |      "language": "python",
 83 |      "metadata": {},
 84 |      "outputs": [
 85 |       {
 86 |        "metadata": {},
 87 |        "output_type": "pyout",
 88 |        "prompt_number": 18,
 89 |        "text": [
 90 |         "Int64Index([0, 1, 2, 3], dtype='int64')"
 91 |        ]
 92 |       }
 93 |      ],
 94 |      "prompt_number": 18
 95 |     },
 96 |     {
 97 |      "cell_type": "code",
 98 |      "collapsed": false,
 99 |      "input": [
100 |       "#Now lets create a Series with an index\n",
101 |       "\n",
102 |       "#WW2 casualties \n",
103 |       "ww2_cas = Series([8700000,4300000,3000000,2100000,400000],index=['USSR','Germany','China','Japan','USA'])\n",
104 |       "\n",
105 |       "#Show\n",
106 |       "ww2_cas"
107 |      ],
108 |      "language": "python",
109 |      "metadata": {},
110 |      "outputs": [
111 |       {
112 |        "metadata": {},
113 |        "output_type": "pyout",
114 |        "prompt_number": 21,
115 |        "text": [
116 |         "USSR       8700000\n",
117 |         "Germany    4300000\n",
118 |         "China      3000000\n",
119 |         "Japan      2100000\n",
120 |         "USA         400000\n",
121 |         "dtype: int64"
122 |        ]
123 |       }
124 |      ],
125 |      "prompt_number": 21
126 |     },
127 |     {
128 |      "cell_type": "code",
129 |      "collapsed": false,
130 |      "input": [
131 |       "#Now we can use index values to select Series values\n",
132 |       "ww2_cas['USA']"
133 |      ],
134 |      "language": "python",
135 |      "metadata": {},
136 |      "outputs": [
137 |       {
138 |        "metadata": {},
139 |        "output_type": "pyout",
140 |        "prompt_number": 22,
141 |        "text": [
142 |         "400000"
143 |        ]
144 |       }
145 |      ],
146 |      "prompt_number": 22
147 |     },
148 |     {
149 |      "cell_type": "code",
150 |      "collapsed": false,
151 |      "input": [
152 |       "#Can also check with array operations\n",
153 |       "\n",
154 |       "#Check who had casualties greater than 4 million\n",
155 |       "ww2_cas[ww2_cas>4000000]"
156 |      ],
157 |      "language": "python",
158 |      "metadata": {},
159 |      "outputs": [
160 |       {
161 |        "metadata": {},
162 |        "output_type": "pyout",
163 |        "prompt_number": 26,
164 |        "text": [
165 |         "USSR       8700000\n",
166 |         "Germany    4300000\n",
167 |         "dtype: int64"
168 |        ]
169 |       }
170 |      ],
171 |      "prompt_number": 26
172 |     },
173 |     {
174 |      "cell_type": "code",
175 |      "collapsed": false,
176 |      "input": [
177 |       "#Can treat Series as ordered dictionary\n",
178 |       "\n",
179 |       "#Check if USSR is in Series\n",
180 |       "'USSR' in ww2_cas"
181 |      ],
182 |      "language": "python",
183 |      "metadata": {},
184 |      "outputs": [
185 |       {
186 |        "metadata": {},
187 |        "output_type": "pyout",
188 |        "prompt_number": 27,
189 |        "text": [
190 |         "True"
191 |        ]
192 |       }
193 |      ],
194 |      "prompt_number": 27
195 |     },
196 |     {
197 |      "cell_type": "code",
198 |      "collapsed": false,
199 |      "input": [
200 |       "#Can convert Series into Python dictionary\n",
201 |       "ww2_dict = ww2_cas.to_dict()\n",
202 |       "\n",
203 |       "#Show\n",
204 |       "ww2_dict"
205 |      ],
206 |      "language": "python",
207 |      "metadata": {},
208 |      "outputs": [
209 |       {
210 |        "metadata": {},
211 |        "output_type": "pyout",
212 |        "prompt_number": 31,
213 |        "text": [
214 |         "{'China': 3000000,\n",
215 |         " 'Germany': 4300000,\n",
216 |         " 'Japan': 2100000,\n",
217 |         " 'USA': 400000,\n",
218 |         " 'USSR': 8700000}"
219 |        ]
220 |       }
221 |      ],
222 |      "prompt_number": 31
223 |     },
224 |     {
225 |      "cell_type": "code",
226 |      "collapsed": false,
227 |      "input": [
228 |       "#Can convert back into a Series\n",
229 |       "WW2_Series = Series(ww2_dict)"
230 |      ],
231 |      "language": "python",
232 |      "metadata": {},
233 |      "outputs": [],
234 |      "prompt_number": 34
235 |     },
236 |     {
237 |      "cell_type": "code",
238 |      "collapsed": false,
239 |      "input": [
240 |       "#Show\n",
241 |       "WW2_Series"
242 |      ],
243 |      "language": "python",
244 |      "metadata": {},
245 |      "outputs": [
246 |       {
247 |        "metadata": {},
248 |        "output_type": "pyout",
249 |        "prompt_number": 35,
250 |        "text": [
251 |         "China      3000000\n",
252 |         "Germany    4300000\n",
253 |         "Japan      2100000\n",
254 |         "USA         400000\n",
255 |         "USSR       8700000\n",
256 |         "dtype: int64"
257 |        ]
258 |       }
259 |      ],
260 |      "prompt_number": 35
261 |     },
262 |     {
263 |      "cell_type": "code",
264 |      "collapsed": false,
265 |      "input": [
266 |       "#When a dictionary is passed, the index holds the dict keys in sorted order\n",
267 |       "countries = ['China','Germany','Japan','USA','USSR','Argentina']\n"
268 |      ],
269 |      "language": "python",
270 |      "metadata": {},
271 |      "outputs": [],
272 |      "prompt_number": 36
273 |     },
274 |     {
275 |      "cell_type": "code",
276 |      "collapsed": false,
277 |      "input": [
278 |       "#Lets redefine a Series\n",
279 |       "obj2 = Series(ww2_dict,index=countries)"
280 |      ],
281 |      "language": "python",
282 |      "metadata": {},
283 |      "outputs": [],
284 |      "prompt_number": 37
285 |     },
286 |     {
287 |      "cell_type": "code",
288 |      "collapsed": false,
289 |      "input": [
290 |       "#Show\n",
291 |       "obj2"
292 |      ],
293 |      "language": "python",
294 |      "metadata": {},
295 |      "outputs": [
296 |       {
297 |        "metadata": {},
298 |        "output_type": "pyout",
299 |        "prompt_number": 38,
300 |        "text": [
301 |         "China        3000000\n",
302 |         "Germany      4300000\n",
303 |         "Japan        2100000\n",
304 |         "USA           400000\n",
305 |         "USSR         8700000\n",
306 |         "Argentina        NaN\n",
307 |         "dtype: float64"
308 |        ]
309 |       }
310 |      ],
311 |      "prompt_number": 38
312 |     },
313 |     {
314 |      "cell_type": "code",
315 |      "collapsed": false,
316 |      "input": [
317 |       "#We can use isnull and notnull to find missing data\n",
318 |       "pd.isnull(obj2)\n",
319 |       "\n",
320 |       "#obj2.isnull() "
321 |      ],
322 |      "language": "python",
323 |      "metadata": {},
324 |      "outputs": [
325 |       {
326 |        "metadata": {},
327 |        "output_type": "pyout",
328 |        "prompt_number": 39,
329 |        "text": [
330 |         "China        False\n",
331 |         "Germany      False\n",
332 |         "Japan        False\n",
333 |         "USA          False\n",
334 |         "USSR         False\n",
335 |         "Argentina     True\n",
336 |         "dtype: bool"
337 |        ]
338 |       }
339 |      ],
340 |      "prompt_number": 39
341 |     },
342 |     {
343 |      "cell_type": "code",
344 |      "collapsed": false,
345 |      "input": [
346 |       "#Same for the opposite\n",
347 |       "pd.notnull(obj2)\n",
348 |       "\n",
349 |       "#obj2.notnull()"
350 |      ],
351 |      "language": "python",
352 |      "metadata": {},
353 |      "outputs": [
354 |       {
355 |        "metadata": {},
356 |        "output_type": "pyout",
357 |        "prompt_number": 40,
358 |        "text": [
359 |         "China         True\n",
360 |         "Germany       True\n",
361 |         "Japan         True\n",
362 |         "USA           True\n",
363 |         "USSR          True\n",
364 |         "Argentina    False\n",
365 |         "dtype: bool"
366 |        ]
367 |       }
368 |      ],
369 |      "prompt_number": 40
370 |     },
371 |     {
372 |      "cell_type": "code",
373 |      "collapsed": false,
374 |      "input": [
375 |       "#Lets see the ww2 Series again\n",
376 |       "WW2_Series"
377 |      ],
378 |      "language": "python",
379 |      "metadata": {},
380 |      "outputs": [
381 |       {
382 |        "metadata": {},
383 |        "output_type": "pyout",
384 |        "prompt_number": 41,
385 |        "text": [
386 |         "China      3000000\n",
387 |         "Germany    4300000\n",
388 |         "Japan      2100000\n",
389 |         "USA         400000\n",
390 |         "USSR       8700000\n",
391 |         "dtype: int64"
392 |        ]
393 |       }
394 |      ],
395 |      "prompt_number": 41
396 |     },
397 |     {
398 |      "cell_type": "code",
399 |      "collapsed": false,
400 |      "input": [
401 |       "#Let's check our Series with Argentina again\n",
402 |       "obj2"
403 |      ],
404 |      "language": "python",
405 |      "metadata": {},
406 |      "outputs": [
407 |       {
408 |        "metadata": {},
409 |        "output_type": "pyout",
410 |        "prompt_number": 42,
411 |        "text": [
412 |         "China        3000000\n",
413 |         "Germany      4300000\n",
414 |         "Japan        2100000\n",
415 |         "USA           400000\n",
416 |         "USSR         8700000\n",
417 |         "Argentina        NaN\n",
418 |         "dtype: float64"
419 |        ]
420 |       }
421 |      ],
422 |      "prompt_number": 42
423 |     },
424 |     {
425 |      "cell_type": "code",
426 |      "collapsed": false,
427 |      "input": [
428 |       "#Now we can add and pandas automatically aligns data by index\n",
429 |       "WW2_Series + obj2"
430 |      ],
431 |      "language": "python",
432 |      "metadata": {},
433 |      "outputs": [
434 |       {
435 |        "metadata": {},
436 |        "output_type": "pyout",
437 |        "prompt_number": 43,
438 |        "text": [
439 |         "Argentina         NaN\n",
440 |         "China         6000000\n",
441 |         "Germany       8600000\n",
442 |         "Japan         4200000\n",
443 |         "USA            800000\n",
444 |         "USSR         17400000\n",
445 |         "dtype: float64"
446 |        ]
447 |       }
448 |      ],
449 |      "prompt_number": 43
450 |     },
451 |     {
452 |      "cell_type": "code",
453 |      "collapsed": false,
454 |      "input": [
455 |       "#We can give Series names\n",
456 |       "obj2.name = \"World War 2 Casualties\""
457 |      ],
458 |      "language": "python",
459 |      "metadata": {},
460 |      "outputs": [],
461 |      "prompt_number": 45
462 |     },
463 |     {
464 |      "cell_type": "code",
465 |      "collapsed": false,
466 |      "input": [
467 |       "#Show\n",
468 |       "obj2"
469 |      ],
470 |      "language": "python",
471 |      "metadata": {},
472 |      "outputs": [
473 |       {
474 |        "metadata": {},
475 |        "output_type": "pyout",
476 |        "prompt_number": 46,
477 |        "text": [
478 |         "China        3000000\n",
479 |         "Germany      4300000\n",
480 |         "Japan        2100000\n",
481 |         "USA           400000\n",
482 |         "USSR         8700000\n",
483 |         "Argentina        NaN\n",
484 |         "Name: World War 2 Casualties, dtype: float64"
485 |        ]
486 |       }
487 |      ],
488 |      "prompt_number": 46
489 |     },
490 |     {
491 |      "cell_type": "code",
492 |      "collapsed": false,
493 |      "input": [
494 |       "#We can also name index\n",
495 |       "obj2.index.name = 'Countries'"
496 |      ],
497 |      "language": "python",
498 |      "metadata": {},
499 |      "outputs": [],
500 |      "prompt_number": 47
501 |     },
502 |     {
503 |      "cell_type": "code",
504 |      "collapsed": false,
505 |      "input": [
506 |       "#Show\n",
507 |       "obj2"
508 |      ],
509 |      "language": "python",
510 |      "metadata": {},
511 |      "outputs": [
512 |       {
513 |        "metadata": {},
514 |        "output_type": "pyout",
515 |        "prompt_number": 48,
516 |        "text": [
517 |         "Countries\n",
518 |         "China        3000000\n",
519 |         "Germany      4300000\n",
520 |         "Japan        2100000\n",
521 |         "USA           400000\n",
522 |         "USSR         8700000\n",
523 |         "Argentina        NaN\n",
524 |         "Name: World War 2 Casualties, dtype: float64"
525 |        ]
526 |       }
527 |      ],
528 |      "prompt_number": 48
529 |     },
530 |     {
531 |      "cell_type": "code",
532 |      "collapsed": false,
533 |      "input": [
534 |       "#Next we'll learn DataFrames!"
535 |      ],
536 |      "language": "python",
537 |      "metadata": {},
538 |      "outputs": [],
539 |      "prompt_number": 49
540 |     },
541 |     {
542 |      "cell_type": "code",
543 |      "collapsed": false,
544 |      "input": [],
545 |      "language": "python",
546 |      "metadata": {},
547 |      "outputs": []
548 |     }
549 |    ],
550 |    "metadata": {}
551 |   }
552 |  ]
553 | }


--------------------------------------------------------------------------------
/Lec 16 - Index Objects.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "metadata": {
  3 |   "name": "",
  4 |   "signature": "sha256:dec69e1ae60e3fa81f66fec576c0c89c5399b8c2e980f25ad3472ca0253583ab"
  5 |  },
  6 |  "nbformat": 3,
  7 |  "nbformat_minor": 0,
  8 |  "worksheets": [
  9 |   {
 10 |    "cells": [
 11 |     {
 12 |      "cell_type": "code",
 13 |      "collapsed": false,
 14 |      "input": [
 15 |       "import numpy as np\n",
 16 |       "\n",
 17 |       "from pandas import Series,DataFrame\n",
 18 |       "\n",
 19 |       "import pandas as pd"
 20 |      ],
 21 |      "language": "python",
 22 |      "metadata": {},
 23 |      "outputs": [],
 24 |      "prompt_number": 1
 25 |     },
 26 |     {
 27 |      "cell_type": "code",
 28 |      "collapsed": false,
 29 |      "input": [
 30 |       "#Let's learn about (or review) Index objects\n",
 31 |       "my_ser = Series([1,2,3,4],index=['A','B','C','D'])\n",
 32 |       "\n",
 33 |       "#Get the index\n",
 34 |       "my_index = my_ser.index"
 35 |      ],
 36 |      "language": "python",
 37 |      "metadata": {},
 38 |      "outputs": [],
 39 |      "prompt_number": 6
 40 |     },
 41 |     {
 42 |      "cell_type": "code",
 43 |      "collapsed": false,
 44 |      "input": [
 45 |       "#Show\n",
 46 |       "my_index"
 47 |      ],
 48 |      "language": "python",
 49 |      "metadata": {},
 50 |      "outputs": [
 51 |       {
 52 |        "metadata": {},
 53 |        "output_type": "pyout",
 54 |        "prompt_number": 5,
 55 |        "text": [
 56 |         "Index([u'A', u'B', u'C', u'D'], dtype='object')"
 57 |        ]
 58 |       }
 59 |      ],
 60 |      "prompt_number": 5
 61 |     },
 62 |     {
 63 |      "cell_type": "code",
 64 |      "collapsed": false,
 65 |      "input": [
 66 |       "#Can grab index ranges\n",
 67 |       "my_index[2:]"
 68 |      ],
 69 |      "language": "python",
 70 |      "metadata": {},
 71 |      "outputs": [
 72 |       {
 73 |        "metadata": {},
 74 |        "output_type": "pyout",
 75 |        "prompt_number": 7,
 76 |        "text": [
 77 |         "Index([u'C', u'D'], dtype='object')"
 78 |        ]
 79 |       }
 80 |      ],
 81 |      "prompt_number": 7
 82 |     },
 83 |     {
 84 |      "cell_type": "code",
 85 |      "collapsed": false,
 86 |      "input": [
 87 |       "#What happens if we try to change an index value?\n",
 88 |       "my_index[0] = 'Z'"
 89 |      ],
 90 |      "language": "python",
 91 |      "metadata": {},
 92 |      "outputs": [
 93 |       {
 94 |        "ename": "TypeError",
 95 |        "evalue": "Indexes does not support mutable operations",
 96 |        "output_type": "pyerr",
 97 |        "traceback": [
 98 |         "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
 99 |         "\u001b[1;32m<ipython-input-8-599f591f1af8>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[1;31m#What happens if we try to change an index value?\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mmy_index\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m'Z'\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
100 |         "\u001b[1;32mC:\\Users\\Marcial\\Anaconda\\lib\\site-packages\\pandas\\core\\index.pyc\u001b[0m in \u001b[0;36m__setitem__\u001b[1;34m(self, key, value)\u001b[0m\n\u001b[0;32m    894\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    895\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0m__setitem__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 896\u001b[1;33m         \u001b[1;32mraise\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Indexes does not support mutable operations\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    897\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    898\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0m__getitem__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
101 |         "\u001b[1;31mTypeError\u001b[0m: Indexes does not support mutable operations"
102 |        ]
103 |       }
104 |      ],
105 |      "prompt_number": 8
106 |     },
107 |     {
108 |      "cell_type": "code",
109 |      "collapsed": false,
110 |      "input": [
111 |       "#Excellent! Indexes are immutable!"
112 |      ],
113 |      "language": "python",
114 |      "metadata": {},
115 |      "outputs": [],
116 |      "prompt_number": 9
117 |     },
118 |     {
119 |      "cell_type": "code",
120 |      "collapsed": false,
121 |      "input": [
122 |       "#Next we'll learn about Reindexing."
123 |      ],
124 |      "language": "python",
125 |      "metadata": {},
126 |      "outputs": []
127 |     }
128 |    ],
129 |    "metadata": {}
130 |   }
131 |  ]
132 | }


--------------------------------------------------------------------------------
/Lec 18 -Drop Entry.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "metadata": {
  3 |   "name": "",
  4 |   "signature": "sha256:1142a42caa61e163fffc05959c60098c39550507cb10c842f35741a1826c5afd"
  5 |  },
  6 |  "nbformat": 3,
  7 |  "nbformat_minor": 0,
  8 |  "worksheets": [
  9 |   {
 10 |    "cells": [
 11 |     {
 12 |      "cell_type": "code",
 13 |      "collapsed": false,
 14 |      "input": [
 15 |       "#Now we'll learn about dropping entries\n",
 16 |       "import numpy as np\n",
 17 |       "from pandas import Series,DataFrame\n",
 18 |       "import pandas as pd"
 19 |      ],
 20 |      "language": "python",
 21 |      "metadata": {},
 22 |      "outputs": [],
 23 |      "prompt_number": 1
 24 |     },
 25 |     {
 26 |      "cell_type": "code",
 27 |      "collapsed": false,
 28 |      "input": [
 29 |       "#Create a new series to play with\n",
 30 |       "ser1 = Series(np.arange(3),index=['a','b','c'])\n",
 31 |       "\n",
 32 |       "#Show\n",
 33 |       "ser1"
 34 |      ],
 35 |      "language": "python",
 36 |      "metadata": {},
 37 |      "outputs": [
 38 |       {
 39 |        "metadata": {},
 40 |        "output_type": "pyout",
 41 |        "prompt_number": 5,
 42 |        "text": [
 43 |         "a    0\n",
 44 |         "b    1\n",
 45 |         "c    2\n",
 46 |         "dtype: int32"
 47 |        ]
 48 |       }
 49 |      ],
 50 |      "prompt_number": 5
 51 |     },
 52 |     {
 53 |      "cell_type": "code",
 54 |      "collapsed": false,
 55 |      "input": [
 56 |       "#Now let's drop an index\n",
 57 |       "ser1.drop('b')"
 58 |      ],
 59 |      "language": "python",
 60 |      "metadata": {},
 61 |      "outputs": [
 62 |       {
 63 |        "metadata": {},
 64 |        "output_type": "pyout",
 65 |        "prompt_number": 6,
 66 |        "text": [
 67 |         "a    0\n",
 68 |         "c    2\n",
 69 |         "dtype: int32"
 70 |        ]
 71 |       }
 72 |      ],
 73 |      "prompt_number": 6
 74 |     },
 75 |     {
 76 |      "cell_type": "code",
 77 |      "collapsed": false,
 78 |      "input": [
 79 |       "#With a DataFrame we can drop values from either axis\n",
 80 |       "dframe1 = DataFrame(np.arange(9).reshape((3,3)),index=['SF','LA','NY'],columns=['pop','size','year'])\n",
 81 |       "\n",
 82 |       "#Show (remember, these are just placeholder values)\n",
 83 |       "dframe1"
 84 |      ],
 85 |      "language": "python",
 86 |      "metadata": {},
 87 |      "outputs": [
 88 |       {
 89 |        "html": [
 90 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
 91 |         "<table border=\"1\" class=\"dataframe\">\n",
 92 |         "  <thead>\n",
 93 |         "    <tr style=\"text-align: right;\">\n",
 94 |         "      <th></th>\n",
 95 |         "      <th>pop</th>\n",
 96 |         "      <th>size</th>\n",
 97 |         "      <th>year</th>\n",
 98 |         "    </tr>\n",
 99 |         "  </thead>\n",
100 |         "  <tbody>\n",
101 |         "    <tr>\n",
102 |         "      <th>SF</th>\n",
103 |         "      <td> 0</td>\n",
104 |         "      <td> 1</td>\n",
105 |         "      <td> 2</td>\n",
106 |         "    </tr>\n",
107 |         "    <tr>\n",
108 |         "      <th>LA</th>\n",
109 |         "      <td> 3</td>\n",
110 |         "      <td> 4</td>\n",
111 |         "      <td> 5</td>\n",
112 |         "    </tr>\n",
113 |         "    <tr>\n",
114 |         "      <th>NY</th>\n",
115 |         "      <td> 6</td>\n",
116 |         "      <td> 7</td>\n",
117 |         "      <td> 8</td>\n",
118 |         "    </tr>\n",
119 |         "  </tbody>\n",
120 |         "</table>\n",
121 |         "</div>"
122 |        ],
123 |        "metadata": {},
124 |        "output_type": "pyout",
125 |        "prompt_number": 9,
126 |        "text": [
127 |         "    pop  size  year\n",
128 |         "SF    0     1     2\n",
129 |         "LA    3     4     5\n",
130 |         "NY    6     7     8"
131 |        ]
132 |       }
133 |      ],
134 |      "prompt_number": 9
135 |     },
136 |     {
137 |      "cell_type": "code",
138 |      "collapsed": false,
139 |      "input": [
140 |       "#Now dropping a row\n",
141 |       "dframe1.drop('LA')"
142 |      ],
143 |      "language": "python",
144 |      "metadata": {},
145 |      "outputs": [
146 |       {
147 |        "html": [
148 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
149 |         "<table border=\"1\" class=\"dataframe\">\n",
150 |         "  <thead>\n",
151 |         "    <tr style=\"text-align: right;\">\n",
152 |         "      <th></th>\n",
153 |         "      <th>pop</th>\n",
154 |         "      <th>size</th>\n",
155 |         "      <th>year</th>\n",
156 |         "    </tr>\n",
157 |         "  </thead>\n",
158 |         "  <tbody>\n",
159 |         "    <tr>\n",
160 |         "      <th>SF</th>\n",
161 |         "      <td> 0</td>\n",
162 |         "      <td> 1</td>\n",
163 |         "      <td> 2</td>\n",
164 |         "    </tr>\n",
165 |         "    <tr>\n",
166 |         "      <th>NY</th>\n",
167 |         "      <td> 6</td>\n",
168 |         "      <td> 7</td>\n",
169 |         "      <td> 8</td>\n",
170 |         "    </tr>\n",
171 |         "  </tbody>\n",
172 |         "</table>\n",
173 |         "</div>"
174 |        ],
175 |        "metadata": {},
176 |        "output_type": "pyout",
177 |        "prompt_number": 10,
178 |        "text": [
179 |         "    pop  size  year\n",
180 |         "SF    0     1     2\n",
181 |         "NY    6     7     8"
182 |        ]
183 |       }
184 |      ],
185 |      "prompt_number": 10
186 |     },
187 |     {
188 |      "cell_type": "code",
189 |      "collapsed": false,
190 |      "input": [
191 |       "#Or we could drop a column\n",
192 |       "\n",
193 |       "#Need to specify that axis is 1, not 0\n",
194 |       "dframe1.drop('year',axis=1)"
195 |      ],
196 |      "language": "python",
197 |      "metadata": {},
198 |      "outputs": [
199 |       {
200 |        "html": [
201 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
202 |         "<table border=\"1\" class=\"dataframe\">\n",
203 |         "  <thead>\n",
204 |         "    <tr style=\"text-align: right;\">\n",
205 |         "      <th></th>\n",
206 |         "      <th>pop</th>\n",
207 |         "      <th>size</th>\n",
208 |         "    </tr>\n",
209 |         "  </thead>\n",
210 |         "  <tbody>\n",
211 |         "    <tr>\n",
212 |         "      <th>SF</th>\n",
213 |         "      <td> 0</td>\n",
214 |         "      <td> 1</td>\n",
215 |         "    </tr>\n",
216 |         "    <tr>\n",
217 |         "      <th>LA</th>\n",
218 |         "      <td> 3</td>\n",
219 |         "      <td> 4</td>\n",
220 |         "    </tr>\n",
221 |         "    <tr>\n",
222 |         "      <th>NY</th>\n",
223 |         "      <td> 6</td>\n",
224 |         "      <td> 7</td>\n",
225 |         "    </tr>\n",
226 |         "  </tbody>\n",
227 |         "</table>\n",
228 |         "</div>"
229 |        ],
230 |        "metadata": {},
231 |        "output_type": "pyout",
232 |        "prompt_number": 13,
233 |        "text": [
234 |         "    pop  size\n",
235 |         "SF    0     1\n",
236 |         "LA    3     4\n",
237 |         "NY    6     7"
238 |        ]
239 |       }
240 |      ],
241 |      "prompt_number": 13
242 |     },
243 |     {
244 |      "cell_type": "code",
245 |      "collapsed": false,
246 |      "input": [
247 |       "#Next we'll learn about selecting entries in a DataFrame!"
248 |      ],
249 |      "language": "python",
250 |      "metadata": {},
251 |      "outputs": []
252 |     }
253 |    ],
254 |    "metadata": {}
255 |   }
256 |  ]
257 | }


--------------------------------------------------------------------------------
/Lec 20 - Data Alignment.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "metadata": {
  3 |   "name": "",
  4 |   "signature": "sha256:d01c647fe1df2bcdaac734e1b1d3bdd603d82201130ebca8826862b416de7209"
  5 |  },
  6 |  "nbformat": 3,
  7 |  "nbformat_minor": 0,
  8 |  "worksheets": [
  9 |   {
 10 |    "cells": [
 11 |     {
 12 |      "cell_type": "code",
 13 |      "collapsed": false,
 14 |      "input": [
 15 |       "#Now we'll learn about arithmetic between DataFrames with different indexes\n",
 16 |       "import numpy as np\n",
 17 |       "from pandas import Series,DataFrame\n",
 18 |       "\n",
 19 |       "import pandas as pd"
 20 |      ],
 21 |      "language": "python",
 22 |      "metadata": {},
 23 |      "outputs": [],
 24 |      "prompt_number": 1
 25 |     },
 26 |     {
 27 |      "cell_type": "code",
 28 |      "collapsed": false,
 29 |      "input": [
 30 |       "#Let's start by making two Series\n",
 31 |       "\n",
 32 |       "ser1 = Series([0,1,2],index=['A','B','C'])\n",
 33 |       "\n",
 34 |       "#Show\n",
 35 |       "ser1"
 36 |      ],
 37 |      "language": "python",
 38 |      "metadata": {},
 39 |      "outputs": [
 40 |       {
 41 |        "metadata": {},
 42 |        "output_type": "pyout",
 43 |        "prompt_number": 2,
 44 |        "text": [
 45 |         "A    0\n",
 46 |         "B    1\n",
 47 |         "C    2\n",
 48 |         "dtype: int64"
 49 |        ]
 50 |       }
 51 |      ],
 52 |      "prompt_number": 2
 53 |     },
 54 |     {
 55 |      "cell_type": "code",
 56 |      "collapsed": false,
 57 |      "input": [
 58 |       "#Now the second Series\n",
 59 |       "ser2 = Series([3,4,5,6],index=['A','B','C','D'])\n",
 60 |       "\n",
 61 |       "#Show \n",
 62 |       "ser2 "
 63 |      ],
 64 |      "language": "python",
 65 |      "metadata": {},
 66 |      "outputs": [
 67 |       {
 68 |        "metadata": {},
 69 |        "output_type": "pyout",
 70 |        "prompt_number": 5,
 71 |        "text": [
 72 |         "A    3\n",
 73 |         "B    4\n",
 74 |         "C    5\n",
 75 |         "D    6\n",
 76 |         "dtype: int64"
 77 |        ]
 78 |       }
 79 |      ],
 80 |      "prompt_number": 5
 81 |     },
 82 |     {
 83 |      "cell_type": "code",
 84 |      "collapsed": false,
 85 |      "input": [
 86 |       "#So what happens when we add these together?\n",
 87 |       "ser1 + ser2"
 88 |      ],
 89 |      "language": "python",
 90 |      "metadata": {},
 91 |      "outputs": [
 92 |       {
 93 |        "metadata": {},
 94 |        "output_type": "pyout",
 95 |        "prompt_number": 6,
 96 |        "text": [
 97 |         "A     3\n",
 98 |         "B     5\n",
 99 |         "C     7\n",
100 |         "D   NaN\n",
101 |         "dtype: float64"
102 |        ]
103 |       }
104 |      ],
105 |      "prompt_number": 6
106 |     },
107 |     {
108 |      "cell_type": "code",
109 |      "collapsed": false,
110 |      "input": [
111 |       "#Note the NaN values are added in automatically"
112 |      ],
113 |      "language": "python",
114 |      "metadata": {},
115 |      "outputs": [],
116 |      "prompt_number": 7
117 |     },
118 |     {
119 |      "cell_type": "code",
120 |      "collapsed": false,
121 |      "input": [
122 |       "# Now let's try it with DataFrames!\n",
123 |       "dframe1 = DataFrame(np.arange(4).reshape(2,2),columns=list('AB'),index=['NYC','LA'])\n",
124 |       "\n",
125 |       "#Show\n",
126 |       "dframe1"
127 |      ],
128 |      "language": "python",
129 |      "metadata": {},
130 |      "outputs": [
131 |       {
132 |        "html": [
133 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
134 |         "<table border=\"1\" class=\"dataframe\">\n",
135 |         "  <thead>\n",
136 |         "    <tr style=\"text-align: right;\">\n",
137 |         "      <th></th>\n",
138 |         "      <th>A</th>\n",
139 |         "      <th>B</th>\n",
140 |         "    </tr>\n",
141 |         "  </thead>\n",
142 |         "  <tbody>\n",
143 |         "    <tr>\n",
144 |         "      <th>NYC</th>\n",
145 |         "      <td> 0</td>\n",
146 |         "      <td> 1</td>\n",
147 |         "    </tr>\n",
148 |         "    <tr>\n",
149 |         "      <th>LA</th>\n",
150 |         "      <td> 2</td>\n",
151 |         "      <td> 3</td>\n",
152 |         "    </tr>\n",
153 |         "  </tbody>\n",
154 |         "</table>\n",
155 |         "</div>"
156 |        ],
157 |        "metadata": {},
158 |        "output_type": "pyout",
159 |        "prompt_number": 8,
160 |        "text": [
161 |         "     A  B\n",
162 |         "NYC  0  1\n",
163 |         "LA   2  3"
164 |        ]
165 |       }
166 |      ],
167 |      "prompt_number": 8
168 |     },
169 |     {
170 |      "cell_type": "code",
171 |      "collapsed": false,
172 |      "input": [
173 |       "#Second DataFrame\n",
174 |       "dframe2 = DataFrame(np.arange(9).reshape(3,3),columns=list('ADC'),index=['NYC','SF','LA'])\n",
175 |       "\n",
176 |       "#Show\n",
177 |       "dframe2"
178 |      ],
179 |      "language": "python",
180 |      "metadata": {},
181 |      "outputs": [
182 |       {
183 |        "html": [
184 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
185 |         "<table border=\"1\" class=\"dataframe\">\n",
186 |         "  <thead>\n",
187 |         "    <tr style=\"text-align: right;\">\n",
188 |         "      <th></th>\n",
189 |         "      <th>A</th>\n",
190 |         "      <th>D</th>\n",
191 |         "      <th>C</th>\n",
192 |         "    </tr>\n",
193 |         "  </thead>\n",
194 |         "  <tbody>\n",
195 |         "    <tr>\n",
196 |         "      <th>NYC</th>\n",
197 |         "      <td> 0</td>\n",
198 |         "      <td> 1</td>\n",
199 |         "      <td> 2</td>\n",
200 |         "    </tr>\n",
201 |         "    <tr>\n",
202 |         "      <th>SF</th>\n",
203 |         "      <td> 3</td>\n",
204 |         "      <td> 4</td>\n",
205 |         "      <td> 5</td>\n",
206 |         "    </tr>\n",
207 |         "    <tr>\n",
208 |         "      <th>LA</th>\n",
209 |         "      <td> 6</td>\n",
210 |         "      <td> 7</td>\n",
211 |         "      <td> 8</td>\n",
212 |         "    </tr>\n",
213 |         "  </tbody>\n",
214 |         "</table>\n",
215 |         "</div>"
216 |        ],
217 |        "metadata": {},
218 |        "output_type": "pyout",
219 |        "prompt_number": 10,
220 |        "text": [
221 |         "     A  D  C\n",
222 |         "NYC  0  1  2\n",
223 |         "SF   3  4  5\n",
224 |         "LA   6  7  8"
225 |        ]
226 |       }
227 |      ],
228 |      "prompt_number": 10
229 |     },
230 |     {
231 |      "cell_type": "code",
232 |      "collapsed": false,
233 |      "input": [
234 |       "#What happens when we add them together?\n",
235 |       "\n",
236 |       "dframe1 + dframe2\n"
237 |      ],
238 |      "language": "python",
239 |      "metadata": {},
240 |      "outputs": [
241 |       {
242 |        "html": [
243 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
244 |         "<table border=\"1\" class=\"dataframe\">\n",
245 |         "  <thead>\n",
246 |         "    <tr style=\"text-align: right;\">\n",
247 |         "      <th></th>\n",
248 |         "      <th>A</th>\n",
249 |         "      <th>B</th>\n",
250 |         "      <th>C</th>\n",
251 |         "      <th>D</th>\n",
252 |         "    </tr>\n",
253 |         "  </thead>\n",
254 |         "  <tbody>\n",
255 |         "    <tr>\n",
256 |         "      <th>LA</th>\n",
257 |         "      <td>  8</td>\n",
258 |         "      <td>NaN</td>\n",
259 |         "      <td>NaN</td>\n",
260 |         "      <td>NaN</td>\n",
261 |         "    </tr>\n",
262 |         "    <tr>\n",
263 |         "      <th>NYC</th>\n",
264 |         "      <td>  0</td>\n",
265 |         "      <td>NaN</td>\n",
266 |         "      <td>NaN</td>\n",
267 |         "      <td>NaN</td>\n",
268 |         "    </tr>\n",
269 |         "    <tr>\n",
270 |         "      <th>SF</th>\n",
271 |         "      <td>NaN</td>\n",
272 |         "      <td>NaN</td>\n",
273 |         "      <td>NaN</td>\n",
274 |         "      <td>NaN</td>\n",
275 |         "    </tr>\n",
276 |         "  </tbody>\n",
277 |         "</table>\n",
278 |         "</div>"
279 |        ],
280 |        "metadata": {},
281 |        "output_type": "pyout",
282 |        "prompt_number": 11,
283 |        "text": [
284 |         "      A   B   C   D\n",
285 |         "LA    8 NaN NaN NaN\n",
286 |         "NYC   0 NaN NaN NaN\n",
287 |         "SF  NaN NaN NaN NaN"
288 |        ]
289 |       }
290 |      ],
291 |      "prompt_number": 11
292 |     },
293 |     {
294 |      "cell_type": "code",
295 |      "collapsed": false,
296 |      "input": [
297 |       "#What if we want to replace the NaN values?\n",
298 |       "# Then we can use .add()\n",
299 |       "\n",
300 |       "dframe1.add(dframe2,fill_value=0)"
301 |      ],
302 |      "language": "python",
303 |      "metadata": {},
304 |      "outputs": [
305 |       {
306 |        "html": [
307 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
308 |         "<table border=\"1\" class=\"dataframe\">\n",
309 |         "  <thead>\n",
310 |         "    <tr style=\"text-align: right;\">\n",
311 |         "      <th></th>\n",
312 |         "      <th>A</th>\n",
313 |         "      <th>B</th>\n",
314 |         "      <th>C</th>\n",
315 |         "      <th>D</th>\n",
316 |         "    </tr>\n",
317 |         "  </thead>\n",
318 |         "  <tbody>\n",
319 |         "    <tr>\n",
320 |         "      <th>LA</th>\n",
321 |         "      <td> 8</td>\n",
322 |         "      <td>  3</td>\n",
323 |         "      <td> 8</td>\n",
324 |         "      <td> 7</td>\n",
325 |         "    </tr>\n",
326 |         "    <tr>\n",
327 |         "      <th>NYC</th>\n",
328 |         "      <td> 0</td>\n",
329 |         "      <td>  1</td>\n",
330 |         "      <td> 2</td>\n",
331 |         "      <td> 1</td>\n",
332 |         "    </tr>\n",
333 |         "    <tr>\n",
334 |         "      <th>SF</th>\n",
335 |         "      <td> 3</td>\n",
336 |         "      <td>NaN</td>\n",
337 |         "      <td> 5</td>\n",
338 |         "      <td> 4</td>\n",
339 |         "    </tr>\n",
340 |         "  </tbody>\n",
341 |         "</table>\n",
342 |         "</div>"
343 |        ],
344 |        "metadata": {},
345 |        "output_type": "pyout",
346 |        "prompt_number": 13,
347 |        "text": [
348 |         "     A   B  C  D\n",
349 |         "LA   8   3  8  7\n",
350 |         "NYC  0   1  2  1\n",
351 |         "SF   3 NaN  5  4"
352 |        ]
353 |       }
354 |      ],
355 |      "prompt_number": 13
356 |     },
357 |     {
358 |      "cell_type": "code",
359 |      "collapsed": false,
360 |      "input": [
361 |       "#Now we can see that the values are filled; however, neither DataFrame had an (SF, B) value, so that entry is still NaN"
362 |      ],
363 |      "language": "python",
364 |      "metadata": {},
365 |      "outputs": [],
366 |      "prompt_number": 14
367 |     },
368 |     {
369 |      "cell_type": "code",
370 |      "collapsed": false,
371 |      "input": [
372 |       "#Let's learn about operations between a Series and a DataFrame"
373 |      ],
374 |      "language": "python",
375 |      "metadata": {},
376 |      "outputs": [],
377 |      "prompt_number": 18
378 |     },
379 |     {
380 |      "cell_type": "code",
381 |      "collapsed": false,
382 |      "input": [
383 |       "#Show\n",
384 |       "dframe2"
385 |      ],
386 |      "language": "python",
387 |      "metadata": {},
388 |      "outputs": [
389 |       {
390 |        "html": [
391 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
392 |         "<table border=\"1\" class=\"dataframe\">\n",
393 |         "  <thead>\n",
394 |         "    <tr style=\"text-align: right;\">\n",
395 |         "      <th></th>\n",
396 |         "      <th>A</th>\n",
397 |         "      <th>D</th>\n",
398 |         "      <th>C</th>\n",
399 |         "    </tr>\n",
400 |         "  </thead>\n",
401 |         "  <tbody>\n",
402 |         "    <tr>\n",
403 |         "      <th>NYC</th>\n",
404 |         "      <td> 0</td>\n",
405 |         "      <td> 1</td>\n",
406 |         "      <td> 2</td>\n",
407 |         "    </tr>\n",
408 |         "    <tr>\n",
409 |         "      <th>SF</th>\n",
410 |         "      <td> 3</td>\n",
411 |         "      <td> 4</td>\n",
412 |         "      <td> 5</td>\n",
413 |         "    </tr>\n",
414 |         "    <tr>\n",
415 |         "      <th>LA</th>\n",
416 |         "      <td> 6</td>\n",
417 |         "      <td> 7</td>\n",
418 |         "      <td> 8</td>\n",
419 |         "    </tr>\n",
420 |         "  </tbody>\n",
421 |         "</table>\n",
422 |         "</div>"
423 |        ],
424 |        "metadata": {},
425 |        "output_type": "pyout",
426 |        "prompt_number": 19,
427 |        "text": [
428 |         "     A  D  C\n",
429 |         "NYC  0  1  2\n",
430 |         "SF   3  4  5\n",
431 |         "LA   6  7  8"
432 |        ]
433 |       }
434 |      ],
435 |      "prompt_number": 19
436 |     },
437 |     {
438 |      "cell_type": "code",
439 |      "collapsed": false,
440 |      "input": [
441 |       "#Create a Series from the DataFrame's first row (position 0)\n",
442 |       "ser3 = dframe2.ix[0]\n",
443 |       "\n",
444 |       "#Show\n",
445 |       "ser3"
446 |      ],
447 |      "language": "python",
448 |      "metadata": {},
449 |      "outputs": [
450 |       {
451 |        "metadata": {},
452 |        "output_type": "pyout",
453 |        "prompt_number": 23,
454 |        "text": [
455 |         "A    0\n",
456 |         "D    1\n",
457 |         "C    2\n",
458 |         "Name: NYC, dtype: int32"
459 |        ]
460 |       }
461 |      ],
462 |      "prompt_number": 23
463 |     },
464 |     {
465 |      "cell_type": "code",
466 |      "collapsed": false,
467 |      "input": [
468 |       "#Now we can use arithmetic operations\n",
469 |       "dframe2-ser3"
470 |      ],
471 |      "language": "python",
472 |      "metadata": {},
473 |      "outputs": [
474 |       {
475 |        "html": [
476 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
477 |         "<table border=\"1\" class=\"dataframe\">\n",
478 |         "  <thead>\n",
479 |         "    <tr style=\"text-align: right;\">\n",
480 |         "      <th></th>\n",
481 |         "      <th>A</th>\n",
482 |         "      <th>D</th>\n",
483 |         "      <th>C</th>\n",
484 |         "    </tr>\n",
485 |         "  </thead>\n",
486 |         "  <tbody>\n",
487 |         "    <tr>\n",
488 |         "      <th>NYC</th>\n",
489 |         "      <td> 0</td>\n",
490 |         "      <td> 0</td>\n",
491 |         "      <td> 0</td>\n",
492 |         "    </tr>\n",
493 |         "    <tr>\n",
494 |         "      <th>SF</th>\n",
495 |         "      <td> 3</td>\n",
496 |         "      <td> 3</td>\n",
497 |         "      <td> 3</td>\n",
498 |         "    </tr>\n",
499 |         "    <tr>\n",
500 |         "      <th>LA</th>\n",
501 |         "      <td> 6</td>\n",
502 |         "      <td> 6</td>\n",
503 |         "      <td> 6</td>\n",
504 |         "    </tr>\n",
505 |         "  </tbody>\n",
506 |         "</table>\n",
507 |         "</div>"
508 |        ],
509 |        "metadata": {},
510 |        "output_type": "pyout",
511 |        "prompt_number": 24,
512 |        "text": [
513 |         "     A  D  C\n",
514 |         "NYC  0  0  0\n",
515 |         "SF   3  3  3\n",
516 |         "LA   6  6  6"
517 |        ]
518 |       }
519 |      ],
520 |      "prompt_number": 24
521 |     },
522 |     {
523 |      "cell_type": "code",
524 |      "collapsed": false,
525 |      "input": [
526 |       "#Next we'll learn about sorting and ranking!"
527 |      ],
528 |      "language": "python",
529 |      "metadata": {},
530 |      "outputs": []
531 |     },
532 |     {
533 |      "cell_type": "code",
534 |      "collapsed": false,
535 |      "input": [],
536 |      "language": "python",
537 |      "metadata": {},
538 |      "outputs": []
539 |     },
540 |     {
541 |      "cell_type": "code",
542 |      "collapsed": false,
543 |      "input": [],
544 |      "language": "python",
545 |      "metadata": {},
546 |      "outputs": []
547 |     }
548 |    ],
549 |    "metadata": {}
550 |   }
551 |  ]
552 | }


--------------------------------------------------------------------------------
/Lec 21 - Rank and Sort.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "metadata": {
  3 |   "name": "",
  4 |   "signature": "sha256:4bba170c2c83530b20a83f4c66e2fd31663a4a0904cf06b4cf5826e58b9d6bb6"
  5 |  },
  6 |  "nbformat": 3,
  7 |  "nbformat_minor": 0,
  8 |  "worksheets": [
  9 |   {
 10 |    "cells": [
 11 |     {
 12 |      "cell_type": "code",
 13 |      "collapsed": false,
 14 |      "input": [
 15 |       "import numpy as np\n",
 16 |       "from pandas import Series,DataFrame\n",
 17 |       "import pandas as pd"
 18 |      ],
 19 |      "language": "python",
 20 |      "metadata": {},
 21 |      "outputs": [],
 22 |      "prompt_number": 1
 23 |     },
 24 |     {
 25 |      "cell_type": "code",
 26 |      "collapsed": false,
 27 |      "input": [
 28 |       "#Sorting by index\n",
 29 |       "ser1 = Series(range(3),index=['C','A','B'])\n",
 30 |       "\n",
 31 |       "#show\n",
 32 |       "ser1"
 33 |      ],
 34 |      "language": "python",
 35 |      "metadata": {},
 36 |      "outputs": [
 37 |       {
 38 |        "metadata": {},
 39 |        "output_type": "pyout",
 40 |        "prompt_number": 4,
 41 |        "text": [
 42 |         "C    0\n",
 43 |         "A    1\n",
 44 |         "B    2\n",
 45 |         "dtype: int64"
 46 |        ]
 47 |       }
 48 |      ],
 49 |      "prompt_number": 4
 50 |     },
 51 |     {
 52 |      "cell_type": "code",
 53 |      "collapsed": false,
 54 |      "input": [
 55 |       "#Now sort_index\n",
 56 |       "ser1.sort_index()"
 57 |      ],
 58 |      "language": "python",
 59 |      "metadata": {},
 60 |      "outputs": [
 61 |       {
 62 |        "metadata": {},
 63 |        "output_type": "pyout",
 64 |        "prompt_number": 5,
 65 |        "text": [
 66 |         "A    1\n",
 67 |         "B    2\n",
 68 |         "C    0\n",
 69 |         "dtype: int64"
 70 |        ]
 71 |       }
 72 |      ],
 73 |      "prompt_number": 5
 74 |     },
 75 |     {
 76 |      "cell_type": "code",
 77 |      "collapsed": false,
 78 |      "input": [
 79 |       "#Can sort a Series by its values\n",
 80 |       "ser1.order()"
 81 |      ],
 82 |      "language": "python",
 83 |      "metadata": {},
 84 |      "outputs": [
 85 |       {
 86 |        "metadata": {},
 87 |        "output_type": "pyout",
 88 |        "prompt_number": 6,
 89 |        "text": [
 90 |         "C    0\n",
 91 |         "A    1\n",
 92 |         "B    2\n",
 93 |         "dtype: int64"
 94 |        ]
 95 |       }
 96 |      ],
 97 |      "prompt_number": 6
 98 |     },
 99 |     {
100 |      "cell_type": "code",
101 |      "collapsed": false,
102 |      "input": [
103 |       "#Let's see how ranking works\n",
104 |       "\n",
105 |       "from numpy.random import randn\n",
106 |       "ser2 = Series(randn(10))\n",
107 |       "\n",
108 |       "#Show\n",
109 |       "ser2"
110 |      ],
111 |      "language": "python",
112 |      "metadata": {},
113 |      "outputs": [
114 |       {
115 |        "metadata": {},
116 |        "output_type": "pyout",
117 |        "prompt_number": 10,
118 |        "text": [
119 |         "0    0.524553\n",
120 |         "1   -1.987343\n",
121 |         "2   -0.883902\n",
122 |         "3   -0.875829\n",
123 |         "4    0.216089\n",
124 |         "5    0.744837\n",
125 |         "6   -0.761465\n",
126 |         "7    0.792798\n",
127 |         "8   -0.144650\n",
128 |         "9    0.100972\n",
129 |         "dtype: float64"
130 |        ]
131 |       }
132 |      ],
133 |      "prompt_number": 10
134 |     },
135 |     {
136 |      "cell_type": "code",
137 |      "collapsed": false,
138 |      "input": [
139 |       "#This will show you the rank used if you sort the series\n",
140 |       "ser2.rank()"
141 |      ],
142 |      "language": "python",
143 |      "metadata": {},
144 |      "outputs": [
145 |       {
146 |        "metadata": {},
147 |        "output_type": "pyout",
148 |        "prompt_number": 11,
149 |        "text": [
150 |         "0     8\n",
151 |         "1     1\n",
152 |         "2     2\n",
153 |         "3     3\n",
154 |         "4     7\n",
155 |         "5     9\n",
156 |         "6     4\n",
157 |         "7    10\n",
158 |         "8     5\n",
159 |         "9     6\n",
160 |         "dtype: float64"
161 |        ]
162 |       }
163 |      ],
164 |      "prompt_number": 11
165 |     },
166 |     {
167 |      "cell_type": "code",
168 |      "collapsed": false,
169 |      "input": [
170 |       "#Lets sort it now\n",
171 |       "ser2.sort()\n",
172 |       "\n",
173 |       "#Show\n",
174 |       "ser2"
175 |      ],
176 |      "language": "python",
177 |      "metadata": {},
178 |      "outputs": [
179 |       {
180 |        "metadata": {},
181 |        "output_type": "pyout",
182 |        "prompt_number": 13,
183 |        "text": [
184 |         "1   -1.987343\n",
185 |         "2   -0.883902\n",
186 |         "3   -0.875829\n",
187 |         "6   -0.761465\n",
188 |         "8   -0.144650\n",
189 |         "9    0.100972\n",
190 |         "4    0.216089\n",
191 |         "0    0.524553\n",
192 |         "5    0.744837\n",
193 |         "7    0.792798\n",
194 |         "dtype: float64"
195 |        ]
196 |       }
197 |      ],
198 |      "prompt_number": 13
199 |     },
200 |     {
201 |      "cell_type": "code",
202 |      "collapsed": false,
203 |      "input": [
204 |       "#After sorting let's check the rank and see if it makes sense\n",
205 |       "ser2.rank()"
206 |      ],
207 |      "language": "python",
208 |      "metadata": {},
209 |      "outputs": [
210 |       {
211 |        "metadata": {},
212 |        "output_type": "pyout",
213 |        "prompt_number": 15,
214 |        "text": [
215 |         "1     1\n",
216 |         "2     2\n",
217 |         "3     3\n",
218 |         "6     4\n",
219 |         "8     5\n",
220 |         "9     6\n",
221 |         "4     7\n",
222 |         "0     8\n",
223 |         "5     9\n",
224 |         "7    10\n",
225 |         "dtype: float64"
226 |        ]
227 |       }
228 |      ],
229 |      "prompt_number": 15
230 |     },
231 |     {
232 |      "cell_type": "code",
233 |      "collapsed": false,
234 |      "input": [
235 |       "#On the left column we see the original index value and on the right we see its rank!"
236 |      ],
237 |      "language": "python",
238 |      "metadata": {},
239 |      "outputs": [],
240 |      "prompt_number": 16
241 |     },
242 |     {
243 |      "cell_type": "code",
244 |      "collapsed": false,
245 |      "input": [
246 |       "#Next we'll learn about using descriptive statistics on dataframes!"
247 |      ],
248 |      "language": "python",
249 |      "metadata": {},
250 |      "outputs": []
251 |     }
252 |    ],
253 |    "metadata": {}
254 |   }
255 |  ]
256 | }


--------------------------------------------------------------------------------
/Lec 26 - JSON with Python.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "metadata": {
  3 |   "name": "",
  4 |   "signature": "sha256:6d2393f4566d57db7cfe7c666bb925c7e798d6bdea3c6ac40223d0d0c327a3b8"
  5 |  },
  6 |  "nbformat": 3,
  7 |  "nbformat_minor": 0,
  8 |  "worksheets": [
  9 |   {
 10 |    "cells": [
 11 |     {
 12 |      "cell_type": "code",
 13 |      "collapsed": false,
 14 |      "input": [
 15 |       "import numpy as np\n",
 16 |       "from pandas import Series, DataFrame\n",
 17 |       "import pandas as pd"
 18 |      ],
 19 |      "language": "python",
 20 |      "metadata": {},
 21 |      "outputs": [],
 22 |      "prompt_number": 1
 23 |     },
 24 |     {
 25 |      "cell_type": "code",
 26 |      "collapsed": false,
 27 |      "input": [
 28 |       "# Here's an example of what a JSON (JavaScript Object Notation) object looks like:\n",
 29 |       "json_obj = \"\"\"\n",
 30 |       "{   \"zoo_animal\": \"Lion\",\n",
 31 |       "    \"food\": [\"Meat\", \"Veggies\", \"Honey\"],\n",
 32 |       "    \"fur\": \"Golden\",\n",
 33 |       "    \"clothes\": null, \n",
 34 |       "    \"diet\": [{\"zoo_animal\": \"Gazelle\", \"food\":\"grass\", \"fur\": \"Brown\"}]\n",
 35 |       "}\n",
 36 |       "\"\"\""
 37 |      ],
 38 |      "language": "python",
 39 |      "metadata": {},
 40 |      "outputs": [],
 41 |      "prompt_number": 3
 42 |     },
 43 |     {
 44 |      "cell_type": "code",
 45 |      "collapsed": false,
 46 |      "input": [
 47 |       "#Let's import the json module\n",
 48 |       "import json\n",
 49 |       "\n",
 50 |       "#Lets load json data\n",
 51 |       "data = json.loads(json_obj)"
 52 |      ],
 53 |      "language": "python",
 54 |      "metadata": {},
 55 |      "outputs": [],
 56 |      "prompt_number": 4
 57 |     },
 58 |     {
 59 |      "cell_type": "code",
 60 |      "collapsed": false,
 61 |      "input": [
 62 |       "#Show\n",
 63 |       "data"
 64 |      ],
 65 |      "language": "python",
 66 |      "metadata": {},
 67 |      "outputs": [
 68 |       {
 69 |        "metadata": {},
 70 |        "output_type": "pyout",
 71 |        "prompt_number": 5,
 72 |        "text": [
 73 |         "{u'clothes': None,\n",
 74 |         " u'diet': [{u'food': u'grass', u'fur': u'Brown', u'zoo_animal': u'Gazelle'}],\n",
 75 |         " u'food': [u'Meat', u'Veggies', u'Honey'],\n",
 76 |         " u'fur': u'Golden',\n",
 77 |         " u'zoo_animal': u'Lion'}"
 78 |        ]
 79 |       }
 80 |      ],
 81 |      "prompt_number": 5
 82 |     },
 83 |     {
 84 |      "cell_type": "code",
 85 |      "collapsed": false,
 86 |      "input": [
 87 |       "#We can also convert back to JSON\n",
 88 |       "json.dumps(data)"
 89 |      ],
 90 |      "language": "python",
 91 |      "metadata": {},
 92 |      "outputs": [
 93 |       {
 94 |        "metadata": {},
 95 |        "output_type": "pyout",
 96 |        "prompt_number": 6,
 97 |        "text": [
 98 |         "'{\"food\": [\"Meat\", \"Veggies\", \"Honey\"], \"zoo_animal\": \"Lion\", \"fur\": \"Golden\", \"diet\": [{\"food\": \"grass\", \"zoo_animal\": \"Gazelle\", \"fur\": \"Brown\"}], \"clothes\": null}'"
 99 |        ]
100 |       }
101 |      ],
102 |      "prompt_number": 6
103 |     },
104 |     {
105 |      "cell_type": "code",
106 |      "collapsed": false,
107 |      "input": [
108 |       "#We can simply open JSON data after loading with a DataFrame\n",
109 |       "dframe = DataFrame(data['diet'])"
110 |      ],
111 |      "language": "python",
112 |      "metadata": {},
113 |      "outputs": [],
114 |      "prompt_number": 7
115 |     },
116 |     {
117 |      "cell_type": "code",
118 |      "collapsed": false,
119 |      "input": [
120 |       "#Show\n",
121 |       "dframe"
122 |      ],
123 |      "language": "python",
124 |      "metadata": {},
125 |      "outputs": [
126 |       {
127 |        "html": [
128 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
129 |         "<table border=\"1\" class=\"dataframe\">\n",
130 |         "  <thead>\n",
131 |         "    <tr style=\"text-align: right;\">\n",
132 |         "      <th></th>\n",
133 |         "      <th>food</th>\n",
134 |         "      <th>fur</th>\n",
135 |         "      <th>zoo_animal</th>\n",
136 |         "    </tr>\n",
137 |         "  </thead>\n",
138 |         "  <tbody>\n",
139 |         "    <tr>\n",
140 |         "      <th>0</th>\n",
141 |         "      <td> grass</td>\n",
142 |         "      <td> Brown</td>\n",
143 |         "      <td> Gazelle</td>\n",
144 |         "    </tr>\n",
145 |         "  </tbody>\n",
146 |         "</table>\n",
147 |         "</div>"
148 |        ],
149 |        "metadata": {},
150 |        "output_type": "pyout",
151 |        "prompt_number": 8,
152 |        "text": [
153 |         "    food    fur zoo_animal\n",
154 |         "0  grass  Brown    Gazelle"
155 |        ]
156 |       }
157 |      ],
158 |      "prompt_number": 8
159 |     },
160 |     {
161 |      "cell_type": "code",
162 |      "collapsed": false,
163 |      "input": [
164 |       "# There's lots of custom selection you can do, based on what you do or don't want in your DataFrame (you can specify columns, etc.)\n"
165 |      ],
166 |      "language": "python",
167 |      "metadata": {},
168 |      "outputs": [],
169 |      "prompt_number": 9
170 |     },
171 |     {
172 |      "cell_type": "code",
173 |      "collapsed": false,
174 |      "input": [
175 |       "#Next up, XML and HTML file format with python!"
176 |      ],
177 |      "language": "python",
178 |      "metadata": {},
179 |      "outputs": []
180 |     }
181 |    ],
182 |    "metadata": {}
183 |   }
184 |  ]
185 | }


--------------------------------------------------------------------------------
/Lec 28 - Excel with Python.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "metadata": {
  3 |   "name": "",
  4 |   "signature": "sha256:17dde204321b539d48444124b1615b10314d0fad3a8f12472070bcf99ec1bec9"
  5 |  },
  6 |  "nbformat": 3,
  7 |  "nbformat_minor": 0,
  8 |  "worksheets": [
  9 |   {
 10 |    "cells": [
 11 |     {
 12 |      "cell_type": "code",
 13 |      "collapsed": false,
 14 |      "input": [
 15 |       "#Now we'll learn how to work with excel files"
 16 |      ],
 17 |      "language": "python",
 18 |      "metadata": {},
 19 |      "outputs": [],
 20 |      "prompt_number": 1
 21 |     },
 22 |     {
 23 |      "cell_type": "code",
 24 |      "collapsed": false,
 25 |      "input": [
 26 |       "\"\"\"\n",
 27 |       "IMPORTANT NOTE: NEED TO HAVE xlrd AND openpyxl INSTALLED!!!\n",
 28 |       "\"\"\""
 29 |      ],
 30 |      "language": "python",
 31 |      "metadata": {},
 32 |      "outputs": [
 33 |       {
 34 |        "metadata": {},
 35 |        "output_type": "pyout",
 36 |        "prompt_number": 2,
 37 |        "text": [
 38 |         "'\\nIMPORTANT NOTE: NEED TO HAVE xlrd AND openpyxl INSTALLED!!!\\n'"
 39 |        ]
 40 |       }
 41 |      ],
 42 |      "prompt_number": 2
 43 |     },
 44 |     {
 45 |      "cell_type": "code",
 46 |      "collapsed": false,
 47 |      "input": [
 48 |       "import pandas as pd\n"
 49 |      ],
 50 |      "language": "python",
 51 |      "metadata": {},
 52 |      "outputs": [],
 53 |      "prompt_number": 2
 54 |     },
 55 |     {
 56 |      "cell_type": "code",
 57 |      "collapsed": false,
 58 |      "input": [
 59 |       "# Open the excel file as an object\n",
 60 |       "xlsfile = pd.ExcelFile('Lec_28_test.xlsx')"
 61 |      ],
 62 |      "language": "python",
 63 |      "metadata": {},
 64 |      "outputs": [],
 65 |      "prompt_number": 7
 66 |     },
 67 |     {
 68 |      "cell_type": "code",
 69 |      "collapsed": false,
 70 |      "input": [
 71 |       "# Parse the first sheet of the excel file and set as DataFrame\n",
 72 |       "dframe = xlsfile.parse('Sheet1')"
 73 |      ],
 74 |      "language": "python",
 75 |      "metadata": {},
 76 |      "outputs": [],
 77 |      "prompt_number": 8
 78 |     },
 79 |     {
 80 |      "cell_type": "code",
 81 |      "collapsed": false,
 82 |      "input": [
 83 |       "#Show!\n",
 84 |       "dframe"
 85 |      ],
 86 |      "language": "python",
 87 |      "metadata": {},
 88 |      "outputs": [
 89 |       {
 90 |        "html": [
 91 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
 92 |         "<table border=\"1\" class=\"dataframe\">\n",
 93 |         "  <thead>\n",
 94 |         "    <tr style=\"text-align: right;\">\n",
 95 |         "      <th></th>\n",
 96 |         "      <th>This is a test</th>\n",
 97 |         "      <th>Unnamed: 1</th>\n",
 98 |         "      <th>Unnamed: 2</th>\n",
 99 |         "    </tr>\n",
100 |         "  </thead>\n",
101 |         "  <tbody>\n",
102 |         "    <tr>\n",
103 |         "      <th>0</th>\n",
104 |         "      <td>  23</td>\n",
105 |         "      <td> 6678</td>\n",
106 |         "      <td>  456</td>\n",
107 |         "    </tr>\n",
108 |         "    <tr>\n",
109 |         "      <th>1</th>\n",
110 |         "      <td> 234</td>\n",
111 |         "      <td>  678</td>\n",
112 |         "      <td>  456</td>\n",
113 |         "    </tr>\n",
114 |         "    <tr>\n",
115 |         "      <th>2</th>\n",
116 |         "      <td> 234</td>\n",
117 |         "      <td>    7</td>\n",
118 |         "      <td>  345</td>\n",
119 |         "    </tr>\n",
120 |         "    <tr>\n",
121 |         "      <th>3</th>\n",
122 |         "      <td>  34</td>\n",
123 |         "      <td>   56</td>\n",
124 |         "      <td>  234</td>\n",
125 |         "    </tr>\n",
126 |         "    <tr>\n",
127 |         "      <th>4</th>\n",
128 |         "      <td>   5</td>\n",
129 |         "      <td>  456</td>\n",
130 |         "      <td> 4365</td>\n",
131 |         "    </tr>\n",
132 |         "  </tbody>\n",
133 |         "</table>\n",
134 |         "</div>"
135 |        ],
136 |        "metadata": {},
137 |        "output_type": "pyout",
138 |        "prompt_number": 9,
139 |        "text": [
140 |         "   This is a test  Unnamed: 1  Unnamed: 2\n",
141 |         "0              23        6678         456\n",
142 |         "1             234         678         456\n",
143 |         "2             234           7         345\n",
144 |         "3              34          56         234\n",
145 |         "4               5         456        4365"
146 |        ]
147 |       }
148 |      ],
149 |      "prompt_number": 9
150 |     },
151 |     {
152 |      "cell_type": "code",
153 |      "collapsed": false,
154 |      "input": [
155 |       "#Now we know how to open various file types! Great!\n",
156 |       "#Next we'll learn about various DataFrame techniques!"
157 |      ],
158 |      "language": "python",
159 |      "metadata": {},
160 |      "outputs": [],
161 |      "prompt_number": 10
162 |     },
163 |     {
164 |      "cell_type": "code",
165 |      "collapsed": false,
166 |      "input": [],
167 |      "language": "python",
168 |      "metadata": {},
169 |      "outputs": []
170 |     }
171 |    ],
172 |    "metadata": {}
173 |   }
174 |  ]
175 | }


--------------------------------------------------------------------------------
/Lec 32 - Combining DataFrames.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "metadata": {
  3 |   "name": "",
  4 |   "signature": "sha256:b82eaf936d673802b3e8255d702cbaf5faa956bb4d22f50257b9aa9576ec3c6c"
  5 |  },
  6 |  "nbformat": 3,
  7 |  "nbformat_minor": 0,
  8 |  "worksheets": [
  9 |   {
 10 |    "cells": [
 11 |     {
 12 |      "cell_type": "code",
 13 |      "collapsed": false,
 14 |      "input": [
 15 |       "import numpy as np\n",
 16 |       "import pandas as pd\n",
 17 |       "from pandas import Series,DataFrame"
 18 |      ],
 19 |      "language": "python",
 20 |      "metadata": {},
 21 |      "outputs": [],
 22 |      "prompt_number": 1
 23 |     },
 24 |     {
 25 |      "cell_type": "code",
 26 |      "collapsed": false,
 27 |      "input": [
 28 |       "#Lets make some Series to work with\n",
 29 |       "\n",
 30 |       "#First Series\n",
 31 |       "ser1 = Series([2,np.nan,4,np.nan,6,np.nan],\n",
 32 |       "           index=['Q','R','S','T','U','V'])\n",
 33 |       "\n",
 34 |       "#Second Series (based off length of ser1)\n",
 35 |       "ser2 = Series(np.arange(len(ser1), dtype=np.float64),\n",
 36 |       "           index=['Q','R','S','T','U','V'])\n",
 37 |       "\n",
 38 |       "ser2[-1] = np.nan"
 39 |      ],
 40 |      "language": "python",
 41 |      "metadata": {},
 42 |      "outputs": [],
 43 |      "prompt_number": 6
 44 |     },
 45 |     {
 46 |      "cell_type": "code",
 47 |      "collapsed": false,
 48 |      "input": [
 49 |       "ser1"
 50 |      ],
 51 |      "language": "python",
 52 |      "metadata": {},
 53 |      "outputs": [
 54 |       {
 55 |        "metadata": {},
 56 |        "output_type": "pyout",
 57 |        "prompt_number": 7,
 58 |        "text": [
 59 |         "Q     2\n",
 60 |         "R   NaN\n",
 61 |         "S     4\n",
 62 |         "T   NaN\n",
 63 |         "U     6\n",
 64 |         "V   NaN\n",
 65 |         "dtype: float64"
 66 |        ]
 67 |       }
 68 |      ],
 69 |      "prompt_number": 7
 70 |     },
 71 |     {
 72 |      "cell_type": "code",
 73 |      "collapsed": false,
 74 |      "input": [
 75 |       "ser2"
 76 |      ],
 77 |      "language": "python",
 78 |      "metadata": {},
 79 |      "outputs": [
 80 |       {
 81 |        "metadata": {},
 82 |        "output_type": "pyout",
 83 |        "prompt_number": 8,
 84 |        "text": [
 85 |         "Q     0\n",
 86 |         "R     1\n",
 87 |         "S     2\n",
 88 |         "T     3\n",
 89 |         "U     4\n",
 90 |         "V   NaN\n",
 91 |         "dtype: float64"
 92 |        ]
 93 |       }
 94 |      ],
 95 |      "prompt_number": 8
 96 |     },
 97 |     {
 98 |      "cell_type": "code",
 99 |      "collapsed": false,
100 |      "input": [
101 |       "# Now let's get a Series where the value of ser2 is chosen if ser1 is NaN, otherwise let the value be ser1\n",
102 |       "Series(np.where(pd.isnull(ser1),ser2,ser1),index=ser1.index)"
103 |      ],
104 |      "language": "python",
105 |      "metadata": {},
106 |      "outputs": [
107 |       {
108 |        "metadata": {},
109 |        "output_type": "pyout",
110 |        "prompt_number": 14,
111 |        "text": [
112 |         "Q     2\n",
113 |         "R     1\n",
114 |         "S     4\n",
115 |         "T     3\n",
116 |         "U     6\n",
117 |         "V   NaN\n",
118 |         "dtype: float64"
119 |        ]
120 |       }
121 |      ],
122 |      "prompt_number": 14
123 |     },
124 |     {
125 |      "cell_type": "code",
126 |      "collapsed": false,
127 |      "input": [
128 |       "#Take a moment to really understand how the above worked"
129 |      ],
130 |      "language": "python",
131 |      "metadata": {},
132 |      "outputs": [],
133 |      "prompt_number": 11
134 |     },
135 |     {
136 |      "cell_type": "code",
137 |      "collapsed": false,
138 |      "input": [
139 |       "#Now we can do the same thing simply by using combine_first with pandas\n",
140 |       "ser1.combine_first(ser2)\n",
141 |       "\n",
142 |       "#This combines the Series values, choosing the values of the calling Series first, unless it's a NaN"
143 |      ],
144 |      "language": "python",
145 |      "metadata": {},
146 |      "outputs": [
147 |       {
148 |        "metadata": {},
149 |        "output_type": "pyout",
150 |        "prompt_number": 21,
151 |        "text": [
152 |         "Q     2\n",
153 |         "R     1\n",
154 |         "S     4\n",
155 |         "T     3\n",
156 |         "U     6\n",
157 |         "V   NaN\n",
158 |         "dtype: float64"
159 |        ]
160 |       }
161 |      ],
162 |      "prompt_number": 21
163 |     },
164 |     {
165 |      "cell_type": "code",
166 |      "collapsed": false,
167 |      "input": [
168 |       "#Now let's see how this works on a DataFrame!"
169 |      ],
170 |      "language": "python",
171 |      "metadata": {},
172 |      "outputs": [],
173 |      "prompt_number": 22
174 |     },
175 |     {
176 |      "cell_type": "code",
177 |      "collapsed": false,
178 |      "input": [
179 |       "#Let's make some DataFrames to work with\n",
180 |       "dframe_odds = DataFrame({'X': [1., np.nan, 3., np.nan],\n",
181 |       "                     'Y': [np.nan, 5., np.nan, 7.],\n",
182 |       "                     'Z': [np.nan, 9., np.nan, 11.]})\n",
183 |       "dframe_evens = DataFrame({'X': [2., 4., np.nan, 6., 8.],\n",
184 |       "                     'Y': [np.nan, 10., 12., 14., 16.]})\n"
185 |      ],
186 |      "language": "python",
187 |      "metadata": {},
188 |      "outputs": [],
189 |      "prompt_number": 34
190 |     },
191 |     {
192 |      "cell_type": "code",
193 |      "collapsed": false,
194 |      "input": [
195 |       "#Show\n",
196 |       "dframe_odds"
197 |      ],
198 |      "language": "python",
199 |      "metadata": {},
200 |      "outputs": [
201 |       {
202 |        "html": [
203 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
204 |         "<table border=\"1\" class=\"dataframe\">\n",
205 |         "  <thead>\n",
206 |         "    <tr style=\"text-align: right;\">\n",
207 |         "      <th></th>\n",
208 |         "      <th>X</th>\n",
209 |         "      <th>Y</th>\n",
210 |         "      <th>Z</th>\n",
211 |         "    </tr>\n",
212 |         "  </thead>\n",
213 |         "  <tbody>\n",
214 |         "    <tr>\n",
215 |         "      <th>0</th>\n",
216 |         "      <td>  1</td>\n",
217 |         "      <td>NaN</td>\n",
218 |         "      <td>NaN</td>\n",
219 |         "    </tr>\n",
220 |         "    <tr>\n",
221 |         "      <th>1</th>\n",
222 |         "      <td>NaN</td>\n",
223 |         "      <td>  5</td>\n",
224 |         "      <td>  9</td>\n",
225 |         "    </tr>\n",
226 |         "    <tr>\n",
227 |         "      <th>2</th>\n",
228 |         "      <td>  3</td>\n",
229 |         "      <td>NaN</td>\n",
230 |         "      <td>NaN</td>\n",
231 |         "    </tr>\n",
232 |         "    <tr>\n",
233 |         "      <th>3</th>\n",
234 |         "      <td>NaN</td>\n",
235 |         "      <td>  7</td>\n",
236 |         "      <td> 11</td>\n",
237 |         "    </tr>\n",
238 |         "  </tbody>\n",
239 |         "</table>\n",
240 |         "</div>"
241 |        ],
242 |        "metadata": {},
243 |        "output_type": "pyout",
244 |        "prompt_number": 35,
245 |        "text": [
246 |         "    X   Y   Z\n",
247 |         "0   1 NaN NaN\n",
248 |         "1 NaN   5   9\n",
249 |         "2   3 NaN NaN\n",
250 |         "3 NaN   7  11"
251 |        ]
252 |       }
253 |      ],
254 |      "prompt_number": 35
255 |     },
256 |     {
257 |      "cell_type": "code",
258 |      "collapsed": false,
259 |      "input": [
260 |       "#Show\n",
261 |       "dframe_evens"
262 |      ],
263 |      "language": "python",
264 |      "metadata": {},
265 |      "outputs": [
266 |       {
267 |        "html": [
268 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
269 |         "<table border=\"1\" class=\"dataframe\">\n",
270 |         "  <thead>\n",
271 |         "    <tr style=\"text-align: right;\">\n",
272 |         "      <th></th>\n",
273 |         "      <th>X</th>\n",
274 |         "      <th>Y</th>\n",
275 |         "    </tr>\n",
276 |         "  </thead>\n",
277 |         "  <tbody>\n",
278 |         "    <tr>\n",
279 |         "      <th>0</th>\n",
280 |         "      <td>  2</td>\n",
281 |         "      <td>NaN</td>\n",
282 |         "    </tr>\n",
283 |         "    <tr>\n",
284 |         "      <th>1</th>\n",
285 |         "      <td>  4</td>\n",
286 |         "      <td> 10</td>\n",
287 |         "    </tr>\n",
288 |         "    <tr>\n",
289 |         "      <th>2</th>\n",
290 |         "      <td>NaN</td>\n",
291 |         "      <td> 12</td>\n",
292 |         "    </tr>\n",
293 |         "    <tr>\n",
294 |         "      <th>3</th>\n",
295 |         "      <td>  6</td>\n",
296 |         "      <td> 14</td>\n",
297 |         "    </tr>\n",
298 |         "    <tr>\n",
299 |         "      <th>4</th>\n",
300 |         "      <td>  8</td>\n",
301 |         "      <td> 16</td>\n",
302 |         "    </tr>\n",
303 |         "  </tbody>\n",
304 |         "</table>\n",
305 |         "</div>"
306 |        ],
307 |        "metadata": {},
308 |        "output_type": "pyout",
309 |        "prompt_number": 36,
310 |        "text": [
311 |         "    X   Y\n",
312 |         "0   2 NaN\n",
313 |         "1   4  10\n",
314 |         "2 NaN  12\n",
315 |         "3   6  14\n",
316 |         "4   8  16"
317 |        ]
318 |       }
319 |      ],
320 |      "prompt_number": 36
321 |     },
322 |     {
323 |      "cell_type": "code",
324 |      "collapsed": false,
325 |      "input": [
326 |       "#Now let's combine using odds values first, unless there's a NaN, then put the evens values\n",
327 |       "dframe_odds.combine_first(dframe_evens)"
328 |      ],
329 |      "language": "python",
330 |      "metadata": {},
331 |      "outputs": [
332 |       {
333 |        "html": [
334 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
335 |         "<table border=\"1\" class=\"dataframe\">\n",
336 |         "  <thead>\n",
337 |         "    <tr style=\"text-align: right;\">\n",
338 |         "      <th></th>\n",
339 |         "      <th>X</th>\n",
340 |         "      <th>Y</th>\n",
341 |         "      <th>Z</th>\n",
342 |         "    </tr>\n",
343 |         "  </thead>\n",
344 |         "  <tbody>\n",
345 |         "    <tr>\n",
346 |         "      <th>0</th>\n",
347 |         "      <td> 1</td>\n",
348 |         "      <td>NaN</td>\n",
349 |         "      <td>NaN</td>\n",
350 |         "    </tr>\n",
351 |         "    <tr>\n",
352 |         "      <th>1</th>\n",
353 |         "      <td> 4</td>\n",
354 |         "      <td>  5</td>\n",
355 |         "      <td>  9</td>\n",
356 |         "    </tr>\n",
357 |         "    <tr>\n",
358 |         "      <th>2</th>\n",
359 |         "      <td> 3</td>\n",
360 |         "      <td> 12</td>\n",
361 |         "      <td>NaN</td>\n",
362 |         "    </tr>\n",
363 |         "    <tr>\n",
364 |         "      <th>3</th>\n",
365 |         "      <td> 6</td>\n",
366 |         "      <td>  7</td>\n",
367 |         "      <td> 11</td>\n",
368 |         "    </tr>\n",
369 |         "    <tr>\n",
370 |         "      <th>4</th>\n",
371 |         "      <td> 8</td>\n",
372 |         "      <td> 16</td>\n",
373 |         "      <td>NaN</td>\n",
374 |         "    </tr>\n",
375 |         "  </tbody>\n",
376 |         "</table>\n",
377 |         "</div>"
378 |        ],
379 |        "metadata": {},
380 |        "output_type": "pyout",
381 |        "prompt_number": 38,
382 |        "text": [
383 |         "   X   Y   Z\n",
384 |         "0  1 NaN NaN\n",
385 |         "1  4   5   9\n",
386 |         "2  3  12 NaN\n",
387 |         "3  6   7  11\n",
388 |         "4  8  16 NaN"
389 |        ]
390 |       }
391 |      ],
392 |      "prompt_number": 38
393 |     },
394 |     {
395 |      "cell_type": "code",
396 |      "collapsed": false,
397 |      "input": [
398 |       "#Next up: Reshaping DataFrames!"
399 |      ],
400 |      "language": "python",
401 |      "metadata": {},
402 |      "outputs": []
403 |     }
404 |    ],
405 |    "metadata": {}
406 |   }
407 |  ]
408 | }


--------------------------------------------------------------------------------
/Lec 33 - Reshaping.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "metadata": {
  3 |   "name": "",
  4 |   "signature": "sha256:044e1106e394ba9dde7e4fad66b3a168f5cd1f4cbb50ccc25eb864319e6a8979"
  5 |  },
  6 |  "nbformat": 3,
  7 |  "nbformat_minor": 0,
  8 |  "worksheets": [
  9 |   {
 10 |    "cells": [
 11 |     {
 12 |      "cell_type": "code",
 13 |      "collapsed": false,
 14 |      "input": [
 15 |       "import numpy as np\n",
 16 |       "import pandas as pd\n",
 17 |       "from pandas import Series, DataFrame"
 18 |      ],
 19 |      "language": "python",
 20 |      "metadata": {},
 21 |      "outputs": [],
 22 |      "prompt_number": 1
 23 |     },
 24 |     {
 25 |      "cell_type": "code",
 26 |      "collapsed": false,
 27 |      "input": [
 28 |       "#Let's see how stack and unstack work\n",
 29 |       "\n",
 30 |       "# Create DataFrame\n",
 31 |       "dframe1 = DataFrame(np.arange(8).reshape((2, 4)),\n",
 32 |       "                 index=pd.Index(['LA', 'SF'], name='city'),\n",
 33 |       "                 columns=pd.Index(['A', 'B', 'C','D'], name='letter'))\n",
 34 |       "#Show\n",
 35 |       "dframe1"
 36 |      ],
 37 |      "language": "python",
 38 |      "metadata": {},
 39 |      "outputs": [
 40 |       {
 41 |        "html": [
 42 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
 43 |         "<table border=\"1\" class=\"dataframe\">\n",
 44 |         "  <thead>\n",
 45 |         "    <tr style=\"text-align: right;\">\n",
 46 |         "      <th>letter</th>\n",
 47 |         "      <th>A</th>\n",
 48 |         "      <th>B</th>\n",
 49 |         "      <th>C</th>\n",
 50 |         "      <th>D</th>\n",
 51 |         "    </tr>\n",
 52 |         "    <tr>\n",
 53 |         "      <th>city</th>\n",
 54 |         "      <th></th>\n",
 55 |         "      <th></th>\n",
 56 |         "      <th></th>\n",
 57 |         "      <th></th>\n",
 58 |         "    </tr>\n",
 59 |         "  </thead>\n",
 60 |         "  <tbody>\n",
 61 |         "    <tr>\n",
 62 |         "      <th>LA</th>\n",
 63 |         "      <td> 0</td>\n",
 64 |         "      <td> 1</td>\n",
 65 |         "      <td> 2</td>\n",
 66 |         "      <td> 3</td>\n",
 67 |         "    </tr>\n",
 68 |         "    <tr>\n",
 69 |         "      <th>SF</th>\n",
 70 |         "      <td> 4</td>\n",
 71 |         "      <td> 5</td>\n",
 72 |         "      <td> 6</td>\n",
 73 |         "      <td> 7</td>\n",
 74 |         "    </tr>\n",
 75 |         "  </tbody>\n",
 76 |         "</table>\n",
 77 |         "</div>"
 78 |        ],
 79 |        "metadata": {},
 80 |        "output_type": "pyout",
 81 |        "prompt_number": 2,
 82 |        "text": [
 83 |         "letter  A  B  C  D\n",
 84 |         "city              \n",
 85 |         "LA      0  1  2  3\n",
 86 |         "SF      4  5  6  7"
 87 |        ]
 88 |       }
 89 |      ],
 90 |      "prompt_number": 2
 91 |     },
 92 |     {
 93 |      "cell_type": "code",
 94 |      "collapsed": false,
 95 |      "input": [
 96 |       "# Use stack to pivot the columns into the rows\n",
 97 |       "dframe_st = dframe1.stack()\n",
 98 |       "\n",
 99 |       "#Show\n",
100 |       "dframe_st"
101 |      ],
102 |      "language": "python",
103 |      "metadata": {},
104 |      "outputs": [
105 |       {
106 |        "metadata": {},
107 |        "output_type": "pyout",
108 |        "prompt_number": 7,
109 |        "text": [
110 |         "city  letter\n",
111 |         "LA    A         0\n",
112 |         "      B         1\n",
113 |         "      C         2\n",
114 |         "      D         3\n",
115 |         "SF    A         4\n",
116 |         "      B         5\n",
117 |         "      C         6\n",
118 |         "      D         7\n",
119 |         "dtype: int32"
120 |        ]
121 |       }
122 |      ],
123 |      "prompt_number": 7
124 |     },
125 |     {
126 |      "cell_type": "code",
127 |      "collapsed": false,
128 |      "input": [
129 |       "#We can always rearrange back into a DataFrame\n",
130 |       "dframe_st.unstack()"
131 |      ],
132 |      "language": "python",
133 |      "metadata": {},
134 |      "outputs": [
135 |       {
136 |        "html": [
137 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
138 |         "<table border=\"1\" class=\"dataframe\">\n",
139 |         "  <thead>\n",
140 |         "    <tr style=\"text-align: right;\">\n",
141 |         "      <th>letter</th>\n",
142 |         "      <th>A</th>\n",
143 |         "      <th>B</th>\n",
144 |         "      <th>C</th>\n",
145 |         "      <th>D</th>\n",
146 |         "    </tr>\n",
147 |         "    <tr>\n",
148 |         "      <th>city</th>\n",
149 |         "      <th></th>\n",
150 |         "      <th></th>\n",
151 |         "      <th></th>\n",
152 |         "      <th></th>\n",
153 |         "    </tr>\n",
154 |         "  </thead>\n",
155 |         "  <tbody>\n",
156 |         "    <tr>\n",
157 |         "      <th>LA</th>\n",
158 |         "      <td> 0</td>\n",
159 |         "      <td> 1</td>\n",
160 |         "      <td> 2</td>\n",
161 |         "      <td> 3</td>\n",
162 |         "    </tr>\n",
163 |         "    <tr>\n",
164 |         "      <th>SF</th>\n",
165 |         "      <td> 4</td>\n",
166 |         "      <td> 5</td>\n",
167 |         "      <td> 6</td>\n",
168 |         "      <td> 7</td>\n",
169 |         "    </tr>\n",
170 |         "  </tbody>\n",
171 |         "</table>\n",
172 |         "</div>"
173 |        ],
174 |        "metadata": {},
175 |        "output_type": "pyout",
176 |        "prompt_number": 8,
177 |        "text": [
178 |         "letter  A  B  C  D\n",
179 |         "city              \n",
180 |         "LA      0  1  2  3\n",
181 |         "SF      4  5  6  7"
182 |        ]
183 |       }
184 |      ],
185 |      "prompt_number": 8
186 |     },
187 |     {
188 |      "cell_type": "code",
189 |      "collapsed": false,
190 |      "input": [
191 |       "#We can choose which level to unstack by\n",
192 |       "dframe_st.unstack(0)"
193 |      ],
194 |      "language": "python",
195 |      "metadata": {},
196 |      "outputs": [
197 |       {
198 |        "html": [
199 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
200 |         "<table border=\"1\" class=\"dataframe\">\n",
201 |         "  <thead>\n",
202 |         "    <tr style=\"text-align: right;\">\n",
203 |         "      <th>city</th>\n",
204 |         "      <th>LA</th>\n",
205 |         "      <th>SF</th>\n",
206 |         "    </tr>\n",
207 |         "    <tr>\n",
208 |         "      <th>letter</th>\n",
209 |         "      <th></th>\n",
210 |         "      <th></th>\n",
211 |         "    </tr>\n",
212 |         "  </thead>\n",
213 |         "  <tbody>\n",
214 |         "    <tr>\n",
215 |         "      <th>A</th>\n",
216 |         "      <td> 0</td>\n",
217 |         "      <td> 4</td>\n",
218 |         "    </tr>\n",
219 |         "    <tr>\n",
220 |         "      <th>B</th>\n",
221 |         "      <td> 1</td>\n",
222 |         "      <td> 5</td>\n",
223 |         "    </tr>\n",
224 |         "    <tr>\n",
225 |         "      <th>C</th>\n",
226 |         "      <td> 2</td>\n",
227 |         "      <td> 6</td>\n",
228 |         "    </tr>\n",
229 |         "    <tr>\n",
230 |         "      <th>D</th>\n",
231 |         "      <td> 3</td>\n",
232 |         "      <td> 7</td>\n",
233 |         "    </tr>\n",
234 |         "  </tbody>\n",
235 |         "</table>\n",
236 |         "</div>"
237 |        ],
238 |        "metadata": {},
239 |        "output_type": "pyout",
240 |        "prompt_number": 10,
241 |        "text": [
242 |         "city    LA  SF\n",
243 |         "letter        \n",
244 |         "A        0   4\n",
245 |         "B        1   5\n",
246 |         "C        2   6\n",
247 |         "D        3   7"
248 |        ]
249 |       }
250 |      ],
251 |      "prompt_number": 10
252 |     },
253 |     {
254 |      "cell_type": "code",
255 |      "collapsed": false,
256 |      "input": [
257 |       "# We can also choose the level to unstack by its name\n",
258 |       "dframe_st.unstack('letter')"
259 |      ],
260 |      "language": "python",
261 |      "metadata": {},
262 |      "outputs": [
263 |       {
264 |        "html": [
265 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
266 |         "<table border=\"1\" class=\"dataframe\">\n",
267 |         "  <thead>\n",
268 |         "    <tr style=\"text-align: right;\">\n",
269 |         "      <th>letter</th>\n",
270 |         "      <th>A</th>\n",
271 |         "      <th>B</th>\n",
272 |         "      <th>C</th>\n",
273 |         "      <th>D</th>\n",
274 |         "    </tr>\n",
275 |         "    <tr>\n",
276 |         "      <th>city</th>\n",
277 |         "      <th></th>\n",
278 |         "      <th></th>\n",
279 |         "      <th></th>\n",
280 |         "      <th></th>\n",
281 |         "    </tr>\n",
282 |         "  </thead>\n",
283 |         "  <tbody>\n",
284 |         "    <tr>\n",
285 |         "      <th>LA</th>\n",
286 |         "      <td> 0</td>\n",
287 |         "      <td> 1</td>\n",
288 |         "      <td> 2</td>\n",
289 |         "      <td> 3</td>\n",
290 |         "    </tr>\n",
291 |         "    <tr>\n",
292 |         "      <th>SF</th>\n",
293 |         "      <td> 4</td>\n",
294 |         "      <td> 5</td>\n",
295 |         "      <td> 6</td>\n",
296 |         "      <td> 7</td>\n",
297 |         "    </tr>\n",
298 |         "  </tbody>\n",
299 |         "</table>\n",
300 |         "</div>"
301 |        ],
302 |        "metadata": {},
303 |        "output_type": "pyout",
304 |        "prompt_number": 12,
305 |        "text": [
306 |         "letter  A  B  C  D\n",
307 |         "city              \n",
308 |         "LA      0  1  2  3\n",
309 |         "SF      4  5  6  7"
310 |        ]
311 |       }
312 |      ],
313 |      "prompt_number": 12
314 |     },
315 |     {
316 |      "cell_type": "code",
317 |      "collapsed": false,
318 |      "input": [
319 |       "# Unstacking by the other level name, 'city'\n",
320 |       "dframe_st.unstack('city')"
321 |      ],
322 |      "language": "python",
323 |      "metadata": {},
324 |      "outputs": [
325 |       {
326 |        "html": [
327 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
328 |         "<table border=\"1\" class=\"dataframe\">\n",
329 |         "  <thead>\n",
330 |         "    <tr style=\"text-align: right;\">\n",
331 |         "      <th>city</th>\n",
332 |         "      <th>LA</th>\n",
333 |         "      <th>SF</th>\n",
334 |         "    </tr>\n",
335 |         "    <tr>\n",
336 |         "      <th>letter</th>\n",
337 |         "      <th></th>\n",
338 |         "      <th></th>\n",
339 |         "    </tr>\n",
340 |         "  </thead>\n",
341 |         "  <tbody>\n",
342 |         "    <tr>\n",
343 |         "      <th>A</th>\n",
344 |         "      <td> 0</td>\n",
345 |         "      <td> 4</td>\n",
346 |         "    </tr>\n",
347 |         "    <tr>\n",
348 |         "      <th>B</th>\n",
349 |         "      <td> 1</td>\n",
350 |         "      <td> 5</td>\n",
351 |         "    </tr>\n",
352 |         "    <tr>\n",
353 |         "      <th>C</th>\n",
354 |         "      <td> 2</td>\n",
355 |         "      <td> 6</td>\n",
356 |         "    </tr>\n",
357 |         "    <tr>\n",
358 |         "      <th>D</th>\n",
359 |         "      <td> 3</td>\n",
360 |         "      <td> 7</td>\n",
361 |         "    </tr>\n",
362 |         "  </tbody>\n",
363 |         "</table>\n",
364 |         "</div>"
365 |        ],
366 |        "metadata": {},
367 |        "output_type": "pyout",
368 |        "prompt_number": 13,
369 |        "text": [
370 |         "city    LA  SF\n",
371 |         "letter        \n",
372 |         "A        0   4\n",
373 |         "B        1   5\n",
374 |         "C        2   6\n",
375 |         "D        3   7"
376 |        ]
377 |       }
378 |      ],
379 |      "prompt_number": 13
380 |     },
381 |     {
382 |      "cell_type": "code",
383 |      "collapsed": false,
384 |      "input": [
385 |       "# Let's see how stack and unstack handle NaN\n",
386 |       "\n",
387 |       "#Make two series\n",
388 |       "ser1 = Series([0, 1, 2], index=['Q', 'X', 'Y'])\n",
389 |       "ser2 = Series([4, 5, 6], index=['X', 'Y', 'Z'])\n",
390 |       "\n",
391 |       "#Concat to make a hierarchically indexed Series (named dframe)\n",
392 |       "dframe = pd.concat([ser1, ser2], keys=['Alpha', 'Beta'])\n",
393 |       "\n",
394 |       "# Unstack the resulting Series into a DataFrame\n",
395 |       "dframe.unstack()"
396 |      ],
397 |      "language": "python",
398 |      "metadata": {},
399 |      "outputs": [
400 |       {
401 |        "html": [
402 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
403 |         "<table border=\"1\" class=\"dataframe\">\n",
404 |         "  <thead>\n",
405 |         "    <tr style=\"text-align: right;\">\n",
406 |         "      <th></th>\n",
407 |         "      <th>Q</th>\n",
408 |         "      <th>X</th>\n",
409 |         "      <th>Y</th>\n",
410 |         "      <th>Z</th>\n",
411 |         "    </tr>\n",
412 |         "  </thead>\n",
413 |         "  <tbody>\n",
414 |         "    <tr>\n",
415 |         "      <th>Alpha</th>\n",
416 |         "      <td>  0</td>\n",
417 |         "      <td> 1</td>\n",
418 |         "      <td> 2</td>\n",
419 |         "      <td>NaN</td>\n",
420 |         "    </tr>\n",
421 |         "    <tr>\n",
422 |         "      <th>Beta</th>\n",
423 |         "      <td>NaN</td>\n",
424 |         "      <td> 4</td>\n",
425 |         "      <td> 5</td>\n",
426 |         "      <td>  6</td>\n",
427 |         "    </tr>\n",
428 |         "  </tbody>\n",
429 |         "</table>\n",
430 |         "</div>"
431 |        ],
432 |        "metadata": {},
433 |        "output_type": "pyout",
434 |        "prompt_number": 15,
435 |        "text": [
436 |         "        Q  X  Y   Z\n",
437 |         "Alpha   0  1  2 NaN\n",
438 |         "Beta  NaN  4  5   6"
439 |        ]
440 |       }
441 |      ],
442 |      "prompt_number": 15
443 |     },
444 |     {
445 |      "cell_type": "code",
446 |      "collapsed": false,
447 |      "input": [
448 |       "# Now stack will filter out NaN by default\n",
449 |       "dframe.unstack().stack()"
450 |      ],
451 |      "language": "python",
452 |      "metadata": {},
453 |      "outputs": [
454 |       {
455 |        "metadata": {},
456 |        "output_type": "pyout",
457 |        "prompt_number": 16,
458 |        "text": [
459 |         "Alpha  Q    0\n",
460 |         "       X    1\n",
461 |         "       Y    2\n",
462 |         "Beta   X    4\n",
463 |         "       Y    5\n",
464 |         "       Z    6\n",
465 |         "dtype: float64"
466 |        ]
467 |       }
468 |      ],
469 |      "prompt_number": 16
470 |     },
471 |     {
472 |      "cell_type": "code",
473 |      "collapsed": false,
474 |      "input": [
475 |       "# If we don't want this, we can set dropna to False\n",
476 |       "dframe.unstack().stack(dropna=False)"
477 |      ],
478 |      "language": "python",
479 |      "metadata": {},
480 |      "outputs": [
481 |       {
482 |        "metadata": {},
483 |        "output_type": "pyout",
484 |        "prompt_number": 17,
485 |        "text": [
486 |         "Alpha  Q     0\n",
487 |         "       X     1\n",
488 |         "       Y     2\n",
489 |         "       Z   NaN\n",
490 |         "Beta   Q   NaN\n",
491 |         "       X     4\n",
492 |         "       Y     5\n",
493 |         "       Z     6\n",
494 |         "dtype: float64"
495 |        ]
496 |       }
497 |      ],
498 |      "prompt_number": 17
499 |     },
500 |     {
501 |      "cell_type": "code",
502 |      "collapsed": false,
503 |      "input": [
504 |       "# Next we'll learn more about Pivoting DataFrames!"
505 |      ],
506 |      "language": "python",
507 |      "metadata": {},
508 |      "outputs": []
509 |     }
510 |    ],
511 |    "metadata": {}
512 |   }
513 |  ]
514 | }


--------------------------------------------------------------------------------
/Lec 34 - Pivoting.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "metadata": {
  3 |   "name": "",
  4 |   "signature": "sha256:1048b6a43fd8bac8d5856ed4783dc109901e124b69a3134cb301899bee2daf23"
  5 |  },
  6 |  "nbformat": 3,
  7 |  "nbformat_minor": 0,
  8 |  "worksheets": [
  9 |   {
 10 |    "cells": [
 11 |     {
 12 |      "cell_type": "code",
 13 |      "collapsed": false,
 14 |      "input": [
 15 |       "import numpy as np\n",
 16 |       "import pandas as pd\n",
 17 |       "from pandas import Series, DataFrame"
 18 |      ],
 19 |      "language": "python",
 20 |      "metadata": {},
 21 |      "outputs": [],
 22 |      "prompt_number": 1
 23 |     },
 24 |     {
 25 |      "cell_type": "code",
 26 |      "collapsed": false,
 27 |      "input": [
 28 |       "# Let's create some data to play with:\n",
 29 |       "\n",
 30 |       "# Note: It is not necessary to understand how this dataset was made to understand this Lecture.\n",
 31 |       "\n",
 32 |       "#import pandas testing utility\n",
 33 |       "import pandas.util.testing as tm; tm.N = 3\n",
 34 |       "\n",
 35 |       "#Create an unpivot function\n",
 36 |       "def unpivot(frame):\n",
 37 |       "    N, K = frame.shape\n",
 38 |       "    \n",
 39 |       "    data = {'value' : frame.values.ravel('F'),\n",
 40 |       "            'variable' : np.asarray(frame.columns).repeat(N),\n",
 41 |       "            'date' : np.tile(np.asarray(frame.index), K)}\n",
 42 |       "    \n",
 43 |       "    # Return the DataFrame\n",
 44 |       "    return DataFrame(data, columns=['date', 'variable', 'value'])\n",
 45 |       "\n",
 46 |       "#Set the DataFrame we'll be using\n",
 47 |       "dframe = unpivot(tm.makeTimeDataFrame())"
 48 |      ],
 49 |      "language": "python",
 50 |      "metadata": {},
 51 |      "outputs": [],
 52 |      "prompt_number": 62
 53 |     },
 54 |     {
 55 |      "cell_type": "code",
 56 |      "collapsed": false,
 57 |      "input": [
 58 |       "#Show the \"stacked\" data, note how there are multiple variables and values for the dates\n",
 59 |       "dframe"
 60 |      ],
 61 |      "language": "python",
 62 |      "metadata": {},
 63 |      "outputs": [
 64 |       {
 65 |        "html": [
 66 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
 67 |         "<table border=\"1\" class=\"dataframe\">\n",
 68 |         "  <thead>\n",
 69 |         "    <tr style=\"text-align: right;\">\n",
 70 |         "      <th></th>\n",
 71 |         "      <th>date</th>\n",
 72 |         "      <th>variable</th>\n",
 73 |         "      <th>value</th>\n",
 74 |         "    </tr>\n",
 75 |         "  </thead>\n",
 76 |         "  <tbody>\n",
 77 |         "    <tr>\n",
 78 |         "      <th>0 </th>\n",
 79 |         "      <td>2000-01-03</td>\n",
 80 |         "      <td> A</td>\n",
 81 |         "      <td>-0.157500</td>\n",
 82 |         "    </tr>\n",
 83 |         "    <tr>\n",
 84 |         "      <th>1 </th>\n",
 85 |         "      <td>2000-01-04</td>\n",
 86 |         "      <td> A</td>\n",
 87 |         "      <td>-0.200030</td>\n",
 88 |         "    </tr>\n",
 89 |         "    <tr>\n",
 90 |         "      <th>2 </th>\n",
 91 |         "      <td>2000-01-05</td>\n",
 92 |         "      <td> A</td>\n",
 93 |         "      <td> 1.395275</td>\n",
 94 |         "    </tr>\n",
 95 |         "    <tr>\n",
 96 |         "      <th>3 </th>\n",
 97 |         "      <td>2000-01-03</td>\n",
 98 |         "      <td> B</td>\n",
 99 |         "      <td> 0.553046</td>\n",
100 |         "    </tr>\n",
101 |         "    <tr>\n",
102 |         "      <th>4 </th>\n",
103 |         "      <td>2000-01-04</td>\n",
104 |         "      <td> B</td>\n",
105 |         "      <td> 0.393459</td>\n",
106 |         "    </tr>\n",
107 |         "    <tr>\n",
108 |         "      <th>5 </th>\n",
109 |         "      <td>2000-01-05</td>\n",
110 |         "      <td> B</td>\n",
111 |         "      <td> 0.176259</td>\n",
112 |         "    </tr>\n",
113 |         "    <tr>\n",
114 |         "      <th>6 </th>\n",
115 |         "      <td>2000-01-03</td>\n",
116 |         "      <td> C</td>\n",
117 |         "      <td> 1.652481</td>\n",
118 |         "    </tr>\n",
119 |         "    <tr>\n",
120 |         "      <th>7 </th>\n",
121 |         "      <td>2000-01-04</td>\n",
122 |         "      <td> C</td>\n",
123 |         "      <td> 1.645395</td>\n",
124 |         "    </tr>\n",
125 |         "    <tr>\n",
126 |         "      <th>8 </th>\n",
127 |         "      <td>2000-01-05</td>\n",
128 |         "      <td> C</td>\n",
129 |         "      <td> 0.311638</td>\n",
130 |         "    </tr>\n",
131 |         "    <tr>\n",
132 |         "      <th>9 </th>\n",
133 |         "      <td>2000-01-03</td>\n",
134 |         "      <td> D</td>\n",
135 |         "      <td>-1.394883</td>\n",
136 |         "    </tr>\n",
137 |         "    <tr>\n",
138 |         "      <th>10</th>\n",
139 |         "      <td>2000-01-04</td>\n",
140 |         "      <td> D</td>\n",
141 |         "      <td> 0.067484</td>\n",
142 |         "    </tr>\n",
143 |         "    <tr>\n",
144 |         "      <th>11</th>\n",
145 |         "      <td>2000-01-05</td>\n",
146 |         "      <td> D</td>\n",
147 |         "      <td>-0.819208</td>\n",
148 |         "    </tr>\n",
149 |         "  </tbody>\n",
150 |         "</table>\n",
151 |         "</div>"
152 |        ],
153 |        "metadata": {},
154 |        "output_type": "pyout",
155 |        "prompt_number": 63,
156 |        "text": [
157 |         "         date variable     value\n",
158 |         "0  2000-01-03        A -0.157500\n",
159 |         "1  2000-01-04        A -0.200030\n",
160 |         "2  2000-01-05        A  1.395275\n",
161 |         "3  2000-01-03        B  0.553046\n",
162 |         "4  2000-01-04        B  0.393459\n",
163 |         "5  2000-01-05        B  0.176259\n",
164 |         "6  2000-01-03        C  1.652481\n",
165 |         "7  2000-01-04        C  1.645395\n",
166 |         "8  2000-01-05        C  0.311638\n",
167 |         "9  2000-01-03        D -1.394883\n",
168 |         "10 2000-01-04        D  0.067484\n",
169 |         "11 2000-01-05        D -0.819208"
170 |        ]
171 |       }
172 |      ],
173 |      "prompt_number": 63
174 |     },
175 |     {
176 |      "cell_type": "code",
177 |      "collapsed": false,
178 |      "input": [
179 |       "# Now let's pivot the data\n",
180 |       "\n",
181 |       "# First two values passed are the row and column indexes, then finally the column that supplies the values\n",
182 |       "dframe_piv = dframe.pivot('date','variable','value')\n",
183 |       "\n",
184 |       "#Show\n",
185 |       "dframe_piv"
186 |      ],
187 |      "language": "python",
188 |      "metadata": {},
189 |      "outputs": [
190 |       {
191 |        "html": [
192 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
193 |         "<table border=\"1\" class=\"dataframe\">\n",
194 |         "  <thead>\n",
195 |         "    <tr style=\"text-align: right;\">\n",
196 |         "      <th>variable</th>\n",
197 |         "      <th>A</th>\n",
198 |         "      <th>B</th>\n",
199 |         "      <th>C</th>\n",
200 |         "      <th>D</th>\n",
201 |         "    </tr>\n",
202 |         "    <tr>\n",
203 |         "      <th>date</th>\n",
204 |         "      <th></th>\n",
205 |         "      <th></th>\n",
206 |         "      <th></th>\n",
207 |         "      <th></th>\n",
208 |         "    </tr>\n",
209 |         "  </thead>\n",
210 |         "  <tbody>\n",
211 |         "    <tr>\n",
212 |         "      <th>2000-01-03</th>\n",
213 |         "      <td>-0.157500</td>\n",
214 |         "      <td> 0.553046</td>\n",
215 |         "      <td> 1.652481</td>\n",
216 |         "      <td>-1.394883</td>\n",
217 |         "    </tr>\n",
218 |         "    <tr>\n",
219 |         "      <th>2000-01-04</th>\n",
220 |         "      <td>-0.200030</td>\n",
221 |         "      <td> 0.393459</td>\n",
222 |         "      <td> 1.645395</td>\n",
223 |         "      <td> 0.067484</td>\n",
224 |         "    </tr>\n",
225 |         "    <tr>\n",
226 |         "      <th>2000-01-05</th>\n",
227 |         "      <td> 1.395275</td>\n",
228 |         "      <td> 0.176259</td>\n",
229 |         "      <td> 0.311638</td>\n",
230 |         "      <td>-0.819208</td>\n",
231 |         "    </tr>\n",
232 |         "  </tbody>\n",
233 |         "</table>\n",
234 |         "</div>"
235 |        ],
236 |        "metadata": {},
237 |        "output_type": "pyout",
238 |        "prompt_number": 68,
239 |        "text": [
240 |         "variable           A         B         C         D\n",
241 |         "date                                              \n",
242 |         "2000-01-03 -0.157500  0.553046  1.652481 -1.394883\n",
243 |         "2000-01-04 -0.200030  0.393459  1.645395  0.067484\n",
244 |         "2000-01-05  1.395275  0.176259  0.311638 -0.819208"
245 |        ]
246 |       }
247 |      ],
248 |      "prompt_number": 68
249 |     },
250 |     {
251 |      "cell_type": "code",
252 |      "collapsed": false,
253 |      "input": [
254 |       "#Next we'll learn about duplicates in DataFrames!"
255 |      ],
256 |      "language": "python",
257 |      "metadata": {},
258 |      "outputs": [],
259 |      "prompt_number": 70
260 |     },
261 |     {
262 |      "cell_type": "code",
263 |      "collapsed": false,
264 |      "input": [],
265 |      "language": "python",
266 |      "metadata": {},
267 |      "outputs": []
268 |     }
269 |    ],
270 |    "metadata": {}
271 |   }
272 |  ]
273 | }


--------------------------------------------------------------------------------
/Lec 35 - Duplicates in DataFrames.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "metadata": {
  3 |   "name": "",
  4 |   "signature": "sha256:c63e4f15bdac12da4d100d59d003459f6c68048899095d45c3532a652e30fa7f"
  5 |  },
  6 |  "nbformat": 3,
  7 |  "nbformat_minor": 0,
  8 |  "worksheets": [
  9 |   {
 10 |    "cells": [
 11 |     {
 12 |      "cell_type": "code",
 13 |      "collapsed": false,
 14 |      "input": [
 15 |       "import numpy as np\n",
 16 |       "import pandas as pd\n",
 17 |       "from pandas import Series,DataFrame"
 18 |      ],
 19 |      "language": "python",
 20 |      "metadata": {},
 21 |      "outputs": [],
 22 |      "prompt_number": 1
 23 |     },
 24 |     {
 25 |      "cell_type": "code",
 26 |      "collapsed": false,
 27 |      "input": [
 28 |       "#Let's get a DataFrame with duplicates\n",
 29 |       "\n",
 30 |       "dframe = DataFrame({'key1': ['A'] * 2 + ['B'] * 3,\n",
 31 |       "                  'key2': [2, 2, 2, 3, 3]})\n",
 32 |       "\n",
 33 |       "#Show\n",
 34 |       "dframe"
 35 |      ],
 36 |      "language": "python",
 37 |      "metadata": {},
 38 |      "outputs": [
 39 |       {
 40 |        "html": [
 41 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
 42 |         "<table border=\"1\" class=\"dataframe\">\n",
 43 |         "  <thead>\n",
 44 |         "    <tr style=\"text-align: right;\">\n",
 45 |         "      <th></th>\n",
 46 |         "      <th>key1</th>\n",
 47 |         "      <th>key2</th>\n",
 48 |         "    </tr>\n",
 49 |         "  </thead>\n",
 50 |         "  <tbody>\n",
 51 |         "    <tr>\n",
 52 |         "      <th>0</th>\n",
 53 |         "      <td> A</td>\n",
 54 |         "      <td> 2</td>\n",
 55 |         "    </tr>\n",
 56 |         "    <tr>\n",
 57 |         "      <th>1</th>\n",
 58 |         "      <td> A</td>\n",
 59 |         "      <td> 2</td>\n",
 60 |         "    </tr>\n",
 61 |         "    <tr>\n",
 62 |         "      <th>2</th>\n",
 63 |         "      <td> B</td>\n",
 64 |         "      <td> 2</td>\n",
 65 |         "    </tr>\n",
 66 |         "    <tr>\n",
 67 |         "      <th>3</th>\n",
 68 |         "      <td> B</td>\n",
 69 |         "      <td> 3</td>\n",
 70 |         "    </tr>\n",
 71 |         "    <tr>\n",
 72 |         "      <th>4</th>\n",
 73 |         "      <td> B</td>\n",
 74 |         "      <td> 3</td>\n",
 75 |         "    </tr>\n",
 76 |         "  </tbody>\n",
 77 |         "</table>\n",
 78 |         "</div>"
 79 |        ],
 80 |        "metadata": {},
 81 |        "output_type": "pyout",
 82 |        "prompt_number": 7,
 83 |        "text": [
 84 |         "  key1  key2\n",
 85 |         "0    A     2\n",
 86 |         "1    A     2\n",
 87 |         "2    B     2\n",
 88 |         "3    B     3\n",
 89 |         "4    B     3"
 90 |        ]
 91 |       }
 92 |      ],
 93 |      "prompt_number": 7
 94 |     },
 95 |     {
 96 |      "cell_type": "code",
 97 |      "collapsed": false,
 98 |      "input": [
 99 |       "#We can use duplicated to find duplicates\n",
100 |       "dframe.duplicated()"
101 |      ],
102 |      "language": "python",
103 |      "metadata": {},
104 |      "outputs": [
105 |       {
106 |        "metadata": {},
107 |        "output_type": "pyout",
108 |        "prompt_number": 8,
109 |        "text": [
110 |         "0    False\n",
111 |         "1     True\n",
112 |         "2    False\n",
113 |         "3    False\n",
114 |         "4     True\n",
115 |         "dtype: bool"
116 |        ]
117 |       }
118 |      ],
119 |      "prompt_number": 8
120 |     },
121 |     {
122 |      "cell_type": "code",
123 |      "collapsed": false,
124 |      "input": [
125 |       "# We can also drop duplicates like this:\n",
126 |       "dframe.drop_duplicates()"
127 |      ],
128 |      "language": "python",
129 |      "metadata": {},
130 |      "outputs": [
131 |       {
132 |        "html": [
133 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
134 |         "<table border=\"1\" class=\"dataframe\">\n",
135 |         "  <thead>\n",
136 |         "    <tr style=\"text-align: right;\">\n",
137 |         "      <th></th>\n",
138 |         "      <th>key1</th>\n",
139 |         "      <th>key2</th>\n",
140 |         "    </tr>\n",
141 |         "  </thead>\n",
142 |         "  <tbody>\n",
143 |         "    <tr>\n",
144 |         "      <th>0</th>\n",
145 |         "      <td> A</td>\n",
146 |         "      <td> 2</td>\n",
147 |         "    </tr>\n",
148 |         "    <tr>\n",
149 |         "      <th>2</th>\n",
150 |         "      <td> B</td>\n",
151 |         "      <td> 2</td>\n",
152 |         "    </tr>\n",
153 |         "    <tr>\n",
154 |         "      <th>3</th>\n",
155 |         "      <td> B</td>\n",
156 |         "      <td> 3</td>\n",
157 |         "    </tr>\n",
158 |         "  </tbody>\n",
159 |         "</table>\n",
160 |         "</div>"
161 |        ],
162 |        "metadata": {},
163 |        "output_type": "pyout",
164 |        "prompt_number": 9,
165 |        "text": [
166 |         "  key1  key2\n",
167 |         "0    A     2\n",
168 |         "2    B     2\n",
169 |         "3    B     3"
170 |        ]
171 |       }
172 |      ],
173 |      "prompt_number": 9
174 |     },
175 |     {
176 |      "cell_type": "code",
177 |      "collapsed": false,
178 |      "input": [
179 |       "#You can filter which duplicates to drop by a single column\n",
180 |       "dframe.drop_duplicates(['key1'])"
181 |      ],
182 |      "language": "python",
183 |      "metadata": {},
184 |      "outputs": [
185 |       {
186 |        "html": [
187 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
188 |         "<table border=\"1\" class=\"dataframe\">\n",
189 |         "  <thead>\n",
190 |         "    <tr style=\"text-align: right;\">\n",
191 |         "      <th></th>\n",
192 |         "      <th>key1</th>\n",
193 |         "      <th>key2</th>\n",
194 |         "    </tr>\n",
195 |         "  </thead>\n",
196 |         "  <tbody>\n",
197 |         "    <tr>\n",
198 |         "      <th>0</th>\n",
199 |         "      <td> A</td>\n",
200 |         "      <td> 2</td>\n",
201 |         "    </tr>\n",
202 |         "    <tr>\n",
203 |         "      <th>2</th>\n",
204 |         "      <td> B</td>\n",
205 |         "      <td> 2</td>\n",
206 |         "    </tr>\n",
207 |         "  </tbody>\n",
208 |         "</table>\n",
209 |         "</div>"
210 |        ],
211 |        "metadata": {},
212 |        "output_type": "pyout",
213 |        "prompt_number": 10,
214 |        "text": [
215 |         "  key1  key2\n",
216 |         "0    A     2\n",
217 |         "2    B     2"
218 |        ]
219 |       }
220 |      ],
221 |      "prompt_number": 10
222 |     },
223 |     {
224 |      "cell_type": "code",
225 |      "collapsed": false,
226 |      "input": [
227 |       "#Show original\n",
228 |       "dframe"
229 |      ],
230 |      "language": "python",
231 |      "metadata": {},
232 |      "outputs": [
233 |       {
234 |        "html": [
235 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
236 |         "<table border=\"1\" class=\"dataframe\">\n",
237 |         "  <thead>\n",
238 |         "    <tr style=\"text-align: right;\">\n",
239 |         "      <th></th>\n",
240 |         "      <th>key1</th>\n",
241 |         "      <th>key2</th>\n",
242 |         "    </tr>\n",
243 |         "  </thead>\n",
244 |         "  <tbody>\n",
245 |         "    <tr>\n",
246 |         "      <th>0</th>\n",
247 |         "      <td> A</td>\n",
248 |         "      <td> 2</td>\n",
249 |         "    </tr>\n",
250 |         "    <tr>\n",
251 |         "      <th>1</th>\n",
252 |         "      <td> A</td>\n",
253 |         "      <td> 2</td>\n",
254 |         "    </tr>\n",
255 |         "    <tr>\n",
256 |         "      <th>2</th>\n",
257 |         "      <td> B</td>\n",
258 |         "      <td> 2</td>\n",
259 |         "    </tr>\n",
260 |         "    <tr>\n",
261 |         "      <th>3</th>\n",
262 |         "      <td> B</td>\n",
263 |         "      <td> 3</td>\n",
264 |         "    </tr>\n",
265 |         "    <tr>\n",
266 |         "      <th>4</th>\n",
267 |         "      <td> B</td>\n",
268 |         "      <td> 3</td>\n",
269 |         "    </tr>\n",
270 |         "  </tbody>\n",
271 |         "</table>\n",
272 |         "</div>"
273 |        ],
274 |        "metadata": {},
275 |        "output_type": "pyout",
276 |        "prompt_number": 11,
277 |        "text": [
278 |         "  key1  key2\n",
279 |         "0    A     2\n",
280 |         "1    A     2\n",
281 |         "2    B     2\n",
282 |         "3    B     3\n",
283 |         "4    B     3"
284 |        ]
285 |       }
286 |      ],
287 |      "prompt_number": 11
288 |     },
289 |     {
290 |      "cell_type": "code",
291 |      "collapsed": false,
292 |      "input": [
293 |       "#By default the first value was taken for the duplicates, we can also take the last value instead\n",
294 |       "dframe.drop_duplicates(['key1'],take_last=True)"
295 |      ],
296 |      "language": "python",
297 |      "metadata": {},
298 |      "outputs": [
299 |       {
300 |        "html": [
301 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
302 |         "<table border=\"1\" class=\"dataframe\">\n",
303 |         "  <thead>\n",
304 |         "    <tr style=\"text-align: right;\">\n",
305 |         "      <th></th>\n",
306 |         "      <th>key1</th>\n",
307 |         "      <th>key2</th>\n",
308 |         "    </tr>\n",
309 |         "  </thead>\n",
310 |         "  <tbody>\n",
311 |         "    <tr>\n",
312 |         "      <th>1</th>\n",
313 |         "      <td> A</td>\n",
314 |         "      <td> 2</td>\n",
315 |         "    </tr>\n",
316 |         "    <tr>\n",
317 |         "      <th>4</th>\n",
318 |         "      <td> B</td>\n",
319 |         "      <td> 3</td>\n",
320 |         "    </tr>\n",
321 |         "  </tbody>\n",
322 |         "</table>\n",
323 |         "</div>"
324 |        ],
325 |        "metadata": {},
326 |        "output_type": "pyout",
327 |        "prompt_number": 14,
328 |        "text": [
329 |         "  key1  key2\n",
330 |         "1    A     2\n",
331 |         "4    B     3"
332 |        ]
333 |       }
334 |      ],
335 |      "prompt_number": 14
336 |     },
337 |     {
338 |      "cell_type": "code",
339 |      "collapsed": false,
340 |      "input": [
341 |       "# That's about it for finding duplicates, pretty easy huh? \n",
342 |       "\n",
343 |       "#Up next: Mapping"
344 |      ],
345 |      "language": "python",
346 |      "metadata": {},
347 |      "outputs": []
348 |     }
349 |    ],
350 |    "metadata": {}
351 |   }
352 |  ]
353 | }


--------------------------------------------------------------------------------
/Lec 36 - Mapping.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "metadata": {
  3 |   "name": "",
  4 |   "signature": "sha256:1cbff1691ba3e11486592151ba0bcff77747755c22f017caee0d3c9d7759b84b"
  5 |  },
  6 |  "nbformat": 3,
  7 |  "nbformat_minor": 0,
  8 |  "worksheets": [
  9 |   {
 10 |    "cells": [
 11 |     {
 12 |      "cell_type": "code",
 13 |      "collapsed": false,
 14 |      "input": [
 15 |       "import numpy as np\n",
 16 |       "import pandas as pd\n",
 17 |       "from pandas import Series, DataFrame"
 18 |      ],
 19 |      "language": "python",
 20 |      "metadata": {},
 21 |      "outputs": [],
 22 |      "prompt_number": 1
 23 |     },
 24 |     {
 25 |      "cell_type": "code",
 26 |      "collapsed": false,
 27 |      "input": [
 28 |       "# Let's create a dframe to work with (Highest elevation cities in USA)\n",
 29 |       "dframe = DataFrame({'city':['Alma','Brian Head','Fox Park'],\n",
 30 |       "                    'altitude':[3158,3000,2762]})\n",
 31 |       "\n",
 32 |       "#Show\n",
 33 |       "dframe"
 34 |      ],
 35 |      "language": "python",
 36 |      "metadata": {},
 37 |      "outputs": [
 38 |       {
 39 |        "html": [
 40 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
 41 |         "<table border=\"1\" class=\"dataframe\">\n",
 42 |         "  <thead>\n",
 43 |         "    <tr style=\"text-align: right;\">\n",
 44 |         "      <th></th>\n",
 45 |         "      <th>altitude</th>\n",
 46 |         "      <th>city</th>\n",
 47 |         "    </tr>\n",
 48 |         "  </thead>\n",
 49 |         "  <tbody>\n",
 50 |         "    <tr>\n",
 51 |         "      <th>0</th>\n",
 52 |         "      <td> 3158</td>\n",
 53 |         "      <td>       Alma</td>\n",
 54 |         "    </tr>\n",
 55 |         "    <tr>\n",
 56 |         "      <th>1</th>\n",
 57 |         "      <td> 3000</td>\n",
 58 |         "      <td> Brian Head</td>\n",
 59 |         "    </tr>\n",
 60 |         "    <tr>\n",
 61 |         "      <th>2</th>\n",
 62 |         "      <td> 2762</td>\n",
 63 |         "      <td>   Fox Park</td>\n",
 64 |         "    </tr>\n",
 65 |         "  </tbody>\n",
 66 |         "</table>\n",
 67 |         "</div>"
 68 |        ],
 69 |        "metadata": {},
 70 |        "output_type": "pyout",
 71 |        "prompt_number": 2,
 72 |        "text": [
 73 |         "   altitude        city\n",
 74 |         "0      3158        Alma\n",
 75 |         "1      3000  Brian Head\n",
 76 |         "2      2762    Fox Park"
 77 |        ]
 78 |       }
 79 |      ],
 80 |      "prompt_number": 2
 81 |     },
 82 |     {
 83 |      "cell_type": "code",
 84 |      "collapsed": false,
 85 |      "input": [
 86 |       "#Now let's say we wanted to add a column for the States, we can do that with a mapping.\n",
 87 |       "state_map={'Alma':'Colorado','Brian Head':'Utah','Fox Park':'Wyoming'}\n"
 88 |      ],
 89 |      "language": "python",
 90 |      "metadata": {},
 91 |      "outputs": [],
 92 |      "prompt_number": 6
 93 |     },
 94 |     {
 95 |      "cell_type": "code",
 96 |      "collapsed": false,
 97 |      "input": [
 98 |       "# Now we can map that data to our current dframe\n",
 99 |       "dframe['state'] = dframe['city'].map(state_map)"
100 |      ],
101 |      "language": "python",
102 |      "metadata": {},
103 |      "outputs": [],
104 |      "prompt_number": 7
105 |     },
106 |     {
107 |      "cell_type": "code",
108 |      "collapsed": false,
109 |      "input": [
110 |       "#Show result\n",
111 |       "dframe"
112 |      ],
113 |      "language": "python",
114 |      "metadata": {},
115 |      "outputs": [
116 |       {
117 |        "html": [
118 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
119 |         "<table border=\"1\" class=\"dataframe\">\n",
120 |         "  <thead>\n",
121 |         "    <tr style=\"text-align: right;\">\n",
122 |         "      <th></th>\n",
123 |         "      <th>altitude</th>\n",
124 |         "      <th>city</th>\n",
125 |         "      <th>state</th>\n",
126 |         "    </tr>\n",
127 |         "  </thead>\n",
128 |         "  <tbody>\n",
129 |         "    <tr>\n",
130 |         "      <th>0</th>\n",
131 |         "      <td> 3158</td>\n",
132 |         "      <td>       Alma</td>\n",
133 |         "      <td> Colorado</td>\n",
134 |         "    </tr>\n",
135 |         "    <tr>\n",
136 |         "      <th>1</th>\n",
137 |         "      <td> 3000</td>\n",
138 |         "      <td> Brian Head</td>\n",
139 |         "      <td>     Utah</td>\n",
140 |         "    </tr>\n",
141 |         "    <tr>\n",
142 |         "      <th>2</th>\n",
143 |         "      <td> 2762</td>\n",
144 |         "      <td>   Fox Park</td>\n",
145 |         "      <td>  Wyoming</td>\n",
146 |         "    </tr>\n",
147 |         "  </tbody>\n",
148 |         "</table>\n",
149 |         "</div>"
150 |        ],
151 |        "metadata": {},
152 |        "output_type": "pyout",
153 |        "prompt_number": 8,
154 |        "text": [
155 |         "   altitude        city     state\n",
156 |         "0      3158        Alma  Colorado\n",
157 |         "1      3000  Brian Head      Utah\n",
158 |         "2      2762    Fox Park   Wyoming"
159 |        ]
160 |       }
161 |      ],
162 |      "prompt_number": 8
163 |     },
164 |     {
165 |      "cell_type": "code",
166 |      "collapsed": false,
167 |      "input": [
168 |       "# Mapping is a great way to do element-wise transformations and other data cleaning operations!"
169 |      ],
170 |      "language": "python",
171 |      "metadata": {},
172 |      "outputs": [],
173 |      "prompt_number": 9
174 |     },
175 |     {
176 |      "cell_type": "code",
177 |      "collapsed": false,
178 |      "input": [
179 |       "# Next up : Replacing Values!"
180 |      ],
181 |      "language": "python",
182 |      "metadata": {},
183 |      "outputs": []
184 |     }
185 |    ],
186 |    "metadata": {}
187 |   }
188 |  ]
189 | }


--------------------------------------------------------------------------------
/Lec 37 - Replace.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "metadata": {
  3 |   "name": "",
  4 |   "signature": "sha256:55609d0e101015296bd00c16afe3c0e3be54e593a7088092d40a32f3e006d1ac"
  5 |  },
  6 |  "nbformat": 3,
  7 |  "nbformat_minor": 0,
  8 |  "worksheets": [
  9 |   {
 10 |    "cells": [
 11 |     {
 12 |      "cell_type": "code",
 13 |      "collapsed": false,
 14 |      "input": [
 15 |       "import numpy as np\n",
 16 |       "import pandas as pd \n",
 17 |       "from pandas import Series, DataFrame"
 18 |      ],
 19 |      "language": "python",
 20 |      "metadata": {},
 21 |      "outputs": [],
 22 |      "prompt_number": 2
 23 |     },
 24 |     {
 25 |      "cell_type": "code",
 26 |      "collapsed": false,
 27 |      "input": [
 28 |       "# Let's make a Series\n",
 29 |       "ser1 = Series([1,2,3,4,1,2,3,4])\n",
 30 |       "#Show\n",
 31 |       "ser1"
 32 |      ],
 33 |      "language": "python",
 34 |      "metadata": {},
 35 |      "outputs": [
 36 |       {
 37 |        "metadata": {},
 38 |        "output_type": "pyout",
 39 |        "prompt_number": 3,
 40 |        "text": [
 41 |         "0    1\n",
 42 |         "1    2\n",
 43 |         "2    3\n",
 44 |         "3    4\n",
 45 |         "4    1\n",
 46 |         "5    2\n",
 47 |         "6    3\n",
 48 |         "7    4\n",
 49 |         "dtype: int64"
 50 |        ]
 51 |       }
 52 |      ],
 53 |      "prompt_number": 3
 54 |     },
 55 |     {
 56 |      "cell_type": "code",
 57 |      "collapsed": false,
 58 |      "input": [
 59 |       "# Using replace we can select --> .replace(value to be replaced, new_value)\n",
 60 |       "ser1.replace(1,np.nan)"
 61 |      ],
 62 |      "language": "python",
 63 |      "metadata": {},
 64 |      "outputs": [
 65 |       {
 66 |        "metadata": {},
 67 |        "output_type": "pyout",
 68 |        "prompt_number": 4,
 69 |        "text": [
 70 |         "0   NaN\n",
 71 |         "1     2\n",
 72 |         "2     3\n",
 73 |         "3     4\n",
 74 |         "4   NaN\n",
 75 |         "5     2\n",
 76 |         "6     3\n",
 77 |         "7     4\n",
 78 |         "dtype: float64"
 79 |        ]
 80 |       }
 81 |      ],
 82 |      "prompt_number": 4
 83 |     },
 84 |     {
 85 |      "cell_type": "code",
 86 |      "collapsed": false,
 87 |      "input": [
 88 |       "#Can also input lists\n",
 89 |       "ser1.replace([1,4],[100,400])"
 90 |      ],
 91 |      "language": "python",
 92 |      "metadata": {},
 93 |      "outputs": [
 94 |       {
 95 |        "metadata": {},
 96 |        "output_type": "pyout",
 97 |        "prompt_number": 5,
 98 |        "text": [
 99 |         "0    100\n",
100 |         "1      2\n",
101 |         "2      3\n",
102 |         "3    400\n",
103 |         "4    100\n",
104 |         "5      2\n",
105 |         "6      3\n",
106 |         "7    400\n",
107 |         "dtype: int64"
108 |        ]
109 |       }
110 |      ],
111 |      "prompt_number": 5
112 |     },
113 |     {
114 |      "cell_type": "code",
115 |      "collapsed": false,
116 |      "input": [
117 |       "#Can also input dictionary\n",
118 |       "ser1.replace({4:np.nan})"
119 |      ],
120 |      "language": "python",
121 |      "metadata": {},
122 |      "outputs": [
123 |       {
124 |        "metadata": {},
125 |        "output_type": "pyout",
126 |        "prompt_number": 15,
127 |        "text": [
128 |         "0     1\n",
129 |         "1     2\n",
130 |         "2     3\n",
131 |         "3   NaN\n",
132 |         "4     1\n",
133 |         "5     2\n",
134 |         "6     3\n",
135 |         "7   NaN\n",
136 |         "dtype: float64"
137 |        ]
138 |       }
139 |      ],
140 |      "prompt_number": 15
141 |     },
142 |     {
143 |      "cell_type": "code",
144 |      "collapsed": false,
145 |      "input": [
146 |       "#That's it for replace, next up Renaming an axis index"
147 |      ],
148 |      "language": "python",
149 |      "metadata": {},
150 |      "outputs": []
151 |     }
152 |    ],
153 |    "metadata": {}
154 |   }
155 |  ]
156 | }


--------------------------------------------------------------------------------
/Lec 38 - Rename Index.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "metadata": {
  3 |   "name": "",
  4 |   "signature": "sha256:1fad817eb6e1e60d9ebdd5d360b147c604b62efcc261e6afebcbb1034e2a59ee"
  5 |  },
  6 |  "nbformat": 3,
  7 |  "nbformat_minor": 0,
  8 |  "worksheets": [
  9 |   {
 10 |    "cells": [
 11 |     {
 12 |      "cell_type": "code",
 13 |      "collapsed": false,
 14 |      "input": [
 15 |       "import numpy as np\n",
 16 |       "import pandas as pd\n",
 17 |       "from pandas import Series, DataFrame"
 18 |      ],
 19 |      "language": "python",
 20 |      "metadata": {},
 21 |      "outputs": [],
 22 |      "prompt_number": 1
 23 |     },
 24 |     {
 25 |      "cell_type": "code",
 26 |      "collapsed": false,
 27 |      "input": [
 28 |       "# Making a DataFrame\n",
 29 |       "dframe= DataFrame(np.arange(12).reshape((3, 4)),\n",
 30 |       "                 index=['NY', 'LA', 'SF'],\n",
 31 |       "                 columns=['A', 'B', 'C', 'D'])\n",
 32 |       "\n",
 33 |       "#Show\n",
 34 |       "dframe"
 35 |      ],
 36 |      "language": "python",
 37 |      "metadata": {},
 38 |      "outputs": [
 39 |       {
 40 |        "html": [
 41 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
 42 |         "<table border=\"1\" class=\"dataframe\">\n",
 43 |         "  <thead>\n",
 44 |         "    <tr style=\"text-align: right;\">\n",
 45 |         "      <th></th>\n",
 46 |         "      <th>A</th>\n",
 47 |         "      <th>B</th>\n",
 48 |         "      <th>C</th>\n",
 49 |         "      <th>D</th>\n",
 50 |         "    </tr>\n",
 51 |         "  </thead>\n",
 52 |         "  <tbody>\n",
 53 |         "    <tr>\n",
 54 |         "      <th>NY</th>\n",
 55 |         "      <td> 0</td>\n",
 56 |         "      <td> 1</td>\n",
 57 |         "      <td>  2</td>\n",
 58 |         "      <td>  3</td>\n",
 59 |         "    </tr>\n",
 60 |         "    <tr>\n",
 61 |         "      <th>LA</th>\n",
 62 |         "      <td> 4</td>\n",
 63 |         "      <td> 5</td>\n",
 64 |         "      <td>  6</td>\n",
 65 |         "      <td>  7</td>\n",
 66 |         "    </tr>\n",
 67 |         "    <tr>\n",
 68 |         "      <th>SF</th>\n",
 69 |         "      <td> 8</td>\n",
 70 |         "      <td> 9</td>\n",
 71 |         "      <td> 10</td>\n",
 72 |         "      <td> 11</td>\n",
 73 |         "    </tr>\n",
 74 |         "  </tbody>\n",
 75 |         "</table>\n",
 76 |         "</div>"
 77 |        ],
 78 |        "metadata": {},
 79 |        "output_type": "pyout",
 80 |        "prompt_number": 13,
 81 |        "text": [
 82 |         "    A  B   C   D\n",
 83 |         "NY  0  1   2   3\n",
 84 |         "LA  4  5   6   7\n",
 85 |         "SF  8  9  10  11"
 86 |        ]
 87 |       }
 88 |      ],
 89 |      "prompt_number": 13
 90 |     },
 91 |     {
 92 |      "cell_type": "code",
 93 |      "collapsed": false,
 94 |      "input": [
 95 |       "# Just like a Series, axis indexes can also use map\n",
 96 |       "\n",
 97 |       "#Let's use map to lowercase the city initials\n",
 98 |       "dframe.index.map(str.lower)"
 99 |      ],
100 |      "language": "python",
101 |      "metadata": {},
102 |      "outputs": [
103 |       {
104 |        "metadata": {},
105 |        "output_type": "pyout",
106 |        "prompt_number": 14,
107 |        "text": [
108 |         "array(['ny', 'la', 'sf'], dtype=object)"
109 |        ]
110 |       }
111 |      ],
112 |      "prompt_number": 14
113 |     },
114 |     {
115 |      "cell_type": "code",
116 |      "collapsed": false,
117 |      "input": [
118 |       "# If you want to apply this to the actual index, assign the mapped result back to dframe.index\n",
119 |       "dframe.index = dframe.index.map(str.lower)\n",
120 |       "#Show\n",
121 |       "dframe"
122 |      ],
123 |      "language": "python",
124 |      "metadata": {},
125 |      "outputs": [
126 |       {
127 |        "html": [
128 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
129 |         "<table border=\"1\" class=\"dataframe\">\n",
130 |         "  <thead>\n",
131 |         "    <tr style=\"text-align: right;\">\n",
132 |         "      <th></th>\n",
133 |         "      <th>A</th>\n",
134 |         "      <th>B</th>\n",
135 |         "      <th>C</th>\n",
136 |         "      <th>D</th>\n",
137 |         "    </tr>\n",
138 |         "  </thead>\n",
139 |         "  <tbody>\n",
140 |         "    <tr>\n",
141 |         "      <th>ny</th>\n",
142 |         "      <td> 0</td>\n",
143 |         "      <td> 1</td>\n",
144 |         "      <td>  2</td>\n",
145 |         "      <td>  3</td>\n",
146 |         "    </tr>\n",
147 |         "    <tr>\n",
148 |         "      <th>la</th>\n",
149 |         "      <td> 4</td>\n",
150 |         "      <td> 5</td>\n",
151 |         "      <td>  6</td>\n",
152 |         "      <td>  7</td>\n",
153 |         "    </tr>\n",
154 |         "    <tr>\n",
155 |         "      <th>sf</th>\n",
156 |         "      <td> 8</td>\n",
157 |         "      <td> 9</td>\n",
158 |         "      <td> 10</td>\n",
159 |         "      <td> 11</td>\n",
160 |         "    </tr>\n",
161 |         "  </tbody>\n",
162 |         "</table>\n",
163 |         "</div>"
164 |        ],
165 |        "metadata": {},
166 |        "output_type": "pyout",
167 |        "prompt_number": 25,
168 |        "text": [
169 |         "    A  B   C   D\n",
170 |         "ny  0  1   2   3\n",
171 |         "la  4  5   6   7\n",
172 |         "sf  8  9  10  11"
173 |        ]
174 |       }
175 |      ],
176 |      "prompt_number": 25
177 |     },
178 |     {
179 |      "cell_type": "code",
180 |      "collapsed": false,
181 |      "input": [
182 |       "# Use rename if you want to create a transformed version without modifying the original!\n",
183 |       "\n",
184 |       "#str.title will capitalize the first letter of each index label, and str.lower will lowercase the column names\n",
185 |       "dframe.rename(index=str.title, columns=str.lower)\n"
186 |      ],
187 |      "language": "python",
188 |      "metadata": {},
189 |      "outputs": [
190 |       {
191 |        "html": [
192 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
193 |         "<table border=\"1\" class=\"dataframe\">\n",
194 |         "  <thead>\n",
195 |         "    <tr style=\"text-align: right;\">\n",
196 |         "      <th></th>\n",
197 |         "      <th>a</th>\n",
198 |         "      <th>b</th>\n",
199 |         "      <th>c</th>\n",
200 |         "      <th>d</th>\n",
201 |         "    </tr>\n",
202 |         "  </thead>\n",
203 |         "  <tbody>\n",
204 |         "    <tr>\n",
205 |         "      <th>Ny</th>\n",
206 |         "      <td> 0</td>\n",
207 |         "      <td> 1</td>\n",
208 |         "      <td>  2</td>\n",
209 |         "      <td>  3</td>\n",
210 |         "    </tr>\n",
211 |         "    <tr>\n",
212 |         "      <th>La</th>\n",
213 |         "      <td> 4</td>\n",
214 |         "      <td> 5</td>\n",
215 |         "      <td>  6</td>\n",
216 |         "      <td>  7</td>\n",
217 |         "    </tr>\n",
218 |         "    <tr>\n",
219 |         "      <th>Sf</th>\n",
220 |         "      <td> 8</td>\n",
221 |         "      <td> 9</td>\n",
222 |         "      <td> 10</td>\n",
223 |         "      <td> 11</td>\n",
224 |         "    </tr>\n",
225 |         "  </tbody>\n",
226 |         "</table>\n",
227 |         "</div>"
228 |        ],
229 |        "metadata": {},
230 |        "output_type": "pyout",
231 |        "prompt_number": 28,
232 |        "text": [
233 |         "    a  b   c   d\n",
234 |         "Ny  0  1   2   3\n",
235 |         "La  4  5   6   7\n",
236 |         "Sf  8  9  10  11"
237 |        ]
238 |       }
239 |      ],
240 |      "prompt_number": 28
241 |     },
242 |     {
243 |      "cell_type": "code",
244 |      "collapsed": false,
245 |      "input": [
246 |       "# We can also use rename to insert dictionaries providing new values for indexes or columns!\n",
247 |       "dframe.rename(index={'ny': 'NEW YORK'},\n",
248 |       "            columns={'A': 'ALPHA'})"
249 |      ],
250 |      "language": "python",
251 |      "metadata": {},
252 |      "outputs": [
253 |       {
254 |        "html": [
255 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
256 |         "<table border=\"1\" class=\"dataframe\">\n",
257 |         "  <thead>\n",
258 |         "    <tr style=\"text-align: right;\">\n",
259 |         "      <th></th>\n",
260 |         "      <th>ALPHA</th>\n",
261 |         "      <th>B</th>\n",
262 |         "      <th>C</th>\n",
263 |         "      <th>D</th>\n",
264 |         "    </tr>\n",
265 |         "  </thead>\n",
266 |         "  <tbody>\n",
267 |         "    <tr>\n",
268 |         "      <th>NEW YORK</th>\n",
269 |         "      <td> 0</td>\n",
270 |         "      <td> 1</td>\n",
271 |         "      <td>  2</td>\n",
272 |         "      <td>  3</td>\n",
273 |         "    </tr>\n",
274 |         "    <tr>\n",
275 |         "      <th>la</th>\n",
276 |         "      <td> 4</td>\n",
277 |         "      <td> 5</td>\n",
278 |         "      <td>  6</td>\n",
279 |         "      <td>  7</td>\n",
280 |         "    </tr>\n",
281 |         "    <tr>\n",
282 |         "      <th>sf</th>\n",
283 |         "      <td> 8</td>\n",
284 |         "      <td> 9</td>\n",
285 |         "      <td> 10</td>\n",
286 |         "      <td> 11</td>\n",
287 |         "    </tr>\n",
288 |         "  </tbody>\n",
289 |         "</table>\n",
290 |         "</div>"
291 |        ],
292 |        "metadata": {},
293 |        "output_type": "pyout",
294 |        "prompt_number": 34,
295 |        "text": [
296 |         "          ALPHA  B   C   D\n",
297 |         "NEW YORK      0  1   2   3\n",
298 |         "la            4  5   6   7\n",
299 |         "sf            8  9  10  11"
300 |        ]
301 |       }
302 |      ],
303 |      "prompt_number": 34
304 |     },
305 |     {
306 |      "cell_type": "code",
307 |      "collapsed": false,
308 |      "input": [
309 |       "# If you would like to actually edit the data set in place, set inplace=True\n",
310 |       "dframe.rename(index={'ny': 'NEW YORK'}, inplace=True)\n",
311 |       "dframe"
312 |      ],
313 |      "language": "python",
314 |      "metadata": {},
315 |      "outputs": [
316 |       {
317 |        "html": [
318 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
319 |         "<table border=\"1\" class=\"dataframe\">\n",
320 |         "  <thead>\n",
321 |         "    <tr style=\"text-align: right;\">\n",
322 |         "      <th></th>\n",
323 |         "      <th>A</th>\n",
324 |         "      <th>B</th>\n",
325 |         "      <th>C</th>\n",
326 |         "      <th>D</th>\n",
327 |         "    </tr>\n",
328 |         "  </thead>\n",
329 |         "  <tbody>\n",
330 |         "    <tr>\n",
331 |         "      <th>NEW YORK</th>\n",
332 |         "      <td> 0</td>\n",
333 |         "      <td> 1</td>\n",
334 |         "      <td>  2</td>\n",
335 |         "      <td>  3</td>\n",
336 |         "    </tr>\n",
337 |         "    <tr>\n",
338 |         "      <th>la</th>\n",
339 |         "      <td> 4</td>\n",
340 |         "      <td> 5</td>\n",
341 |         "      <td>  6</td>\n",
342 |         "      <td>  7</td>\n",
343 |         "    </tr>\n",
344 |         "    <tr>\n",
345 |         "      <th>sf</th>\n",
346 |         "      <td> 8</td>\n",
347 |         "      <td> 9</td>\n",
348 |         "      <td> 10</td>\n",
349 |         "      <td> 11</td>\n",
350 |         "    </tr>\n",
351 |         "  </tbody>\n",
352 |         "</table>\n",
353 |         "</div>"
354 |        ],
355 |        "metadata": {},
356 |        "output_type": "pyout",
357 |        "prompt_number": 38,
358 |        "text": [
359 |         "          A  B   C   D\n",
360 |         "NEW YORK  0  1   2   3\n",
361 |         "la        4  5   6   7\n",
362 |         "sf        8  9  10  11"
363 |        ]
364 |       }
365 |      ],
366 |      "prompt_number": 38
367 |     },
368 |     {
369 |      "cell_type": "code",
370 |      "collapsed": false,
371 |      "input": [
372 |       "#Up next: Binning!"
373 |      ],
374 |      "language": "python",
375 |      "metadata": {},
376 |      "outputs": [],
377 |      "prompt_number": 1
378 |     },
379 |     {
380 |      "cell_type": "code",
381 |      "collapsed": false,
382 |      "input": [],
383 |      "language": "python",
384 |      "metadata": {},
385 |      "outputs": []
386 |     }
387 |    ],
388 |    "metadata": {}
389 |   }
390 |  ]
391 | }


--------------------------------------------------------------------------------
/Lec 39 - Binning.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "metadata": {
  3 |   "name": "",
  4 |   "signature": "sha256:3ca7818dc358f2cddd5e8359b39bb80102b0632ffed56f605191a7ac843317a9"
  5 |  },
  6 |  "nbformat": 3,
  7 |  "nbformat_minor": 0,
  8 |  "worksheets": [
  9 |   {
 10 |    "cells": [
 11 |     {
 12 |      "cell_type": "code",
 13 |      "collapsed": false,
 14 |      "input": [
 15 |       "import numpy as np\n",
 16 |       "import pandas as pd\n",
 17 |       "from pandas import Series, DataFrame"
 18 |      ],
 19 |      "language": "python",
 20 |      "metadata": {},
 21 |      "outputs": [],
 22 |      "prompt_number": 1
 23 |     },
 24 |     {
 25 |      "cell_type": "code",
 26 |      "collapsed": false,
 27 |      "input": [
 28 |       "#Now we'll learn about binning"
 29 |      ],
 30 |      "language": "python",
 31 |      "metadata": {},
 32 |      "outputs": [],
 33 |      "prompt_number": 2
 34 |     },
 35 |     {
 36 |      "cell_type": "code",
 37 |      "collapsed": false,
 38 |      "input": [
 39 |       "years = [1990,1991,1992,2008,2012,2015,1987,1969,2013,2008,1999]"
 40 |      ],
 41 |      "language": "python",
 42 |      "metadata": {},
 43 |      "outputs": [],
 44 |      "prompt_number": 3
 45 |     },
 46 |     {
 47 |      "cell_type": "code",
 48 |      "collapsed": false,
 49 |      "input": [
 50 |       "# We can separate these years by decade\n",
 51 |       "decade_bins = [1960,1970,1980,1990,2000,2010,2020]"
 52 |      ],
 53 |      "language": "python",
 54 |      "metadata": {},
 55 |      "outputs": [],
 56 |      "prompt_number": 4
 57 |     },
 58 |     {
 59 |      "cell_type": "code",
 60 |      "collapsed": false,
 61 |      "input": [
 62 |       "#Now we'll use cut to get something called a Categorical object\n",
 63 |       "decade_cat = pd.cut(years,decade_bins)"
 64 |      ],
 65 |      "language": "python",
 66 |      "metadata": {},
 67 |      "outputs": [],
 68 |      "prompt_number": 7
 69 |     },
 70 |     {
 71 |      "cell_type": "code",
 72 |      "collapsed": false,
 73 |      "input": [
 74 |       "#Show\n",
 75 |       "decade_cat"
 76 |      ],
 77 |      "language": "python",
 78 |      "metadata": {},
 79 |      "outputs": [
 80 |       {
 81 |        "metadata": {},
 82 |        "output_type": "pyout",
 83 |        "prompt_number": 8,
 84 |        "text": [
 85 |         "[(1980, 1990], (1990, 2000], (1990, 2000], (2000, 2010], (2010, 2020], ..., (1980, 1990], (1960, 1970], (2010, 2020], (2000, 2010], (1990, 2000]]\n",
 86 |         "Length: 11\n",
 87 |         "Categories (6, object): [(1960, 1970] < (1970, 1980] < (1980, 1990] < (1990, 2000] < (2000, 2010] < (2010, 2020]]"
 88 |        ]
 89 |       }
 90 |      ],
 91 |      "prompt_number": 8
 92 |     },
 93 |     {
 94 |      "cell_type": "code",
 95 |      "collapsed": false,
 96 |      "input": [
 97 |       "# We can check the categories using .categories\n",
 98 |       "decade_cat.categories"
 99 |      ],
100 |      "language": "python",
101 |      "metadata": {},
102 |      "outputs": [
103 |       {
104 |        "metadata": {},
105 |        "output_type": "pyout",
106 |        "prompt_number": 13,
107 |        "text": [
108 |         "Index([u'(1960, 1970]', u'(1970, 1980]', u'(1980, 1990]', u'(1990, 2000]', u'(2000, 2010]', u'(2010, 2020]'], dtype='object')"
109 |        ]
110 |       }
111 |      ],
112 |      "prompt_number": 13
113 |     },
114 |     {
115 |      "cell_type": "code",
116 |      "collapsed": false,
117 |      "input": [
118 |       "# Then we can check the value counts in each category\n",
119 |       "pd.value_counts(decade_cat)"
120 |      ],
121 |      "language": "python",
122 |      "metadata": {},
123 |      "outputs": [
124 |       {
125 |        "metadata": {},
126 |        "output_type": "pyout",
127 |        "prompt_number": 16,
128 |        "text": [
129 |         "(2010, 2020]    3\n",
130 |         "(1990, 2000]    3\n",
131 |         "(2000, 2010]    2\n",
132 |         "(1980, 1990]    2\n",
133 |         "(1960, 1970]    1\n",
134 |         "(1970, 1980]    0\n",
135 |         "dtype: int64"
136 |        ]
137 |       }
138 |      ],
139 |      "prompt_number": 16
140 |     },
141 |     {
142 |      "cell_type": "code",
143 |      "collapsed": false,
144 |      "input": [
145 |       "# We can also pass the number of bins to cut, instead of explicit bin edges.\n",
146 |       "\n",
147 |       "#For instance, if we just wanted to make two bins, evenly spaced based on max and min year, with a 1 year precision\n",
148 |       "pd.cut(years,2,precision=1)"
149 |      ],
150 |      "language": "python",
151 |      "metadata": {},
152 |      "outputs": [
153 |       {
154 |        "metadata": {},
155 |        "output_type": "pyout",
156 |        "prompt_number": 30,
157 |        "text": [
158 |         "[(1969, 1992], (1969, 1992], (1969, 1992], (1992, 2015], (1992, 2015], ..., (1969, 1992], (1969, 1992], (1992, 2015], (1992, 2015], (1992, 2015]]\n",
159 |         "Length: 11\n",
160 |         "Categories (2, object): [(1969, 1992] < (1992, 2015]]"
161 |        ]
162 |       }
163 |      ],
164 |      "prompt_number": 30
165 |     },
166 |     {
167 |      "cell_type": "code",
168 |      "collapsed": false,
169 |      "input": [
170 |       "# That's about it for binning basics\n",
171 |       "# One last thing to note, just like in standard math notation, when setting up bins:\n",
172 |       "# () means open, while [] means closed/inclusive"
173 |      ],
174 |      "language": "python",
175 |      "metadata": {},
176 |      "outputs": [],
177 |      "prompt_number": 1
178 |     },
179 |     {
180 |      "cell_type": "code",
181 |      "collapsed": false,
182 |      "input": [
183 |       "# Next up: Finding Outliers and Describing Data!"
184 |      ],
185 |      "language": "python",
186 |      "metadata": {},
187 |      "outputs": []
188 |     }
189 |    ],
190 |    "metadata": {}
191 |   }
192 |  ]
193 | }


--------------------------------------------------------------------------------
/Lec 41 - Permutation.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "metadata": {
  3 |   "name": "",
  4 |   "signature": "sha256:cf7ebec1204d7a76f35fc61b129c5e462d1fec53a756bc437d4781a2254ad10d"
  5 |  },
  6 |  "nbformat": 3,
  7 |  "nbformat_minor": 0,
  8 |  "worksheets": [
  9 |   {
 10 |    "cells": [
 11 |     {
 12 |      "cell_type": "code",
 13 |      "collapsed": false,
 14 |      "input": [
 15 |       "import numpy as np\n",
 16 |       "import pandas as pd\n",
 17 |       "from pandas import Series, DataFrame"
 18 |      ],
 19 |      "language": "python",
 20 |      "metadata": {},
 21 |      "outputs": [],
 22 |      "prompt_number": 1
 23 |     },
 24 |     {
 25 |      "cell_type": "code",
 26 |      "collapsed": false,
 27 |      "input": [
 28 |       "# We can randomly reorder (permute) a Series, or the rows in a DataFrame\n",
 29 |       "\n",
 30 |       "#Let's take a look\n",
 31 |       "dframe = DataFrame(np.arange(4 * 4).reshape((4, 4)))\n",
 32 |       "\n",
 33 |       "#Create an array with a random permutation of 0,1,2,3\n",
 34 |       "blender = np.random.permutation(4)\n",
 35 |       "\n",
 36 |       "blender"
 37 |      ],
 38 |      "language": "python",
 39 |      "metadata": {},
 40 |      "outputs": [
 41 |       {
 42 |        "metadata": {},
 43 |        "output_type": "pyout",
 44 |        "prompt_number": 7,
 45 |        "text": [
 46 |         "array([3, 0, 1, 2])"
 47 |        ]
 48 |       }
 49 |      ],
 50 |      "prompt_number": 7
 51 |     },
 52 |     {
 53 |      "cell_type": "code",
 54 |      "collapsed": false,
 55 |      "input": [
 56 |       "dframe"
 57 |      ],
 58 |      "language": "python",
 59 |      "metadata": {},
 60 |      "outputs": [
 61 |       {
 62 |        "html": [
 63 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
 64 |         "<table border=\"1\" class=\"dataframe\">\n",
 65 |         "  <thead>\n",
 66 |         "    <tr style=\"text-align: right;\">\n",
 67 |         "      <th></th>\n",
 68 |         "      <th>0</th>\n",
 69 |         "      <th>1</th>\n",
 70 |         "      <th>2</th>\n",
 71 |         "      <th>3</th>\n",
 72 |         "    </tr>\n",
 73 |         "  </thead>\n",
 74 |         "  <tbody>\n",
 75 |         "    <tr>\n",
 76 |         "      <th>0</th>\n",
 77 |         "      <td>  0</td>\n",
 78 |         "      <td>  1</td>\n",
 79 |         "      <td>  2</td>\n",
 80 |         "      <td>  3</td>\n",
 81 |         "    </tr>\n",
 82 |         "    <tr>\n",
 83 |         "      <th>1</th>\n",
 84 |         "      <td>  4</td>\n",
 85 |         "      <td>  5</td>\n",
 86 |         "      <td>  6</td>\n",
 87 |         "      <td>  7</td>\n",
 88 |         "    </tr>\n",
 89 |         "    <tr>\n",
 90 |         "      <th>2</th>\n",
 91 |         "      <td>  8</td>\n",
 92 |         "      <td>  9</td>\n",
 93 |         "      <td> 10</td>\n",
 94 |         "      <td> 11</td>\n",
 95 |         "    </tr>\n",
 96 |         "    <tr>\n",
 97 |         "      <th>3</th>\n",
 98 |         "      <td> 12</td>\n",
 99 |         "      <td> 13</td>\n",
100 |         "      <td> 14</td>\n",
101 |         "      <td> 15</td>\n",
102 |         "    </tr>\n",
103 |         "  </tbody>\n",
104 |         "</table>\n",
105 |         "</div>"
106 |        ],
107 |        "metadata": {},
108 |        "output_type": "pyout",
109 |        "prompt_number": 8,
110 |        "text": [
111 |         "    0   1   2   3\n",
112 |         "0   0   1   2   3\n",
113 |         "1   4   5   6   7\n",
114 |         "2   8   9  10  11\n",
115 |         "3  12  13  14  15"
116 |        ]
117 |       }
118 |      ],
119 |      "prompt_number": 8
120 |     },
121 |     {
122 |      "cell_type": "code",
123 |      "collapsed": false,
124 |      "input": [
125 |       "# Now permute the rows of dframe based on the blender\n",
126 |       "dframe.take(blender)"
127 |      ],
128 |      "language": "python",
129 |      "metadata": {},
130 |      "outputs": [
131 |       {
132 |        "html": [
133 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
134 |         "<table border=\"1\" class=\"dataframe\">\n",
135 |         "  <thead>\n",
136 |         "    <tr style=\"text-align: right;\">\n",
137 |         "      <th></th>\n",
138 |         "      <th>0</th>\n",
139 |         "      <th>1</th>\n",
140 |         "      <th>2</th>\n",
141 |         "      <th>3</th>\n",
142 |         "    </tr>\n",
143 |         "  </thead>\n",
144 |         "  <tbody>\n",
145 |         "    <tr>\n",
146 |         "      <th>0</th>\n",
147 |         "      <td>  0</td>\n",
148 |         "      <td>  1</td>\n",
149 |         "      <td>  2</td>\n",
150 |         "      <td>  3</td>\n",
151 |         "    </tr>\n",
152 |         "    <tr>\n",
153 |         "      <th>2</th>\n",
154 |         "      <td>  8</td>\n",
155 |         "      <td>  9</td>\n",
156 |         "      <td> 10</td>\n",
157 |         "      <td> 11</td>\n",
158 |         "    </tr>\n",
159 |         "    <tr>\n",
160 |         "      <th>3</th>\n",
161 |         "      <td> 12</td>\n",
162 |         "      <td> 13</td>\n",
163 |         "      <td> 14</td>\n",
164 |         "      <td> 15</td>\n",
165 |         "    </tr>\n",
166 |         "    <tr>\n",
167 |         "      <th>1</th>\n",
168 |         "      <td>  4</td>\n",
169 |         "      <td>  5</td>\n",
170 |         "      <td>  6</td>\n",
171 |         "      <td>  7</td>\n",
172 |         "    </tr>\n",
173 |         "  </tbody>\n",
174 |         "</table>\n",
175 |         "</div>"
176 |        ],
177 |        "metadata": {},
178 |        "output_type": "pyout",
179 |        "prompt_number": 5,
180 |        "text": [
181 |         "    0   1   2   3\n",
182 |         "0   0   1   2   3\n",
183 |         "2   8   9  10  11\n",
184 |         "3  12  13  14  15\n",
185 |         "1   4   5   6   7"
186 |        ]
187 |       }
188 |      ],
189 |      "prompt_number": 5
190 |     },
191 |     {
192 |      "cell_type": "code",
193 |      "collapsed": false,
194 |      "input": [
195 |       "# Now what if we want permutations WITH replacement"
196 |      ],
197 |      "language": "python",
198 |      "metadata": {},
199 |      "outputs": [],
200 |      "prompt_number": 2
201 |     },
202 |     {
203 |      "cell_type": "code",
204 |      "collapsed": false,
205 |      "input": [
206 |       "# Let's imagine a box with 3 marbles in it: labeled 1, 2, and 3\n",
207 |       "box = np.array([1,2,3])\n",
208 |       "\n",
209 |       "# Now let's create a random permutation WITH replacement using randint\n",
210 |       "shaker = np.random.randint(0, len(box), size=10)"
211 |      ],
212 |      "language": "python",
213 |      "metadata": {},
214 |      "outputs": [],
215 |      "prompt_number": 13
216 |     },
217 |     {
218 |      "cell_type": "code",
219 |      "collapsed": false,
220 |      "input": [
221 |       "# Let's check the box \"shaker\"\n",
222 |       "shaker"
223 |      ],
224 |      "language": "python",
225 |      "metadata": {},
226 |      "outputs": [
227 |       {
228 |        "metadata": {},
229 |        "output_type": "pyout",
230 |        "prompt_number": 14,
231 |        "text": [
232 |         "array([2, 0, 1, 2, 1, 0, 0, 2, 0, 2])"
233 |        ]
234 |       }
235 |      ],
236 |      "prompt_number": 14
237 |     },
238 |     {
239 |      "cell_type": "code",
240 |      "collapsed": false,
241 |      "input": [
242 |       "#Now let's grab from the box\n",
243 |       "hand_grabs = box.take(shaker)\n",
244 |       "\n",
245 |       "#show\n",
246 |       "hand_grabs"
247 |      ],
248 |      "language": "python",
249 |      "metadata": {},
250 |      "outputs": [
251 |       {
252 |        "metadata": {},
253 |        "output_type": "pyout",
254 |        "prompt_number": 15,
255 |        "text": [
256 |         "array([3, 1, 2, 3, 2, 1, 1, 3, 1, 3])"
257 |        ]
258 |       }
259 |      ],
260 |      "prompt_number": 15
261 |     },
262 |     {
263 |      "cell_type": "markdown",
264 |      "metadata": {},
265 |      "source": [
266 |       "Congratulations! We're all done with this Section.\n",
267 |       "Up next: Working with Data Part 3 !!!"
268 |      ]
269 |     },
270 |     {
271 |      "cell_type": "code",
272 |      "collapsed": false,
273 |      "input": [],
274 |      "language": "python",
275 |      "metadata": {},
276 |      "outputs": []
277 |     }
278 |    ],
279 |    "metadata": {}
280 |   }
281 |  ]
282 | }


--------------------------------------------------------------------------------
/Lec 46 - Cross-Tabulation.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "metadata": {
  3 |   "name": "",
  4 |   "signature": "sha256:74416ea1650ff12fb447e93aad34a51ad70af271b87d18327f11081746d4ab55"
  5 |  },
  6 |  "nbformat": 3,
  7 |  "nbformat_minor": 0,
  8 |  "worksheets": [
  9 |   {
 10 |    "cells": [
 11 |     {
 12 |      "cell_type": "code",
 13 |      "collapsed": false,
 14 |      "input": [
 15 |       "import pandas as pd\n",
 16 |       "\n",
 17 |       "# This will be a quick lesson on cross-tabulations, which are basically a special case of pivot-tables"
 18 |      ],
 19 |      "language": "python",
 20 |      "metadata": {},
 21 |      "outputs": [],
 22 |      "prompt_number": 1
 23 |     },
 24 |     {
 25 |      "cell_type": "code",
 26 |      "collapsed": false,
 27 |      "input": [
 28 |       "# Let's create a quick data set\n",
 29 |       "from StringIO import StringIO\n",
 30 |       "\n",
 31 |       "data =\"\"\"\\\n",
 32 |       "Sample   Animal   Intelligence\n",
 33 |       "1        Dog     Smart\n",
 34 |       "2 Dog Smart\n",
 35 |       "3 Cat Dumb\n",
 36 |       "4 Cat Dumb\n",
 37 |       "5 Dog Dumb\n",
 38 |       "6 Cat Smart\"\"\"\n",
 39 |       "\n",
 40 |       "#Store as dframe\n",
 41 |       "dframe = pd.read_table(StringIO(data),sep='\\s+')"
 42 |      ],
 43 |      "language": "python",
 44 |      "metadata": {},
 45 |      "outputs": [],
 46 |      "prompt_number": 3
 47 |     },
 48 |     {
 49 |      "cell_type": "code",
 50 |      "collapsed": false,
 51 |      "input": [
 52 |       "# Show\n",
 53 |       "dframe"
 54 |      ],
 55 |      "language": "python",
 56 |      "metadata": {},
 57 |      "outputs": [
 58 |       {
 59 |        "html": [
 60 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
 61 |         "<table border=\"1\" class=\"dataframe\">\n",
 62 |         "  <thead>\n",
 63 |         "    <tr style=\"text-align: right;\">\n",
 64 |         "      <th></th>\n",
 65 |         "      <th>Sample</th>\n",
 66 |         "      <th>Animal</th>\n",
 67 |         "      <th>Intelligence</th>\n",
 68 |         "    </tr>\n",
 69 |         "  </thead>\n",
 70 |         "  <tbody>\n",
 71 |         "    <tr>\n",
 72 |         "      <th>0</th>\n",
 73 |         "      <td> 1</td>\n",
 74 |         "      <td> Dog</td>\n",
 75 |         "      <td> Smart</td>\n",
 76 |         "    </tr>\n",
 77 |         "    <tr>\n",
 78 |         "      <th>1</th>\n",
 79 |         "      <td> 2</td>\n",
 80 |         "      <td> Dog</td>\n",
 81 |         "      <td> Smart</td>\n",
 82 |         "    </tr>\n",
 83 |         "    <tr>\n",
 84 |         "      <th>2</th>\n",
 85 |         "      <td> 3</td>\n",
 86 |         "      <td> Cat</td>\n",
 87 |         "      <td>  Dumb</td>\n",
 88 |         "    </tr>\n",
 89 |         "    <tr>\n",
 90 |         "      <th>3</th>\n",
 91 |         "      <td> 4</td>\n",
 92 |         "      <td> Cat</td>\n",
 93 |         "      <td>  Dumb</td>\n",
 94 |         "    </tr>\n",
 95 |         "    <tr>\n",
 96 |         "      <th>4</th>\n",
 97 |         "      <td> 5</td>\n",
 98 |         "      <td> Dog</td>\n",
 99 |         "      <td>  Dumb</td>\n",
100 |         "    </tr>\n",
101 |         "    <tr>\n",
102 |         "      <th>5</th>\n",
103 |         "      <td> 6</td>\n",
104 |         "      <td> Cat</td>\n",
105 |         "      <td> Smart</td>\n",
106 |         "    </tr>\n",
107 |         "  </tbody>\n",
108 |         "</table>\n",
109 |         "</div>"
110 |        ],
111 |        "metadata": {},
112 |        "output_type": "pyout",
113 |        "prompt_number": 4,
114 |        "text": [
115 |         "   Sample Animal Intelligence\n",
116 |         "0       1    Dog        Smart\n",
117 |         "1       2    Dog        Smart\n",
118 |         "2       3    Cat         Dumb\n",
119 |         "3       4    Cat         Dumb\n",
120 |         "4       5    Dog         Dumb\n",
121 |         "5       6    Cat        Smart"
122 |        ]
123 |       }
124 |      ],
125 |      "prompt_number": 4
126 |     },
127 |     {
128 |      "cell_type": "code",
129 |      "collapsed": false,
130 |      "input": [
131 |       "# Now we can create a cross-tabulation table, which is basically just a frequency table\n",
132 |       "pd.crosstab(dframe.Animal,dframe.Intelligence,margins=True)"
133 |      ],
134 |      "language": "python",
135 |      "metadata": {},
136 |      "outputs": [
137 |       {
138 |        "html": [
139 |         "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
140 |         "<table border=\"1\" class=\"dataframe\">\n",
141 |         "  <thead>\n",
142 |         "    <tr style=\"text-align: right;\">\n",
143 |         "      <th>Intelligence</th>\n",
144 |         "      <th>Dumb</th>\n",
145 |         "      <th>Smart</th>\n",
146 |         "      <th>All</th>\n",
147 |         "    </tr>\n",
148 |         "    <tr>\n",
149 |         "      <th>Animal</th>\n",
150 |         "      <th></th>\n",
151 |         "      <th></th>\n",
152 |         "      <th></th>\n",
153 |         "    </tr>\n",
154 |         "  </thead>\n",
155 |         "  <tbody>\n",
156 |         "    <tr>\n",
157 |         "      <th>Cat</th>\n",
158 |         "      <td> 2</td>\n",
159 |         "      <td> 1</td>\n",
160 |         "      <td> 3</td>\n",
161 |         "    </tr>\n",
162 |         "    <tr>\n",
163 |         "      <th>Dog</th>\n",
164 |         "      <td> 1</td>\n",
165 |         "      <td> 2</td>\n",
166 |         "      <td> 3</td>\n",
167 |         "    </tr>\n",
168 |         "    <tr>\n",
169 |         "      <th>All</th>\n",
170 |         "      <td> 3</td>\n",
171 |         "      <td> 3</td>\n",
172 |         "      <td> 6</td>\n",
173 |         "    </tr>\n",
174 |         "  </tbody>\n",
175 |         "</table>\n",
176 |         "</div>"
177 |        ],
178 |        "metadata": {},
179 |        "output_type": "pyout",
180 |        "prompt_number": 6,
181 |        "text": [
182 |         "Intelligence  Dumb  Smart  All\n",
183 |         "Animal                        \n",
184 |         "Cat              2      1    3\n",
185 |         "Dog              1      2    3\n",
186 |         "All              3      3    6"
187 |        ]
188 |       }
189 |      ],
190 |      "prompt_number": 6
191 |     },
192 |     {
193 |      "cell_type": "code",
194 |      "collapsed": false,
195 |      "input": [
196 |       "# And that's about it as far as its general use.\n",
197 |       "# We'll use it in examples in the final projects!"
198 |      ],
199 |      "language": "python",
200 |      "metadata": {},
201 |      "outputs": []
202 |     },
203 |     {
204 |      "cell_type": "code",
205 |      "collapsed": false,
206 |      "input": [],
207 |      "language": "python",
208 |      "metadata": {},
209 |      "outputs": []
210 |     }
211 |    ],
212 |    "metadata": {}
213 |   }
214 |  ]
215 | }


--------------------------------------------------------------------------------
/Lec 47 - Installing Seaborn.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "metadata": {
 3 |   "name": "",
 4 |   "signature": "sha256:d9a58f58833ac4eb6009b27c1001cf38ee17ff2d08476589f29168aa7ff6069a"
 5 |  },
 6 |  "nbformat": 3,
 7 |  "nbformat_minor": 0,
 8 |  "worksheets": [
 9 |   {
10 |    "cells": [
11 |     {
12 |      "cell_type": "markdown",
13 |      "metadata": {},
14 |      "source": [
15 |       "To install, follow the directions at the following link; you should be able to use a simple pip install.\n",
16 |       "Remember to install the dependencies!"
17 |      ]
18 |     },
19 |     {
20 |      "cell_type": "markdown",
21 |      "metadata": {},
22 |      "source": [
23 |       "http://stanford.edu/~mwaskom/software/seaborn/installing.html"
24 |      ]
25 |     },
26 |     {
27 |      "cell_type": "code",
28 |      "collapsed": false,
29 |      "input": [],
30 |      "language": "python",
31 |      "metadata": {},
32 |      "outputs": []
33 |     }
34 |    ],
35 |    "metadata": {}
36 |   }
37 |  ]
38 | }


--------------------------------------------------------------------------------
/Lec 7 - Creating Arrays.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "metadata": {
  3 |   "name": "",
  4 |   "signature": "sha256:92c44799902f3e9f8a9d9d682b5ff37da0ea02c590b3864e746f86e9ac2da97a"
  5 |  },
  6 |  "nbformat": 3,
  7 |  "nbformat_minor": 0,
  8 |  "worksheets": [
  9 |   {
 10 |    "cells": [
 11 |     {
 12 |      "cell_type": "code",
 13 |      "collapsed": false,
 14 |      "input": [
 15 |       "# Creating Numpy arrays\n",
 16 |       "import numpy as np\n",
 17 |       "\n",
 18 |       "# Converting from a list\n",
 19 |       "#Let's start with a list\n",
 20 |       "\n",
 21 |       "my_list1 = [1,2,3,4]\n",
 22 |       "\n",
 23 |       "my_array1 = np.array(my_list1)\n",
 24 |       "\n"
 25 |      ],
 26 |      "language": "python",
 27 |      "metadata": {},
 28 |      "outputs": [],
 29 |      "prompt_number": 2
 30 |     },
 31 |     {
 32 |      "cell_type": "code",
 33 |      "collapsed": false,
 34 |      "input": [
 35 |       "#Print out array\n",
 36 |       "\n",
 37 |       "my_array1"
 38 |      ],
 39 |      "language": "python",
 40 |      "metadata": {},
 41 |      "outputs": [
 42 |       {
 43 |        "metadata": {},
 44 |        "output_type": "pyout",
 45 |        "prompt_number": 2,
 46 |        "text": [
 47 |         "array([1, 2, 3, 4])"
 48 |        ]
 49 |       }
 50 |      ],
 51 |      "prompt_number": 2
 52 |     },
 53 |     {
 54 |      "cell_type": "code",
 55 |      "collapsed": false,
 56 |      "input": [
 57 |       "# Make another list\n",
 58 |       "my_list2 = [11,22,33,44]\n",
 59 |       "\n",
 60 |       "#Make a list of lists\n",
 61 |       "my_lists = [my_list1,my_list2]\n",
 62 |       "\n",
 63 |       "#Make multi-dimensional array\n",
 64 |       "my_array2 = np.array(my_lists)\n",
 65 |       "\n",
 66 |       "#Show array\n",
 67 |       "my_array2"
 68 |      ],
 69 |      "language": "python",
 70 |      "metadata": {},
 71 |      "outputs": [
 72 |       {
 73 |        "metadata": {},
 74 |        "output_type": "pyout",
 75 |        "prompt_number": 5,
 76 |        "text": [
 77 |         "array([[ 1,  2,  3,  4],\n",
 78 |         "       [11, 22, 33, 44]])"
 79 |        ]
 80 |       }
 81 |      ],
 82 |      "prompt_number": 5
 83 |     },
 84 |     {
 85 |      "cell_type": "code",
 86 |      "collapsed": false,
 87 |      "input": [
 88 |       "#Let's get the shape (size along each dimension) of the array\n",
 89 |       "my_array2.shape"
 90 |      ],
 91 |      "language": "python",
 92 |      "metadata": {},
 93 |      "outputs": [
 94 |       {
 95 |        "metadata": {},
 96 |        "output_type": "pyout",
 97 |        "prompt_number": 6,
 98 |        "text": [
 99 |         "(2L, 4L)"
100 |        ]
101 |       }
102 |      ],
103 |      "prompt_number": 6
104 |     },
105 |     {
106 |      "cell_type": "code",
107 |      "collapsed": false,
108 |      "input": [
109 |       "#Find out the data type of the array\n",
110 |       "my_array2.dtype"
111 |      ],
112 |      "language": "python",
113 |      "metadata": {},
114 |      "outputs": [
115 |       {
116 |        "metadata": {},
117 |        "output_type": "pyout",
118 |        "prompt_number": 7,
119 |        "text": [
120 |         "dtype('int32')"
121 |        ]
122 |       }
123 |      ],
124 |      "prompt_number": 7
125 |     },
126 |     {
127 |      "cell_type": "code",
128 |      "collapsed": false,
129 |      "input": [
130 |       "#Making special case arrays\n",
131 |       "\n",
132 |       "#Zeros\n",
133 |       "np.zeros(5)"
134 |      ],
135 |      "language": "python",
136 |      "metadata": {},
137 |      "outputs": [
138 |       {
139 |        "metadata": {},
140 |        "output_type": "pyout",
141 |        "prompt_number": 8,
142 |        "text": [
143 |         "array([ 0.,  0.,  0.,  0.,  0.])"
144 |        ]
145 |       }
146 |      ],
147 |      "prompt_number": 8
148 |     },
149 |     {
150 |      "cell_type": "code",
151 |      "collapsed": false,
152 |      "input": [
153 |       "#Ones\n",
154 |       "np.ones((5,5))"
155 |      ],
156 |      "language": "python",
157 |      "metadata": {},
158 |      "outputs": [
159 |       {
160 |        "metadata": {},
161 |        "output_type": "pyout",
162 |        "prompt_number": 11,
163 |        "text": [
164 |         "dtype('float64')"
165 |        ]
166 |       }
167 |      ],
168 |      "prompt_number": 11
169 |     },
170 |     {
171 |      "cell_type": "code",
172 |      "collapsed": false,
173 |      "input": [
174 |       "# An empty array\n",
175 |       "\n",
176 |       "np.empty(5)\n",
177 |       "np.empty((3,4))"
178 |      ],
179 |      "language": "python",
180 |      "metadata": {},
181 |      "outputs": [
182 |       {
183 |        "metadata": {},
184 |        "output_type": "pyout",
185 |        "prompt_number": 13,
186 |        "text": [
187 |         "array([[  1.15117295e-321,   0.00000000e+000,   0.00000000e+000,\n",
188 |         "          0.00000000e+000],\n",
189 |         "       [  0.00000000e+000,   0.00000000e+000,   0.00000000e+000,\n",
190 |         "          0.00000000e+000],\n",
191 |         "       [  0.00000000e+000,   0.00000000e+000,   0.00000000e+000,\n",
192 |         "          0.00000000e+000]])"
193 |        ]
194 |       }
195 |      ],
196 |      "prompt_number": 13
197 |     },
198 |     {
199 |      "cell_type": "code",
200 |      "collapsed": false,
201 |      "input": [
202 |       "#Identity array\n",
203 |       "np.eye(5)"
204 |      ],
205 |      "language": "python",
206 |      "metadata": {},
207 |      "outputs": [
208 |       {
209 |        "metadata": {},
210 |        "output_type": "pyout",
211 |        "prompt_number": 16,
212 |        "text": [
213 |         "array([[ 1.,  0.,  0.,  0.,  0.],\n",
214 |         "       [ 0.,  1.,  0.,  0.,  0.],\n",
215 |         "       [ 0.,  0.,  1.,  0.,  0.],\n",
216 |         "       [ 0.,  0.,  0.,  1.,  0.],\n",
217 |         "       [ 0.,  0.,  0.,  0.,  1.]])"
218 |        ]
219 |       }
220 |      ],
221 |      "prompt_number": 16
222 |     },
223 |     {
224 |      "cell_type": "code",
225 |      "collapsed": false,
226 |      "input": [
227 |       "# Using a range\n",
228 |       "\n",
229 |       "np.arange(5)"
230 |      ],
231 |      "language": "python",
232 |      "metadata": {},
233 |      "outputs": [
234 |       {
235 |        "metadata": {},
236 |        "output_type": "pyout",
237 |        "prompt_number": 4,
238 |        "text": [
239 |         "array([0, 1, 2, 3, 4])"
240 |        ]
241 |       }
242 |      ],
243 |      "prompt_number": 4
244 |     },
245 |     {
246 |      "cell_type": "code",
247 |      "collapsed": false,
248 |      "input": [],
249 |      "language": "python",
250 |      "metadata": {},
251 |      "outputs": []
252 |     }
253 |    ],
254 |    "metadata": {}
255 |   }
256 |  ]
257 | }


--------------------------------------------------------------------------------
/Lec 8 - Using arrays and scalars.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "metadata": {
  3 |   "name": "",
  4 |   "signature": "sha256:d5bc45410f980063d362d449d2cd9071e1e0146af31bfdb22f78ba70299c29c8"
  5 |  },
  6 |  "nbformat": 3,
  7 |  "nbformat_minor": 0,
  8 |  "worksheets": [
  9 |   {
 10 |    "cells": [
 11 |     {
 12 |      "cell_type": "code",
 13 |      "collapsed": false,
 14 |      "input": [
 15 |       "import numpy as np\n",
 16 |       "\n"
 17 |      ],
 18 |      "language": "python",
 19 |      "metadata": {},
 20 |      "outputs": [],
 21 |      "prompt_number": 14
 22 |     },
 23 |     {
 24 |      "cell_type": "code",
 25 |      "collapsed": false,
 26 |      "input": [
 27 |       "5/2"
 28 |      ],
 29 |      "language": "python",
 30 |      "metadata": {},
 31 |      "outputs": [
 32 |       {
 33 |        "metadata": {},
 34 |        "output_type": "pyout",
 35 |        "prompt_number": 15,
 36 |        "text": [
 37 |         "2.5"
 38 |        ]
 39 |       }
 40 |      ],
 41 |      "prompt_number": 15
 42 |     },
 43 |     {
 44 |      "cell_type": "code",
 45 |      "collapsed": false,
 46 |      "input": [
 47 |       "#Make the / operator perform true (floating-point) division in Python 2\n",
 48 |       "from __future__ import division"
 49 |      ],
 50 |      "language": "python",
 51 |      "metadata": {},
 52 |      "outputs": [],
 53 |      "prompt_number": 16
 54 |     },
 55 |     {
 56 |      "cell_type": "code",
 57 |      "collapsed": false,
 58 |      "input": [
 59 |       "# Create array\n",
 60 |       "arr1 = np.array([[1,2,3],[8,9,10]])\n",
 61 |       "\n",
 62 |       "#Show\n",
 63 |       "arr1\n"
 64 |      ],
 65 |      "language": "python",
 66 |      "metadata": {},
 67 |      "outputs": [
 68 |       {
 69 |        "metadata": {},
 70 |        "output_type": "pyout",
 71 |        "prompt_number": 4,
 72 |        "text": [
 73 |         "array([[ 1,  2,  3],\n",
 74 |         "       [ 8,  9, 10]])"
 75 |        ]
 76 |       }
 77 |      ],
 78 |      "prompt_number": 4
 79 |     },
 80 |     {
 81 |      "cell_type": "code",
 82 |      "collapsed": false,
 83 |      "input": [
 84 |       "#Multiplying Arrays\n",
 85 |       "arr1*arr1"
 86 |      ],
 87 |      "language": "python",
 88 |      "metadata": {},
 89 |      "outputs": [
 90 |       {
 91 |        "metadata": {},
 92 |        "output_type": "pyout",
 93 |        "prompt_number": 5,
 94 |        "text": [
 95 |         "array([[  1,   4,   9],\n",
 96 |         "       [ 64,  81, 100]])"
 97 |        ]
 98 |       }
 99 |      ],
100 |      "prompt_number": 5
101 |     },
102 |     {
103 |      "cell_type": "code",
104 |      "collapsed": false,
105 |      "input": [
106 |       "#Subtraction\n",
107 |       "arr1-arr1"
108 |      ],
109 |      "language": "python",
110 |      "metadata": {},
111 |      "outputs": [
112 |       {
113 |        "metadata": {},
114 |        "output_type": "pyout",
115 |        "prompt_number": 6,
116 |        "text": [
117 |         "array([[0, 0, 0],\n",
118 |         "       [0, 0, 0]])"
119 |        ]
120 |       }
121 |      ],
122 |      "prompt_number": 6
123 |     },
124 |     {
125 |      "cell_type": "code",
126 |      "collapsed": false,
127 |      "input": [
128 |       "#Arithmetic operations with scalars on array\n",
129 |       "1 / arr1"
130 |      ],
131 |      "language": "python",
132 |      "metadata": {},
133 |      "outputs": [
134 |       {
135 |        "metadata": {},
136 |        "output_type": "pyout",
137 |        "prompt_number": 10,
138 |        "text": [
139 |         "array([[ 1.        ,  0.5       ,  0.33333333],\n",
140 |         "       [ 0.125     ,  0.11111111,  0.1       ]])"
141 |        ]
142 |       }
143 |      ],
144 |      "prompt_number": 10
145 |     },
146 |     {
147 |      "cell_type": "code",
148 |      "collapsed": false,
149 |      "input": [
150 |       "#Exponential operation\n",
151 |       "arr1 ** 3"
152 |      ],
153 |      "language": "python",
154 |      "metadata": {},
155 |      "outputs": [
156 |       {
157 |        "metadata": {},
158 |        "output_type": "pyout",
159 |        "prompt_number": 18,
160 |        "text": [
161 |         "array([[   1,    8,   27],\n",
162 |         "       [ 512,  729, 1000]])"
163 |        ]
164 |       }
165 |      ],
166 |      "prompt_number": 18
167 |     },
168 |     {
169 |      "cell_type": "code",
170 |      "collapsed": false,
171 |      "input": [],
172 |      "language": "python",
173 |      "metadata": {},
174 |      "outputs": []
175 |     }
176 |    ],
177 |    "metadata": {}
178 |   }
179 |  ]
180 | }


--------------------------------------------------------------------------------
/Lec 9 -Indexing Arrays.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "metadata": {
  3 |   "name": "",
  4 |   "signature": "sha256:da289f9874cfe0a322d3b4211ca9067b0a664a5ade9aec6265bbcd9eeecd4386"
  5 |  },
  6 |  "nbformat": 3,
  7 |  "nbformat_minor": 0,
  8 |  "worksheets": [
  9 |   {
 10 |    "cells": [
 11 |     {
 12 |      "cell_type": "code",
 13 |      "collapsed": false,
 14 |      "input": [
 15 |       "import numpy as np\n"
 16 |      ],
 17 |      "language": "python",
 18 |      "metadata": {},
 19 |      "outputs": [],
 20 |      "prompt_number": 11
 21 |     },
 22 |     {
 23 |      "cell_type": "code",
 24 |      "collapsed": false,
 25 |      "input": [
 26 |       "#Creating sample array\n",
 27 |       "arr = np.arange(0,11)"
 28 |      ],
 29 |      "language": "python",
 30 |      "metadata": {},
 31 |      "outputs": [],
 32 |      "prompt_number": 12
 33 |     },
 34 |     {
 35 |      "cell_type": "code",
 36 |      "collapsed": false,
 37 |      "input": [
 38 |       "#Show\n",
 39 |       "arr"
 40 |      ],
 41 |      "language": "python",
 42 |      "metadata": {},
 43 |      "outputs": [
 44 |       {
 45 |        "metadata": {},
 46 |        "output_type": "pyout",
 47 |        "prompt_number": 6,
 48 |        "text": [
 49 |         "array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])"
 50 |        ]
 51 |       }
 52 |      ],
 53 |      "prompt_number": 6
 54 |     },
 55 |     {
 56 |      "cell_type": "code",
 57 |      "collapsed": false,
 58 |      "input": [
 59 |       "#Get a value at an index\n",
 60 |       "arr[8]"
 61 |      ],
 62 |      "language": "python",
 63 |      "metadata": {},
 64 |      "outputs": [
 65 |       {
 66 |        "metadata": {},
 67 |        "output_type": "pyout",
 68 |        "prompt_number": 7,
 69 |        "text": [
 70 |         "8"
 71 |        ]
 72 |       }
 73 |      ],
 74 |      "prompt_number": 7
 75 |     },
 76 |     {
 77 |      "cell_type": "code",
 78 |      "collapsed": false,
 79 |      "input": [
 80 |       "#Get values in a range\n",
 81 |       "arr[1:5]"
 82 |      ],
 83 |      "language": "python",
 84 |      "metadata": {},
 85 |      "outputs": [
 86 |       {
 87 |        "metadata": {},
 88 |        "output_type": "pyout",
 89 |        "prompt_number": 9,
 90 |        "text": [
 91 |         "array([1, 2, 3, 4])"
 92 |        ]
 93 |       }
 94 |      ],
 95 |      "prompt_number": 9
 96 |     },
 97 |     {
 98 |      "cell_type": "code",
 99 |      "collapsed": false,
100 |      "input": [
101 |       "#Get values in a range\n",
102 |       "arr[0:5]"
103 |      ],
104 |      "language": "python",
105 |      "metadata": {},
106 |      "outputs": [
107 |       {
108 |        "metadata": {},
109 |        "output_type": "pyout",
110 |        "prompt_number": 10,
111 |        "text": [
112 |         "array([0, 1, 2, 3, 4])"
113 |        ]
114 |       }
115 |      ],
116 |      "prompt_number": 10
117 |     },
118 |     {
119 |      "cell_type": "code",
120 |      "collapsed": false,
121 |      "input": [
122 |       "#Setting a value with index range (Broadcasting)\n",
123 |       "arr[0:5]=100\n",
124 |       "\n",
125 |       "#Show\n",
126 |       "arr"
127 |      ],
128 |      "language": "python",
129 |      "metadata": {},
130 |      "outputs": [
131 |       {
132 |        "metadata": {},
133 |        "output_type": "pyout",
134 |        "prompt_number": 16,
135 |        "text": [
136 |         "array([100, 100, 100, 100, 100,   5,   6,   7,   8,   9,  10])"
137 |        ]
138 |       }
139 |      ],
140 |      "prompt_number": 16
141 |     },
142 |     {
143 |      "cell_type": "code",
144 |      "collapsed": false,
145 |      "input": [
 146 |       "# Reset array, we'll see why I had to reset in a moment\n",
147 |       "arr = np.arange(0,11)\n",
148 |       "\n",
149 |       "#Show\n",
150 |       "arr"
151 |      ],
152 |      "language": "python",
153 |      "metadata": {},
154 |      "outputs": [
155 |       {
156 |        "metadata": {},
157 |        "output_type": "pyout",
158 |        "prompt_number": 31,
159 |        "text": [
160 |         "array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])"
161 |        ]
162 |       }
163 |      ],
164 |      "prompt_number": 31
165 |     },
166 |     {
167 |      "cell_type": "code",
168 |      "collapsed": false,
169 |      "input": [
170 |       "#Important notes on Slices\n",
171 |       "slice_of_arr = arr[0:6]\n",
172 |       "\n",
173 |       "#Show slice\n",
174 |       "slice_of_arr\n",
175 |       "\n",
176 |       "\n"
177 |      ],
178 |      "language": "python",
179 |      "metadata": {},
180 |      "outputs": [
181 |       {
182 |        "metadata": {},
183 |        "output_type": "pyout",
184 |        "prompt_number": 32,
185 |        "text": [
186 |         "array([0, 1, 2, 3, 4, 5])"
187 |        ]
188 |       }
189 |      ],
190 |      "prompt_number": 32
191 |     },
192 |     {
193 |      "cell_type": "code",
194 |      "collapsed": false,
195 |      "input": [
196 |       "#Change Slice\n",
197 |       "slice_of_arr[:]=99\n",
198 |       "\n",
199 |       "#Show Slice again\n",
200 |       "slice_of_arr\n"
201 |      ],
202 |      "language": "python",
203 |      "metadata": {},
204 |      "outputs": [
205 |       {
206 |        "metadata": {},
207 |        "output_type": "pyout",
208 |        "prompt_number": 33,
209 |        "text": [
210 |         "array([99, 99, 99, 99, 99, 99])"
211 |        ]
212 |       }
213 |      ],
214 |      "prompt_number": 33
215 |     },
216 |     {
217 |      "cell_type": "code",
218 |      "collapsed": false,
219 |      "input": [
220 |       "# Now note the changes also occur in our original array!\n",
221 |       "arr\n",
222 |       "\n",
223 |       "# Data is not copied, it's a view of the original array! This avoids memory problems!\n",
224 |       "\n"
225 |      ],
226 |      "language": "python",
227 |      "metadata": {},
228 |      "outputs": [
229 |       {
230 |        "metadata": {},
231 |        "output_type": "pyout",
232 |        "prompt_number": 34,
233 |        "text": [
234 |         "array([99, 99, 99, 99, 99, 99,  6,  7,  8,  9, 10])"
235 |        ]
236 |       }
237 |      ],
238 |      "prompt_number": 34
239 |     },
240 |     {
241 |      "cell_type": "code",
242 |      "collapsed": false,
243 |      "input": [
244 |       "#To get a copy, need to be explicit\n",
245 |       "arr_copy = arr.copy()\n",
246 |       "\n",
247 |       "arr_copy"
248 |      ],
249 |      "language": "python",
250 |      "metadata": {},
251 |      "outputs": [
252 |       {
253 |        "metadata": {},
254 |        "output_type": "pyout",
255 |        "prompt_number": 43,
256 |        "text": [
257 |         "array([99, 99, 99, 99, 99, 99,  6,  7,  8,  9, 10])"
258 |        ]
259 |       }
260 |      ],
261 |      "prompt_number": 43
262 |     },
263 |     {
264 |      "cell_type": "code",
265 |      "collapsed": false,
266 |      "input": [
267 |       "# Indexing a 2D array\n",
268 |       "\n",
269 |       "arr_2d = np.array(([5,10,15],[20,25,30],[35,40,45]))\n",
270 |       "\n",
271 |       "#Show\n",
272 |       "arr_2d\n",
273 |       "\n"
274 |      ],
275 |      "language": "python",
276 |      "metadata": {},
277 |      "outputs": [
278 |       {
279 |        "metadata": {},
280 |        "output_type": "pyout",
281 |        "prompt_number": 38,
282 |        "text": [
283 |         "array([[ 5, 10, 15],\n",
284 |         "       [20, 25, 30],\n",
285 |         "       [35, 40, 45]])"
286 |        ]
287 |       }
288 |      ],
289 |      "prompt_number": 38
290 |     },
291 |     {
292 |      "cell_type": "code",
293 |      "collapsed": false,
294 |      "input": [
295 |       "#Indexing row\n",
296 |       "arr_2d[1]\n"
297 |      ],
298 |      "language": "python",
299 |      "metadata": {},
300 |      "outputs": [
301 |       {
302 |        "metadata": {},
303 |        "output_type": "pyout",
304 |        "prompt_number": 39,
305 |        "text": [
306 |         "array([20, 25, 30])"
307 |        ]
308 |       }
309 |      ],
310 |      "prompt_number": 39
311 |     },
312 |     {
313 |      "cell_type": "code",
314 |      "collapsed": false,
315 |      "input": [
316 |       "# Format is arr_2d[row][col] or arr_2d[row,col]\n",
317 |       "\n",
318 |       "# Getting individual element value\n",
319 |       "arr_2d[1][0]\n",
320 |       "\n",
321 |       "\n"
322 |      ],
323 |      "language": "python",
324 |      "metadata": {},
325 |      "outputs": [
326 |       {
327 |        "metadata": {},
328 |        "output_type": "pyout",
329 |        "prompt_number": 40,
330 |        "text": [
331 |         "20"
332 |        ]
333 |       }
334 |      ],
335 |      "prompt_number": 40
336 |     },
337 |     {
338 |      "cell_type": "code",
339 |      "collapsed": false,
340 |      "input": [
341 |       "# Getting individual element value\n",
342 |       "arr_2d[1,0]"
343 |      ],
344 |      "language": "python",
345 |      "metadata": {},
346 |      "outputs": [
347 |       {
348 |        "metadata": {},
349 |        "output_type": "pyout",
350 |        "prompt_number": 41,
351 |        "text": [
352 |         "20"
353 |        ]
354 |       }
355 |      ],
356 |      "prompt_number": 41
357 |     },
358 |     {
359 |      "cell_type": "code",
360 |      "collapsed": false,
361 |      "input": [
362 |       "# 2D array slicing\n",
363 |       "\n",
364 |       "#Shape (2,2) from top right corner\n",
365 |       "arr_2d[:2,1:]\n",
366 |       "\n"
367 |      ],
368 |      "language": "python",
369 |      "metadata": {},
370 |      "outputs": [
371 |       {
372 |        "metadata": {},
373 |        "output_type": "pyout",
374 |        "prompt_number": 45,
375 |        "text": [
376 |         "array([[10, 15],\n",
377 |         "       [25, 30]])"
378 |        ]
379 |       }
380 |      ],
381 |      "prompt_number": 45
382 |     },
383 |     {
384 |      "cell_type": "code",
385 |      "collapsed": false,
386 |      "input": [
 387 |       "#Select the bottom row\n",
388 |       "arr_2d[2]\n"
389 |      ],
390 |      "language": "python",
391 |      "metadata": {},
392 |      "outputs": [
393 |       {
394 |        "metadata": {},
395 |        "output_type": "pyout",
396 |        "prompt_number": 46,
397 |        "text": [
398 |         "array([35, 40, 45])"
399 |        ]
400 |       }
401 |      ],
402 |      "prompt_number": 46
403 |     },
404 |     {
405 |      "cell_type": "code",
406 |      "collapsed": false,
407 |      "input": [
 408 |       "#Select the bottom row (explicit slice over all columns)\n",
409 |       "arr_2d[2,:]"
410 |      ],
411 |      "language": "python",
412 |      "metadata": {},
413 |      "outputs": [
414 |       {
415 |        "metadata": {},
416 |        "output_type": "pyout",
417 |        "prompt_number": 47,
418 |        "text": [
419 |         "array([35, 40, 45])"
420 |        ]
421 |       }
422 |      ],
423 |      "prompt_number": 47
424 |     },
425 |     {
426 |      "cell_type": "code",
427 |      "collapsed": false,
428 |      "input": [
429 |       "# Fancy Indexing\n",
430 |       "\n",
431 |       "#Set up matrix\n",
432 |       "arr2d = np.zeros((10,10))\n"
433 |      ],
434 |      "language": "python",
435 |      "metadata": {},
436 |      "outputs": [],
437 |      "prompt_number": 66
438 |     },
439 |     {
440 |      "cell_type": "code",
441 |      "collapsed": false,
442 |      "input": [
 443 |       "#Number of columns (the array is square, so this also equals the number of rows)\n",
444 |       "arr_length = arr2d.shape[1]"
445 |      ],
446 |      "language": "python",
447 |      "metadata": {},
448 |      "outputs": [],
449 |      "prompt_number": 68
450 |     },
451 |     {
452 |      "cell_type": "code",
453 |      "collapsed": false,
454 |      "input": [
455 |       "#Set up array\n",
456 |       "\n",
457 |       "for i in range(arr_length):\n",
458 |       "    arr2d[i] = i\n",
459 |       "    \n",
460 |       "arr2d"
461 |      ],
462 |      "language": "python",
463 |      "metadata": {},
464 |      "outputs": [
465 |       {
466 |        "metadata": {},
467 |        "output_type": "pyout",
468 |        "prompt_number": 70,
469 |        "text": [
470 |         "array([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],\n",
471 |         "       [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],\n",
472 |         "       [ 2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.],\n",
473 |         "       [ 3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.],\n",
474 |         "       [ 4.,  4.,  4.,  4.,  4.,  4.,  4.,  4.,  4.,  4.],\n",
475 |         "       [ 5.,  5.,  5.,  5.,  5.,  5.,  5.,  5.,  5.,  5.],\n",
476 |         "       [ 6.,  6.,  6.,  6.,  6.,  6.,  6.,  6.,  6.,  6.],\n",
477 |         "       [ 7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.],\n",
478 |         "       [ 8.,  8.,  8.,  8.,  8.,  8.,  8.,  8.,  8.,  8.],\n",
479 |         "       [ 9.,  9.,  9.,  9.,  9.,  9.,  9.,  9.,  9.,  9.]])"
480 |        ]
481 |       }
482 |      ],
483 |      "prompt_number": 70
484 |     },
485 |     {
486 |      "cell_type": "code",
487 |      "collapsed": false,
488 |      "input": [
489 |       "#Fancy indexing allows the following\n",
490 |       "arr2d[[2,4,6,8]]"
491 |      ],
492 |      "language": "python",
493 |      "metadata": {},
494 |      "outputs": [
495 |       {
496 |        "metadata": {},
497 |        "output_type": "pyout",
498 |        "prompt_number": 71,
499 |        "text": [
500 |         "array([[ 2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.],\n",
501 |         "       [ 4.,  4.,  4.,  4.,  4.,  4.,  4.,  4.,  4.,  4.],\n",
502 |         "       [ 6.,  6.,  6.,  6.,  6.,  6.,  6.,  6.,  6.,  6.],\n",
503 |         "       [ 8.,  8.,  8.,  8.,  8.,  8.,  8.,  8.,  8.,  8.]])"
504 |        ]
505 |       }
506 |      ],
507 |      "prompt_number": 71
508 |     },
509 |     {
510 |      "cell_type": "code",
511 |      "collapsed": false,
512 |      "input": [
513 |       "#Allows in any order\n",
514 |       "arr2d[[6,4,2,7]]"
515 |      ],
516 |      "language": "python",
517 |      "metadata": {},
518 |      "outputs": [
519 |       {
520 |        "metadata": {},
521 |        "output_type": "pyout",
522 |        "prompt_number": 72,
523 |        "text": [
524 |         "array([[ 6.,  6.,  6.,  6.,  6.,  6.,  6.,  6.,  6.,  6.],\n",
525 |         "       [ 4.,  4.,  4.,  4.,  4.,  4.,  4.,  4.,  4.,  4.],\n",
526 |         "       [ 2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.],\n",
527 |         "       [ 7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.]])"
528 |        ]
529 |       }
530 |      ],
531 |      "prompt_number": 72
532 |     },
533 |     {
534 |      "cell_type": "code",
535 |      "collapsed": false,
536 |      "input": [],
537 |      "language": "python",
538 |      "metadata": {},
539 |      "outputs": []
540 |     }
541 |    ],
542 |    "metadata": {}
543 |   }
544 |  ]
545 | }


--------------------------------------------------------------------------------
/Lec_28_test.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jmportilla/Udemy-notes/b557a458692a56ade1a818ea2d23402b1ee1086f/Lec_28_test.xlsx


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Learning Python for Data Analysis and Visualization
2 | Here you will find the Notebooks for my Udemy Course:
3 | https://www.udemy.com/learning-python-for-data-analysis-and-visualization/
4 | 
5 | Use the coupon DEAL19 for 90% off the course!
6 | 
7 | 
8 | 


--------------------------------------------------------------------------------
/lec25.csv:
--------------------------------------------------------------------------------
1 | q,r,s,t,apple
2 | 2,3,4,5,pear
3 | a,s,d,f,rabbit
4 | 5,2,5,7,dog


--------------------------------------------------------------------------------
/lec25.txt:
--------------------------------------------------------------------------------
1 | q,r,s,t,apple
2 | 2,3,4,5,pear
3 | a,s,d,f,rabbit
4 | 5,2,5,7,dog


--------------------------------------------------------------------------------
/my_array.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jmportilla/Udemy-notes/b557a458692a56ade1a818ea2d23402b1ee1086f/my_array.npy


--------------------------------------------------------------------------------
/my_test_text.txt:
--------------------------------------------------------------------------------
1 | 1.000000000000000000e+00,2.000000000000000000e+00,3.000000000000000000e+00
2 | 4.000000000000000000e+00,5.000000000000000000e+00,6.000000000000000000e+00
3 | 


--------------------------------------------------------------------------------
/mytextdata_out.csv:
--------------------------------------------------------------------------------
1 | _0_1_2_3_4
2 | 0_q_r_s_t_apple
3 | 1_2_3_4_5_pear
4 | 2_a_s_d_f_rabbit
5 | 3_5_2_5_7_dog
6 | 


--------------------------------------------------------------------------------
/two_arrays.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jmportilla/Udemy-notes/b557a458692a56ade1a818ea2d23402b1ee1086f/two_arrays.npz


--------------------------------------------------------------------------------