├── .gitignore
├── Contributing.md
├── Credits.ipynb
├── DynamicBrain
├── BrainObservatory.ipynb
├── EphysObservatory.ipynb
├── Modeling
│ ├── biophysical_notebook
│ │ ├── biophysical_modeling.ipynb
│ │ ├── build_network.py
│ │ ├── components
│ │ │ ├── biophysical
│ │ │ │ ├── electrophysiology
│ │ │ │ │ ├── 472363762_fit.json
│ │ │ │ │ ├── 472912177_fit.json
│ │ │ │ │ ├── 473862421_fit.json
│ │ │ │ │ ├── 473863035_fit.json
│ │ │ │ │ └── 473863510_fit.json
│ │ │ │ └── morphology
│ │ │ │ │ ├── Nr5a1-Cre_Ai14_IVSCC_-169250.03.02.01_471087815_m.swc
│ │ │ │ │ ├── Pvalb-IRES-Cre_Ai14_IVSCC_-169125.03.01.01_469628681_m.swc
│ │ │ │ │ ├── Pvalb-IRES-Cre_Ai14_IVSCC_-176847.04.02.01_470522102_m.swc
│ │ │ │ │ ├── Rorb-IRES2-Cre-D_Ai14_IVSCC_-168053.05.01.01_325404214_m.swc
│ │ │ │ │ └── Scnn1a-Tg3-Cre_Ai14_IVSCC_-177300.01.02.01_473845048_m.swc
│ │ │ ├── electrodes
│ │ │ │ └── single_electrode.csv
│ │ │ ├── hoc_templates
│ │ │ │ └── Biophys1.hoc
│ │ │ ├── intfire
│ │ │ │ ├── IntFire1_exc_1.json
│ │ │ │ └── IntFire1_inh_1.json
│ │ │ ├── mechanisms
│ │ │ │ └── modfiles
│ │ │ │ │ ├── CaDynamics.mod
│ │ │ │ │ ├── Ca_HVA.mod
│ │ │ │ │ ├── Ca_LVA.mod
│ │ │ │ │ ├── Ih.mod
│ │ │ │ │ ├── Im.mod
│ │ │ │ │ ├── Im_v2.mod
│ │ │ │ │ ├── K_P.mod
│ │ │ │ │ ├── K_T.mod
│ │ │ │ │ ├── Kd.mod
│ │ │ │ │ ├── Kv2like.mod
│ │ │ │ │ ├── Kv3_1.mod
│ │ │ │ │ ├── NaTa.mod
│ │ │ │ │ ├── NaTs.mod
│ │ │ │ │ ├── NaV.mod
│ │ │ │ │ ├── Nap.mod
│ │ │ │ │ ├── SK.mod
│ │ │ │ │ └── vecevent.mod
│ │ │ └── synaptic_models
│ │ │ │ ├── AMPA_ExcToExc.json
│ │ │ │ ├── AMPA_ExcToInh.json
│ │ │ │ ├── GABA_InhToExc.json
│ │ │ │ ├── GABA_InhToInh.json
│ │ │ │ ├── instanteneousExc.json
│ │ │ │ └── instanteneousInh.json
│ │ ├── config.json
│ │ ├── run_bionet.py
│ │ └── schematics_png
│ │ │ ├── External_input_connected.png
│ │ │ ├── External_input_created.png
│ │ │ ├── Full_figure.png
│ │ │ ├── Neurons_created.png
│ │ │ ├── Neurons_created_figure.png
│ │ │ ├── Recurrent_connected.png
│ │ │ ├── Recurrent_connected_figure.png
│ │ │ └── full_network.png
│ ├── layer4_analysis
│ │ └── layer4_analysis.ipynb
│ ├── pointnet_notebook
│ │ ├── components
│ │ │ ├── cell_models
│ │ │ │ ├── 472363762_fit.json
│ │ │ │ ├── 472363762_point.json
│ │ │ │ ├── 472912177_fit.json
│ │ │ │ ├── 472912177_point.json
│ │ │ │ ├── 473862421_point.json
│ │ │ │ ├── 473863035_point.json
│ │ │ │ ├── 473863510_point.json
│ │ │ │ ├── IntFire1_exc_1.json
│ │ │ │ ├── IntFire1_exc_fit.json
│ │ │ │ ├── IntFire1_exc_point.json
│ │ │ │ ├── IntFire1_inh_1.json
│ │ │ │ ├── IntFire1_inh_fit.json
│ │ │ │ ├── IntFire1_inh_point.json
│ │ │ │ └── filter_point.json
│ │ │ └── synaptic_models
│ │ │ │ ├── AMPA_ExcToExc.json
│ │ │ │ ├── AMPA_ExcToInh.json
│ │ │ │ ├── ExcToExc.json
│ │ │ │ ├── ExcToInh.json
│ │ │ │ ├── GABA_InhToExc.json
│ │ │ │ ├── GABA_InhToInh.json
│ │ │ │ ├── InhToExc.json
│ │ │ │ ├── InhToInh.json
│ │ │ │ ├── instanteneousExc.json
│ │ │ │ └── instanteneousInh.json
│ │ ├── config.json
│ │ ├── network
│ │ │ ├── recurrent_network
│ │ │ │ ├── edge_types.csv
│ │ │ │ ├── edges.h5
│ │ │ │ ├── node_types.csv
│ │ │ │ ├── nodes.csv
│ │ │ │ └── nodes.h5
│ │ │ └── source_input
│ │ │ │ ├── edge_types.csv
│ │ │ │ ├── edges.h5
│ │ │ │ ├── input_edge_types.csv
│ │ │ │ ├── input_edges.h5
│ │ │ │ ├── input_node_types.csv
│ │ │ │ ├── input_nodes.csv
│ │ │ │ ├── node_types.csv
│ │ │ │ ├── nodes.h5
│ │ │ │ ├── poission_input_spk_train.h5
│ │ │ │ └── poisson_input_spk_train.nwb
│ │ ├── pointnet_modeling_example.ipynb
│ │ ├── run_pointnet.py
│ │ └── set_weights.py
│ └── popnet_notebook
│ │ ├── components
│ │ ├── pop_models
│ │ │ ├── excitatory_pop.json
│ │ │ ├── filter_pop.json
│ │ │ └── inhibitory_pop.json
│ │ └── synaptic_models
│ │ │ ├── ExcToExc.json
│ │ │ ├── ExcToInh.json
│ │ │ ├── InhToExc.json
│ │ │ └── InhToInh.json
│ │ ├── config.json
│ │ ├── input_rates.csv
│ │ ├── network
│ │ └── recurrent_network_v2
│ │ │ ├── edge_types.csv
│ │ │ └── node_types.csv
│ │ ├── population_modeling.ipynb
│ │ └── schematics_png
│ │ ├── DiPDE_ei_net.png
│ │ ├── ei_ext_pop.png
│ │ ├── ei_ext_pop_conn1.png
│ │ ├── ei_ext_pop_conn1and2.png
│ │ └── ei_pop.png
├── Other
│ ├── CellTypes.ipynb
│ └── Connectivity.ipynb
├── Tutorials
│ ├── 01_decoding_sklearn.ipynb
│ ├── T01_Regression.ipynb
│ ├── T02_Principal_component_analysis.ipynb
│ ├── T03_Classification_tutorial.ipynb
│ ├── T04_Pipelines.ipynb
│ ├── solutions
│ │ ├── 01_decoding_sklearn_solutions.html
│ │ └── 01_decoding_sklearn_solutions.ipynb
│ └── tree.png
├── VisualBehavior.ipynb
└── solutions
│ ├── BrainObservatory_solutions.html
│ ├── BrainObservatory_solutions.ipynb
│ ├── EphysObservatory_solutions.html
│ ├── EphysObservatory_solutions.ipynb
│ ├── Modeling
│ ├── biophysical_notebook
│ │ ├── biophysical_modeling_solutions.ipynb
│ │ ├── build_network.py
│ │ ├── config.json
│ │ └── run_bionet.py
│ ├── layer4_analysis_solutions.html
│ ├── layer4_analysis_solutions.ipynb
│ └── popnet_notebook
│ │ ├── components
│ │ ├── pop_models
│ │ │ ├── excitatory_pop.json
│ │ │ ├── filter_pop.json
│ │ │ └── inhibitory_pop.json
│ │ └── synaptic_models
│ │ │ ├── ExcToExc.json
│ │ │ ├── ExcToInh.json
│ │ │ ├── InhToExc.json
│ │ │ └── InhToInh.json
│ │ ├── config.json
│ │ ├── input_rates.csv
│ │ └── population_modeling_solutions.ipynb
│ ├── Other
│ ├── CellTypes_solutions.html
│ ├── CellTypes_solutions.ipynb
│ ├── Connectivity_solutions.html
│ └── Connectivity_solutions.ipynb
│ ├── VisualBehavior_solutions.html
│ └── VisualBehavior_solutions.ipynb
├── Git
├── 03 - Working with Github.md
├── 04 - Working with GitHub in the Cloud.md
├── gh_anim.gif
├── github_workflow_cheatsheet.pdf
├── tiles_00.png
├── tiles_01.png
├── tiles_02.png
├── tiles_03.png
├── tiles_04.png
└── tiles_05.png
├── LICENSE.txt
├── PythonBootcamp
├── 00_Introduction.ipynb
├── 01_Basic_Python_I_Object_and_Data_Structures.ipynb
├── 02_Basic_Python_II_Control_Flow_and_Functions.ipynb
├── 03_Intro_To_Scientific_Computing.ipynb
├── 04_Introduction_To_Numpy.ipynb
├── 05_Custom_Modules_and_Version_Control.ipynb
├── 06_Introduction_To_Matplotlib.ipynb
├── 07_Introduction_To_Pandas.ipynb
├── 08_Development_Tools.ipynb
├── 09_bike_crossing.ipynb
├── 10_glm_exercise.ipynb
├── 11_Image_data.ipynb
├── solutions
│ ├── 01_Basic_Python_I_Object_and_Data_Structures_solutions.html
│ ├── 01_Basic_Python_I_Object_and_Data_Structures_solutions.ipynb
│ ├── 02_Basic_Python_II_Control_Flow_and_Functions_solutions.html
│ ├── 02_Basic_Python_II_Control_Flow_and_Functions_solutions.ipynb
│ ├── 03_Intro_To_Scientific_Computing_solutions.html
│ ├── 03_Intro_To_Scientific_Computing_solutions.ipynb
│ ├── 04_Introduction_To_Numpy_solutions.html
│ ├── 04_Introduction_To_Numpy_solutions.ipynb
│ ├── 05_Custom_Modules_and_Version_Control_solutions.html
│ ├── 05_Custom_Modules_and_Version_Control_solutions.ipynb
│ ├── 06_Introduction_To_Matplotlib_solutions.html
│ ├── 06_Introduction_To_Matplotlib_solutions.ipynb
│ ├── 07_Introduction_To_Pandas_solutions.html
│ ├── 07_Introduction_To_Pandas_solutions.ipynb
│ ├── 09_bike_crossing_solutions.html
│ ├── 09_bike_crossing_solutions.ipynb
│ ├── 11_Image_data_solutions.html
│ └── 11_Image_data_solutions.ipynb
└── support_files
│ ├── CrossingDailyBarPlot.png
│ ├── CrossingMonthlyBarPlot.png
│ ├── SampleWorkbook.csv
│ ├── blurred.png
│ ├── commit_tree.svg
│ ├── cropped-SummerWorkshop_Header.png
│ ├── cross_sections.png
│ ├── gitkraken_1.png
│ ├── gitkraken_2.png
│ ├── gitkraken_3.png
│ ├── gitkraken_4.png
│ ├── grayscales.png
│ ├── leafplot.png
│ ├── maxpixel.png
│ ├── neuron.jpg
│ ├── parallel_commits.png
│ ├── pokemon_alopez247.csv
│ ├── rgb_array.svg
│ ├── stinkbug.png
│ ├── sweeps.csv
│ ├── thresholdedimage.png
│ └── topic_branches.png
├── README.md
└── resources
├── EphysObservatory
├── ecephys_manifest.csv
└── neuropixels.png
├── Neocortical Interneurons.png
├── change_detection_schematic.png
├── connectivity_metadata.csv
└── cropped-SummerWorkshop_Header.png
/.gitignore:
--------------------------------------------------------------------------------
1 | *.DS_Store
2 | *.nwb
3 | *.ipynb_checkpoints/
4 | .idea/
5 | **/x86_64/
6 | **/Modeling/**/output
7 |
8 |
--------------------------------------------------------------------------------
/Contributing.md:
--------------------------------------------------------------------------------
1 | # Allen Institute Contribution Agreement
2 |
3 | This document describes the terms under which you may make “Contributions” —
4 | which may include without limitation, software additions, revisions, bug fixes, configuration changes,
5 | documentation, or any other materials — to any of the projects owned or managed by the Allen Institute.
6 | If you have questions about these terms, please contact us at terms@alleninstitute.org.
7 |
8 | You certify that:
9 |
10 | • Your Contributions are either:
11 |
12 | 1. Created in whole or in part by you and you have the right to submit them under the designated license
13 | (described below); or
14 | 2. Based upon previous work that, to the best of your knowledge, is covered under an appropriate
15 | open source license and you have the right under that license to submit that work with modifications,
16 | whether created in whole or in part by you, under the designated license; or
17 |
18 | 3. Provided directly to you by some other person who certified (1) or (2) and you have not modified them.
19 |
20 | • You are granting your Contributions to the Allen Institute under the terms of the [2-Clause BSD license](https://opensource.org/licenses/BSD-2-Clause)
21 | (the “designated license”).
22 |
23 | • You understand and agree that the Allen Institute projects and your Contributions are public and that
24 | a record of the Contributions (including all metadata and personal information you submit with them) is
25 | maintained indefinitely and may be redistributed consistent with the Allen Institute’s mission and the
26 | 2-Clause BSD license.
27 |
--------------------------------------------------------------------------------
/Credits.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "
\n",
8 | "\n",
9 | "
/swdb_2018_tools`)
51 | 6. Click "Clone the repo!", then "Open Now!"
52 |
53 | ## The virtuous collaborative loop -- integrating changes and making your own
54 |
55 | You now have two copies of someone else's repository -- your fork on GitHub and the one on your computer. Those repositories all have a `master` branch. An important principle to remember:
56 |
57 | > **Leave the `master` branch alone**.
58 |
59 | > `master` is a shared resource, and it should always reflect the state of the primary repository's `master` branch
60 |
61 | Of course it's possible to work in the master branch directly, but you should prefer topic branches for two reasons:
62 |
63 | 1. What if you want to work on two different things at once?
64 | 2. Editing master directly creates a parallel history that is inconsistent with the primary repository.
65 |
66 | We'll now describe a process you can use to integrate others' changes and make changes safely. But first...
67 |
68 | #### Tell GitKraken about AllenInstitute/swdb_2018_tools
69 |
70 | Right now your repository only knows about your fork on GitHub (`user_name/swdb_2018_tools`, AKA `origin`). In order to incorporate changes from others, we need our repository to know where these changes are coming from. We only need to do this once.
71 |
72 | 1. Click the "+" in the "Remote" section on the left.
73 | 2. Paste in: https://github.com/alleninstitute/swdb_2018_tools
74 | 3. Accept the default name ("AllenInstitute")
75 |
76 | Now the `AllenInstitute` remote appears above your fork (`origin`) in the list below with its default branch (`master`).
77 |
78 | #### Loop Step 1: Pull changes from AllenInstitute to your computer
79 |
80 |
81 |
82 | Now we want to bring some changes from `AllenInstitute/master` down to your local master branch.
83 |
84 | 1. Right-click the `AllenInstitute` remote and click "Fetch AllenInstitute". This just checks Github for changes.
85 | 2. Right-click the `AllenInstitute/master` branch and choose "Fast-forward changes from AllenInstitute/master".
86 |
87 | That's it -- now you've incorporated changes from `AllenInstitute/master` to your local repository. You can now update your GitHub fork's master branch by clicking "Push".
88 |
89 | #### Loop Step 2: Create a topic branch and make a change
90 |
91 |
92 |
93 | Now we want to make some changes to this repository. Not the AllenInstitute copy (yet) -- just your local copy.
94 |
95 | Topic branches are great because they let you work on multiple things at the same time. In this case, they are necessary because remember: **don't touch the `master` branch**. So let's make our changes in a topic branch!
96 |
97 | 1. Click the 'Branch' icon at the top of the screen. Give it a cool name.
98 | 2. Make some changes that won't conflict. Leave GitKraken and create a file in the repo directory named after your Github user name.
99 | 3. GitKraken will notice the change -- click "View Change" in the upper right panel.
100 | 4. Mouse-over your new file and click "Stage File"
101 | 5. Type a commit message.
102 | 6. Click "Commit changes to 1 file"
103 |
104 | #### Loop Step 3: Push your branch to your fork on Github
105 |
106 |
107 |
108 | Our topic branch is ready, and we'd like to get our changes integrated into `AllenInstitute/master`. GitHub has a great facility for this, so we need to get your changes up to your GitHub fork. Remember: we always want `master` to be consistent with `AllenInstitute/master`, so we aren't going to merge the topic branch back into `local/master`. Instead, we are going to push your topic branch up to your fork and integrate it into `AllenInstitute/master` from there.
109 |
110 | 1. Right-click your branch, then click "push"
111 | 2. Name your branch on Github (use the default, which is the same name)
112 | 3. Click "Submit"
113 |
114 | Note: in GitKraken, when you click "push" you are pushing to `origin`, which is your fork on GitHub.
115 |
116 | #### Loop Step 4: Issue a pull request to AllenInstitute/master
117 |
118 |
119 |
120 | We have your topic branch up on your GitHub fork. Now we want to merge your changes into `AllenInstitute/master`. We ask for this via a "Pull Request":
121 |
122 | 1. Open Github to http://github.com/user_name/swdb_2018_tools
123 | 2. Github will notice your new branch. Click "Compare and Pull Request".
124 | 3. Write a short description.
125 | 4. Click "Create pull request"
126 | 5. **wait for the instructor to accept the pull request**
127 | 6. Click "delete branch" to delete your topic branch.
128 |
129 | Pull requests are great. We are working on a shared repository, so we really want to make sure that your changes are ready to integrate before pulling the trigger. Pull requests give everyone a mechanism to review and propose new changes before updating the `master` branch.
130 |
131 |
132 |
133 | #### Loop Step 5: Bring your own change back down to local/master
134 |
135 |
136 |
137 | Once your request has been approved, `AllenInstitute/master` now has your changes in it. Just bring your changes back down to `local/master` and we're done.
138 |
139 | 1. Check out `local/master` by double clicking on it.
140 | 2. Right-click the `AllenInstitute` remote and click "Fetch AllenInstitute". This just checks Github for changes.
141 | 3. Right-click the `AllenInstitute/master` branch and choose "Fast-forward changes from AllenInstitute/master".
142 | 4. Delete your topic branch: Right-click `cool_named_branch`, choose `Delete`.
143 |
144 | If you want to update your GitHub fork's master branch, just click "Push".
145 |
146 | #### All together now
147 |
148 |
149 |
--------------------------------------------------------------------------------
/Git/04 - Working with GitHub in the Cloud.md:
--------------------------------------------------------------------------------
1 | # git lesson 4: Working with GitHub in the Cloud
2 |
3 | This material assumes that you have worked through the previous lessons. At this point you should understand:
4 |
5 | * How to create a repository on your computer
6 | * Stage and commit changes to your repository
7 | * Create topic branches
8 | * Merge topic branches back to your master branch
9 | * Work on a shared repository with forks and pull requests
10 |
11 | This lesson is identical to lesson 3, but it teaches you how to perform the same operations from the command line.
12 |
13 | ## Overview: Why this (relatively complex) workflow?
14 |
15 | GitHub is an online code collaboration platform centered around `git`. This lesson shows you a particular way to use `git` and GitHub that is focused on collaboration. We are trying to solve a few problems here.
16 |
17 | 1. We want to contribute changes to a repository owned by someone else
18 | 2. We want to control when to use changes from that repository
19 | 3. We want to minimize nasty merge conflicts
20 |
21 | The rest of these instructions boil down to a few practices:
22 |
23 | 1. Work in a fork
24 | 2. Work in topic branches, not the master branch
25 | 3. Use pull requests
26 |
27 | Let's get started.
28 |
29 | ## Oh no I don't have a GUI
30 |
31 | Don't panic. These instructions replicate the exact workflow from lesson three, this time with the Jupyter terminal.
32 |
33 | ## Create a repository and copy it to your computer (forking and cloning)
34 |
35 |
36 |
37 | The first thing you should do is create a repository on GitHub. While you can always create a new repository, in this lesson we will be showing you how to collaborate with others on a single repository. You will do this by creating a copy of an existing repository. In `git` parlance, creating a copy of a repository is called `forking`.
38 |
39 | #### Fork a repository
40 |
41 | Do this:
42 |
43 | 1. Go here: [https://github.com/alleninstitute/swdb_2018_tools](https://github.com/alleninstitute/swdb_2018_tools)
44 | 2. Click the 'Fork' button.
45 | 3. If prompted, tell it to clone the repository to your profile.
46 |
47 | You now have a copy of the `swdb_2018_tools` repository all to yourself!
48 |
49 | #### Clone your fork to your computer (in the cloud!)
50 |
51 | Now we want to make changes to the fork we just created, so let's bring it down to our computers. Instead of GitKraken, we'll use the Jupyter Terminal.
52 |
53 | 1. Open the Jupyter Terminal ("new" => "terminal")
54 | 2. Copy the URL of the GitHub repository you want to clone to your clipboard. (e.g. https://github.com/your_user_name/swdb_2018_tools.git)
55 | 3. Clone the repo!
56 | ```bash
57 | $ cd ~/SageMaker/your_user_name/ # this is just for our AWS instances
58 | $ git clone https://github.com/your_user_name/swdb_2018_tools.git
59 | ```
60 |
61 | ## The virtuous collaborative loop -- integrating changes and making your own
62 |
63 | You now have two copies of someone else's repository -- the one on GitHub and the one on your computer. Those repositories all have a `master` branch. An important principle to remember:
64 |
65 | > **Leave the `master` branch alone**.
66 |
67 | > `master` is a shared resource, and it should always reflect the state of the primary repository's `master` branch
68 |
69 | Of course it's possible to work in the master branch directly, but you should prefer topic branches for two reasons:
70 |
71 | 1. What if you want to work on two different things at once?
72 | 2. Editing master directly creates a parallel history that is inconsistent with the primary repository.
73 | We'll now describe a process you can use to integrate others changes and make changes safely. But first...
74 |
75 | #### Tell `git` about AllenInstitute/swdb_2018_tools
76 |
77 | Right now your repository only knows about your fork (`your_user_name/swdb_2018_tools`). In order to incorporate changes from others, we need our repository to know where these changes are coming from. We only need to do this once.
78 |
79 | ```bash
80 | $ cd swdb_2018_tools # this is where your repo was cloned
81 | $ git remote add AllenInstitute https://github.com/alleninstitute/swdb_2018_tools
82 | ```
83 |
84 | #### Loop Step 1: Pull changes from AllenInstitute to your computer
85 |
86 |
87 |
88 | Now we want to bring some changes from `AllenInstitute/master` down to your local master branch.
89 |
90 | ```bash
91 | $ git checkout master # let's make sure we're on the master branch
92 | $ git pull AllenInstitute master
93 | ```
94 |
95 | That's it -- now you've incorporated changes from `AllenInstitute/master` to your local repository. You can now update GitHub's copy of your fork's master branch by pushing it:
96 |
97 | ```bash
98 | $ git push origin master
99 | ```
100 |
101 | #### Loop Step 2: Create a topic branch and make a change
102 |
103 |
104 |
105 | Now we want to make some changes to this repository. Not the AllenInstitute copy (yet) -- just your local copy.
106 |
107 | Topic branches are great because they let you work on multiple things at the same time. In this case, they are necessary because remember: **don't touch the `master` branch**. So let's make our changes in a topic branch!
108 |
109 | ```bash
110 | $ git checkout -b dyf_branch # create a new branch and check it out
111 | $ touch dyf.txt # create an empty file
112 | $ git add dyf.txt
113 | $ git commit -m "adding dyf.txt"
114 | ```
115 |
116 | #### Loop Step 3: Push your branch to your fork on Github
117 |
118 |
119 |
120 | Our topic branch is ready, and we'd like to get our changes integrated into `AllenInstitute/master`. GitHub has a great facility for this, so we need to get your changes up to your GitHub fork. Remember: we always want `master` to be consistent with `AllenInstitute/master`, so we aren't going to merge the topic branch back into `local/master`. Instead, we are going to push your topic branch up to your fork and integrate it into `AllenInstitute/master` from there.
121 |
122 | ```bash
123 | $ git push origin dyf_branch
124 | ```
125 |
126 | #### Loop Step 4: Issue a pull request to AllenInstitute/master
127 |
128 |
129 |
130 | We have your topic branch up on your GitHub fork. Now we want to merge your changes into `AllenInstitute/master`. We ask for this via a "Pull Request":
131 |
132 | 1. Open Github to http://github.com/your_user_name/swdb_2018_tools
133 | 2. Github will notice your new branch. Click "Compare and Pull Request".
134 | 3. Write a short description.
135 | 4. Click "Create pull request"
136 | 5. **wait for the instructor to accept the pull request**
137 | 6. Click "delete branch" to delete your topic branch.
138 |
139 | Pull requests are great. We are working on a shared repository, so we really want to make sure that your changes are ready to integrate before pulling the trigger. Pull requests give everyone a mechanism to review and propose new changes before updating the `master` branch.
140 |
141 | #### Loop Step 5: Bring your own change back down to local/master
142 |
143 |
144 |
145 | Once your request has been approved, `AllenInstitute/master` now has your changes in it. Just bring your changes back down to `local/master` and we're done.
146 |
147 | ```bash
148 | $ git checkout master # just to be safe
149 | $ git branch -d dyf_branch # delete the branch
150 | $ git pull AllenInstitute master
151 | ```
152 |
153 | You can now update GitHub's copy of your fork's master branch by pushing it:
154 |
155 | ```bash
156 | $ git push origin master
157 | ```
158 |
159 | #### All together now
160 |
161 |
162 |
163 | ## Bonus Material: How do I install this package and use it in my AWS instance?
164 |
165 | You've successfully cloned and made changes to the repo, but it would be nice if you could import it and use it like a normal python package.
166 |
167 | ```bash
168 | $ source activate python2 # this is the "conda_python2" kernel in Jupyter
169 | $ pip install --user -e swdb_2018_tools/
170 | ```
171 |
172 | Now you can open up a Jupyter notebook, choose the `conda_python2` kernel, and import the repo!
173 |
174 | ```python
175 | >>> import swdb_2018_tools as stools
176 | ```
177 |
--------------------------------------------------------------------------------
/Git/gh_anim.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/SWDB_2018/94c120088284b156f278e4758aa5691b68f0b76e/Git/gh_anim.gif
--------------------------------------------------------------------------------
/Git/github_workflow_cheatsheet.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/SWDB_2018/94c120088284b156f278e4758aa5691b68f0b76e/Git/github_workflow_cheatsheet.pdf
--------------------------------------------------------------------------------
/Git/tiles_00.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/SWDB_2018/94c120088284b156f278e4758aa5691b68f0b76e/Git/tiles_00.png
--------------------------------------------------------------------------------
/Git/tiles_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/SWDB_2018/94c120088284b156f278e4758aa5691b68f0b76e/Git/tiles_01.png
--------------------------------------------------------------------------------
/Git/tiles_02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/SWDB_2018/94c120088284b156f278e4758aa5691b68f0b76e/Git/tiles_02.png
--------------------------------------------------------------------------------
/Git/tiles_03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/SWDB_2018/94c120088284b156f278e4758aa5691b68f0b76e/Git/tiles_03.png
--------------------------------------------------------------------------------
/Git/tiles_04.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/SWDB_2018/94c120088284b156f278e4758aa5691b68f0b76e/Git/tiles_04.png
--------------------------------------------------------------------------------
/Git/tiles_05.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/SWDB_2018/94c120088284b156f278e4758aa5691b68f0b76e/Git/tiles_05.png
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Allen Institute Software License – This software license is the 2-clause BSD license
2 | plus a third clause that prohibits redistribution for commercial purposes without further permission.
3 |
4 | Copyright © 2018. Allen Institute. All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
7 | following conditions are met:
8 |
9 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
10 | following disclaimer.
11 |
12 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
13 | following disclaimer in the documentation and/or other materials provided with the distribution.
14 |
15 | 3. Redistributions for commercial purposes are not permitted without the Allen Institute’s written permission.
16 | For purposes of this license, commercial purposes is the incorporation of the Allen Institute's software into
17 | anything for which you will charge fees or other compensation. Contact terms@alleninstitute.org for commercial
18 | licensing opportunities.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
21 | INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
26 | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 |
--------------------------------------------------------------------------------
/PythonBootcamp/00_Introduction.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "
\n",
8 | "\n",
9 | "Python Bootcamp
\n",
10 | "August 18-19, 2018
"
11 | ]
12 | },
13 | {
14 | "cell_type": "markdown",
15 | "metadata": {},
16 | "source": [
17 | "\n",
18 | "
Introduction: What is Python?
\n",
19 | "\n",
20 | "
How does Python compare to other programming languages? What are its strengths and weaknesses? Why have we chosen Python for this course?\n",
21 | "\n",
22 | "
"
23 | ]
24 | },
25 | {
26 | "cell_type": "markdown",
27 | "metadata": {},
28 | "source": [
29 | "\n",
30 | "
1. Python is a *general purpose* programming language.
\n",
31 | "\n",
32 | "
Most programming languages used by scientists were designed from the beginning to handle numerical and scientific tasks:\n",
33 | "\n",
34 | " R, MATLAB, Igor, Mathematica, IDL\n",
35 | "\n",
36 | "
Advantages of specialized languages:\n",
37 | "\n",
38 | "
\n",
39 | "- Usually the first to support high-level functionality needed in science\n",
40 | "
- Language and programming environment are tailored to meet the needs of scientific developers\n",
41 | "
- Lower learning curve\n",
42 | "
\n",
43 | "\n",
44 | "
**General-purpose languages such as Python are intended to be useful for any type of programming, regardless of the topic or functionality needed:**\n",
45 | "\n",
46 | " C, C++, Java, Python\n",
47 | "\n",
48 | "
Python is used in virtually every corner of the software development landscape--web sites, server infrastructure, user interfaces, device control, machine learning, etc. \n",
49 | "\n",
50 | "
Advantages of general-purpose languages:\n",
51 | "\n",
52 | "
\n",
53 | "- More flexibility\n",
54 | "
- Languages require a lot of effort to learn; general-purpose languages offer greater return on that investment\n",
55 | "
- Much larger community of developers, greater longevity\n",
56 | "
- Better language design\n",
57 | "
\n",
58 | "\n",
59 | "
"
60 | ]
61 | },
62 | {
63 | "cell_type": "markdown",
64 | "metadata": {},
65 | "source": [
66 | "\n",
67 | "
2. Python is an *interpreted* programming language.
\n",
68 | "\n",
69 | "
Programming languages can be compiled, interpreted, or a hybrid of the two. \n",
70 | "\n",
71 | "
**Compiled languages** like C, C++, Java, and Julia take the program you write and convert it into optimized, machine-executable code. Often, compiled languages are both *faster to execute* and *more difficult to use*. \n",
72 | "\n",
73 | "
**Interpreted languages** like Python, MATLAB, Igor, and PHP use a pre-compiled interpreter to read your program code and execute it, one step at a time. Often, interpreted languages are *slower to execute* and *easier to use*.\n",
74 | "\n",
75 | "
**Question:** Is Python a slow language?
\n",
76 | "**Answer:** It depends on how you use it.\n",
77 | "\n",
78 | "
Ideally, we would like to have a language that is both fast to execute and easy to use, and Python (like many other languages) uses many different techniques to reduce the overhead incurred by being an interpreted language. We will learn about many of these throughout the day.\n",
79 | "\n",
80 | "
"
81 | ]
82 | },
83 | {
84 | "cell_type": "markdown",
85 | "metadata": {},
86 | "source": [
87 | "\n",
88 | "
3. Python is a community of developers
\n",
89 | "\n",
90 | "
Commercial development environments like MATLAB or Igor benefit from a monolithic community structure:\n",
91 | "\n",
92 | "
\n",
93 | "- One official source for core software packages\n",
94 | "
- One default IDE used by everybody\n",
95 | "
- One cohesive community of developers and staff for support\n",
96 | "
- Usually excellent, comprehensive documentation\n",
97 | "
\n",
98 | "\n",
99 | "
Newcomers to Python are often overwhelmed by the vast and confusing landscape of communities, distributions, 3rd-party modules, and development tools. Never fear! The scientific Python community has organized around a central \"stack\" of tools that are well supported and maintained, and have become the de-facto standards in the field. We will guide you through these core tools before releasing you into the wild.\n",
100 | "\n",
101 | "
Scientific Python Stack ([scipy.org](scipy.org)):\n",
102 | "\n",
103 | " NumPy: N-dimensional array package\n",
104 | " SciPy: Fundamental library for scientific computing\n",
105 | " Matplotlib: Comprehensive 2D Plotting\n",
106 | " IPython/Jupyter: Enhanced Interactive Console\n",
107 | " Sympy: Symbolic mathematics\n",
108 | " pandas: Data structures & analysis\n",
109 | "\n",
110 | "
"
111 | ]
112 | },
113 | {
114 | "cell_type": "markdown",
115 | "metadata": {},
116 | "source": [
117 | "\n",
118 | "
4. A carefully designed, open-source programming language
\n",
119 | "\n",
120 | "
The older, commercial specialty languages come with baggage:\n",
121 | "\n",
122 | "
\n",
123 | "- Evolved from simpler systems, retained idiosyncrasies\n",
124 | "
- Lack modern object-oriented language features, or these are tacked on to the original language\n",
125 | "
- Expensive, unreliable, cumbersome licensing schemes\n",
126 | "
- Closed source\n",
127 | "
\n",
128 | "\n",
129 | "
Python is widely regarded as one of the easiest, most intuitive, and most readable programming languages. Its language design combined with its open-source license are major factors in its widespread success.\n",
130 | "\n",
131 | "
"
132 | ]
133 | },
134 | {
135 | "cell_type": "markdown",
136 | "metadata": {},
137 | "source": [
138 | "\n",
139 | "
Where to get help
\n",
140 | "\n",
141 | "
Python documentation (https://www.python.org/doc/)\n",
142 | "
\n",
143 | "- [Tutorial](https://docs.python.org/2/tutorial/index.html)\n",
144 | "
- [Library Reference](https://docs.python.org/2/library/index.html)\n",
145 | "
\n",
146 | "\n",
147 | "
MATLAB-to-Python cheat sheets: \n",
148 | "
\n",
149 | "- http://mathesaurus.sourceforge.net/matlab-numpy.html \n",
150 | "
- http://mathesaurus.sourceforge.net/matlab-python-xref.pdf\n",
151 | "
\n",
152 | "\n",
153 | "\n",
154 | "[Scientific python stack documentation: numpy, scipy, matplotlib](http://scipy.org/docs.html)\n",
155 | "\n",
156 | "[Stack Overflow](http://stackoverflow.com)\n",
157 | " \n",
158 | "
"
159 | ]
160 | }
161 | ],
162 | "metadata": {
163 | "kernelspec": {
164 | "display_name": "Python 2",
165 | "language": "python",
166 | "name": "python2"
167 | },
168 | "language_info": {
169 | "codemirror_mode": {
170 | "name": "ipython",
171 | "version": 2
172 | },
173 | "file_extension": ".py",
174 | "mimetype": "text/x-python",
175 | "name": "python",
176 | "nbconvert_exporter": "python",
177 | "pygments_lexer": "ipython2",
178 | "version": "2.7.13"
179 | }
180 | },
181 | "nbformat": 4,
182 | "nbformat_minor": 0
183 | }
184 |
--------------------------------------------------------------------------------
/PythonBootcamp/08_Development_Tools.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "
\n",
8 | "\n",
9 | "Python Bootcamp
\n",
10 | "August 18-19, 2018
"
11 | ]
12 | },
13 | {
14 | "cell_type": "markdown",
15 | "metadata": {},
16 | "source": [
17 | "\n",
18 | "
Python Development Tools
\n",
19 | "\n",
20 | "\n",
21 | "
21 | "Commercial tools like MATLAB and Igor come with a single standardized development environment. In contrast, Python is supported by a large and bewildering ecosystem of development tools. This offers us a great deal of flexibility. In this section we will present a few of our favorite tools and discuss their relative strengths and weaknesses.\n",
22 | "
"
23 | ]
24 | },
25 | {
26 | "cell_type": "markdown",
27 | "metadata": {},
28 | "source": [
29 | "\n",
30 | "
What tools do you need? That depends on what you are trying to do. Some environments are designed to streamline certain types of development. The general features we are looking for are:\n",
31 | "\n",
32 | "
\n",
33 | "- Text editor\n",
34 | "
- Debugging tools\n",
35 | "
- Variable explorers\n",
36 | "
- Data visualization\n",
37 | "
- Project management\n",
38 | "
\n",
39 | "\n",
40 | "
"
41 | ]
42 | },
43 | {
44 | "cell_type": "markdown",
45 | "metadata": {},
46 | "source": [
47 | "\n",
48 | "
A note about interactivity
\n",
49 | "\n",
50 | "
**Most general-purpose programming environments are non-interactive:**\n",
51 | "
\n",
52 | "- Write code\n",
53 | "
- Start program, execute code\n",
54 | "
- Stop program\n",
55 | "
- Repeat\n",
56 | "
\n",
57 | "\n",
58 | "
Important reasons for this:\n",
59 | "
\n",
60 | "- Simplicity - interactivity requires complex infrastructure\n",
61 | "
- Repeatability - program must run the same way every time\n",
62 | "
\n",
63 | "\n",
64 | "
**Most scientific programming environments (Igor, Matlab, Mathematica, ...) prefer an interactive experience.**
\n",
65 | "Why? Data analysis is incremental.\n",
66 | "\n",
67 | "
\n",
68 | "- You want to see your data, test some code, check the results, tweak the code, check again, etc.\n",
69 | "
- It would be a pain (and possibly very slow) to have to stop and restart the entire program for every change.\n",
70 | "
\n",
71 | "\n",
72 | "
Interactivity means:\n",
73 | "\n",
74 | "
\n",
75 | "- The program that runs your code does not exit when it finishes running your code.\n",
76 | "
- All of your data remains in memory for you to inspect\n",
77 | "
- Code can be modified and executed in the running process.\n",
78 | "
- Maybe nice GUI tools that let you visualize, save, load data without writing any code.\n",
79 | "
\n",
80 | "\n",
81 | "
**Python supports both types of environment.** The tools we will introduce fall into one category or the other, but all of them incorporate features from both categories.\n",
82 | "
"
83 | ]
84 | },
85 | {
86 | "cell_type": "markdown",
87 | "metadata": {},
88 | "source": [
89 | "\n",
90 | "
Editor + console
\n",
91 | "\n",
92 | "This is the simplest approach to code development. Use a nice text editor (sublime is a good one) that at least handles indentation. Save your code to a file, and run it from a command line like:\n",
93 | "\n",
94 | " > python myfile.py\n",
95 | " \n",
96 | "This approach falls into the \"non-interactive\" category, but there are many tools available to make it more interactive. For example, simply running python with the `-i` flag will cause it to begin an interactive session immediately after your script completes:\n",
97 | "\n",
98 | " > python -i myfile.py\n",
99 | " \n",
100 | "This allows you to inspect the variables generated by your program and experiment with new ideas.\n",
101 | "\n",
102 | "\n",
103 | "
**Pros:**\n",
104 | "
\n",
105 | "- Simple, bare-metal approach. Nice if you want to use python without other user interfaces getting in your way. For example:\n",
106 | "
\n",
107 | "- IDEs introduce useful features at the cost of added complexity. (How much time will you spend configuring your IDE instead of coding?) \n",
108 | "
- Some IDEs have debuggers that slow down your code.\n",
109 | "
- Some IDEs change the way your code runs \"under the hood\".\n",
110 | "
\n",
111 | " - You decide exactly which tools you want to use.\n",
112 | "
\n",
113 | "\n",
114 | "
**Cons:**\n",
115 | "
\n",
116 | "- May be more difficult if you're not familiar with DOS/Bash/other CLI\n",
117 | "
- Non-integrated; you need to manually incorporate many other tools (eg for visualization, version control, debugging, etc.)\n",
118 | "
\n",
119 | "
"
120 | ]
121 | },
122 | {
123 | "cell_type": "markdown",
124 | "metadata": {},
125 | "source": [
126 | "\n",
127 | "
Jupyter (IPython) Notebook
\n",
128 | "\n",
129 | "
Jupyter Notebook provides an *interactive* programming environment similar to the Mathematica notebook.\n",
130 | "
\n",
131 | "- Code is divided into chunks called \"cells\".\n",
132 | "
- Cells can be executed in any order, independently of each other.\n",
133 | "
- Results from cell execution are displayed below each cell.\n",
134 | "
\n",
135 | "\n",
136 | "A Python interpreter runs *continuously* in the background. Every time you execute a cell, the state of the interpreter is updated.\n",
137 | "\n",
138 | "\n",
139 | "
**Pros:**\n",
140 | "
\n",
141 | "- Notebooks are excellent for storing and presenting the results of data analysis, simulations, etc. because code, results, and discussion are all combined in a single document.\n",
142 | "
- Any user should be able to execute your notebook and reproduce your results.\n",
143 | " (this is true of all code, but not necessarily as easy)\n",
144 | "
- Good for prototyping--easy to break up the script into multiple intermediate steps and iteratively develop a small chunk at a time.\n",
145 | "
\n",
146 | "\n",
147 | "
**Cons:**\n",
148 | "
\n",
149 | "- Notebooks are not good for developing large applications or user interfaces.\n",
150 | "
- Difficult to use if your project incorporates multiple Python files.\n",
151 | "
- Possible to generate unreproducible results because cells can be executed out of order.\n",
152 | "
- Awkward web browser interface, text editing is clumsy with limited configurability.\n",
153 | "
- Difficult to integrate with debugging and version integration tools.\n",
154 | "
\n",
155 | "
"
156 | ]
157 | },
158 | {
159 | "cell_type": "markdown",
160 | "metadata": {},
161 | "source": [
162 | "\n",
163 | "
PyCharm (IDE)
\n",
164 | "\n",
165 | "
PyCharm is a popular integrated development environment. It includes a fully-featured text editor, excellent debugging tools, interactive data browsing, project management, and version control integration. It is easy to install but *not* included with Anaconda.\n",
166 | "\n",
167 | "
**Pros:**\n",
168 | "
\n",
169 | "- Excellent, fully-featured text editor.\n",
170 | "
\n",
171 | " - Syntax highlighting, indentation, autocomplete\n",
172 | "
- Context-sensitive help (ctrl-q)\n",
173 | "
- Can view Jupyter Notebook files.\n",
174 | "
\n",
175 | " - Excellent debugging tools\n",
176 | "
\n",
177 | " - Breakpoints / stepping\n",
178 | "
- Stack inspection\n",
179 | "
- Interactive object inspection (plotting with matplotlib!)\n",
180 | "
- Array visualization\n",
181 | "
\n",
182 | " - Project management, code versioning\n",
183 | "
\n",
184 | " - Tracks files and execution environment\n",
185 | "
- If you are working on a project with multiple files, this can be very helpful\n",
186 | "
\n",
187 | "
\n",
188 | "\n",
189 | "
**Cons:**\n",
190 | "
\n",
191 | "- **Lots** of features to wade through, overhead in learning new tools.\n",
192 | "
- Project management can get in the way if you just want to work with a single file.\n",
193 | "
- Interactivity is somewhat limited.\n",
194 | "
\n",
195 | "
"
196 | ]
197 | },
198 | {
199 | "cell_type": "markdown",
200 | "metadata": {},
201 | "source": [
202 | "\n",
203 | "
Spyder
\n",
204 | "\n",
205 | "Spyder is an integrated development environment with many similarities to MATLAB's default environment. It is included with Anaconda and provides a more interactive approach to development. \n",
206 | "\n",
207 | "Matlab + Notebook - like features:\n",
208 | "* Divide script into cells, execute in persistent process\n",
209 | "* Inline matplotlib results\n",
210 | "\n",
211 | "
**Pros:**\n",
212 | "
\n",
213 | "- Decent text editor, debugger, and project management.\n",
214 | "
- More interactive than pycharm--console, variable explorer, visualization tools.\n",
215 | "
- List of set scripts, good for running already-written analysis tools. \n",
216 | "
- Notebook-like features--divide a script into cells and execute in a background Python process.\n",
217 | "
\n",
218 | "\n",
219 | "
**Cons:**\n",
220 | "
\n",
221 | "- Somewhat less mature and polished than pycharm.\n",
222 | "
- Debugger can be difficult to use, no stack inspection.\n",
223 | "
\n",
224 | "
"
225 | ]
226 | }
227 | ],
228 | "metadata": {
229 | "kernelspec": {
230 | "display_name": "Python 2",
231 | "language": "python",
232 | "name": "python2"
233 | },
234 | "language_info": {
235 | "codemirror_mode": {
236 | "name": "ipython",
237 | "version": 2
238 | },
239 | "file_extension": ".py",
240 | "mimetype": "text/x-python",
241 | "name": "python",
242 | "nbconvert_exporter": "python",
243 | "pygments_lexer": "ipython2",
244 | "version": "2.7.13"
245 | }
246 | },
247 | "nbformat": 4,
248 | "nbformat_minor": 0
249 | }
250 |
--------------------------------------------------------------------------------
/PythonBootcamp/09_bike_crossing.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "
\n",
8 | "\n",
9 | "Python Bootcamp
\n",
10 | "August 18-19, 2018
"
11 | ]
12 | },
13 | {
14 | "cell_type": "markdown",
15 | "metadata": {},
16 | "source": [
17 | "\n",
18 | "
Exercise: Pandas, Matplotlib, Numpy
\n",
19 | "
\n",
20 | "**Seattle tracks bike crossings across the Fremont Bridge, one of the major north/south crossings of the Ship Canal, and makes data available online**\n",
21 | "
\n",
22 | "
\n",
23 | "This exercise uses that data to demonstrate some basic Pandas functionality, including:\n",
24 | "
\n",
25 | " - Sorting data
\n",
26 | " - Working with datetime objects
\n",
27 | " - Using Pandas built-in plotting methods
\n",
28 | " - Continued practice with Matplotlib to generate custom plots
\n",
29 | "
\n",
30 | "\n",
31 | "
\n"
32 | ]
33 | },
34 | {
35 | "cell_type": "markdown",
36 | "metadata": {},
37 | "source": [
38 | "\n",
39 | "
We'll need the following libraries
\n",
40 | "\n",
41 | "
\n",
42 | " - numpy (import as np)
\n",
43 | " - pandas (import as pd)
\n",
44 | " - matplotlib.pyplot (import as plt)
\n",
45 | "
\n",
46 | "\n",
47 | "
\n",
48 | "And don't forget to turn on the inline (or notebook) plotting magic\n",
49 | "
\n",
50 | "\n",
51 | "
\n"
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": null,
57 | "metadata": {
58 | "collapsed": true
59 | },
60 | "outputs": [],
61 | "source": [
62 | "# Import packages"
63 | ]
64 | },
65 | {
66 | "cell_type": "markdown",
67 | "metadata": {},
68 | "source": [
69 | "\n",
70 | "
Download and open the data, then do some initial formatting
\n",
71 | "\n",
72 | "
Data is from October 2012 to the end of the last month \n",
73 | "\n",
74 | "
get the data using the read_csv method from the following URL (web connection required): \n",
75 | "https://data.seattle.gov/api/views/65db-xm6k/rows.csv?accessType=DOWNLOAD\n",
76 | "\n",
77 | "
"
78 | ]
79 | },
80 | {
81 | "cell_type": "code",
82 | "execution_count": null,
83 | "metadata": {
84 | "collapsed": true
85 | },
86 | "outputs": [],
87 | "source": [
88 | "# Read the CSV from the above link"
89 | ]
90 | },
91 | {
92 | "cell_type": "markdown",
93 | "metadata": {},
94 | "source": [
95 | "\n",
96 | "\n",
97 | "
Take a look at the first few columns using the .head() method\n",
98 | "\n",
99 | "
"
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "execution_count": null,
105 | "metadata": {},
106 | "outputs": [],
107 | "source": [
108 | "# Display the head"
109 | ]
110 | },
111 | {
112 | "cell_type": "markdown",
113 | "metadata": {},
114 | "source": [
115 | "\n",
116 | "\n",
117 | "
Shorten the column names to make them easier to reference\n",
118 | "\n",
119 | "
"
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": null,
125 | "metadata": {
126 | "collapsed": true
127 | },
128 | "outputs": [],
129 | "source": [
130 | "#rename data columns 'northbound' and 'southbound'"
131 | ]
132 | },
133 | {
134 | "cell_type": "markdown",
135 | "metadata": {},
136 | "source": [
137 | "\n",
138 | "\n",
139 | "
Add a column containing the total crossings for each hour\n",
140 | "\n",
141 | "
"
142 | ]
143 | },
144 | {
145 | "cell_type": "code",
146 | "execution_count": null,
147 | "metadata": {},
148 | "outputs": [],
149 | "source": [
150 | "df['total'] = #add a total column"
151 | ]
152 | },
153 | {
154 | "cell_type": "markdown",
155 | "metadata": {},
156 | "source": [
157 | "\n",
158 | "\n",
159 | "
Take a look at the beginning and end of the dataset. How many total entries are in the table?\n",
160 | "\n",
161 | "
"
162 | ]
163 | },
164 | {
165 | "cell_type": "code",
166 | "execution_count": null,
167 | "metadata": {},
168 | "outputs": [],
169 | "source": [
170 | "#display the head again"
171 | ]
172 | },
173 | {
174 | "cell_type": "code",
175 | "execution_count": null,
176 | "metadata": {},
177 | "outputs": [],
178 | "source": [
179 | "#display the tail"
180 | ]
181 | },
182 | {
183 | "cell_type": "code",
184 | "execution_count": null,
185 | "metadata": {},
186 | "outputs": [],
187 | "source": [
188 | "#print the length"
189 | ]
190 | },
191 | {
192 | "cell_type": "markdown",
193 | "metadata": {},
194 | "source": [
195 | "\n",
196 | "\n",
197 | "
Take advantage of Pandas datetime functionality to make filtering easy
\n",
198 | "
Take a look at one of the date entries, what is its data type?\n",
199 | "\n",
200 | "
"
201 | ]
202 | },
203 | {
204 | "cell_type": "code",
205 | "execution_count": null,
206 | "metadata": {},
207 | "outputs": [],
208 | "source": [
209 | "#print the type of one entry"
210 | ]
211 | },
212 | {
213 | "cell_type": "markdown",
214 | "metadata": {},
215 | "source": [
216 | "\n",
217 | "\n",
218 | "\n",
219 | "\n",
220 | "
We need to convert it to a datetime object, which Pandas can then recognize for easy parsing by date\n",
221 | "\n",
222 | "
"
223 | ]
224 | },
225 | {
226 | "cell_type": "code",
227 | "execution_count": null,
228 | "metadata": {},
229 | "outputs": [],
230 | "source": [
231 | "# look up the pd.to_datetime() method"
232 | ]
233 | },
234 | {
235 | "cell_type": "code",
236 | "execution_count": null,
237 | "metadata": {},
238 | "outputs": [],
239 | "source": [
240 | "# look at the head again, how have the dates changed?"
241 | ]
242 | },
243 | {
244 | "cell_type": "markdown",
245 | "metadata": {},
246 | "source": [
247 | "\n",
248 | "
Now plot the total column vs. date
\n",
249 | "
Notice how easily Pandas deals with the date column. It automatically parses and labels the x-axis in a rational way.\n",
250 | "\n",
251 | "\n",
252 | "
"
253 | ]
254 | },
255 | {
256 | "cell_type": "code",
257 | "execution_count": null,
258 | "metadata": {},
259 | "outputs": [],
260 | "source": [
261 | "#use the df.plot() method with x being date and y being total"
262 | ]
263 | },
264 | {
265 | "cell_type": "markdown",
266 | "metadata": {},
267 | "source": [
268 | "\n",
269 | "\n",
270 | "
To make parsing by date easier, add some columns that explicitly list year, month, hour, day of week
\n",
271 | "
Pandas recently added the handy dt accessor, which makes this very easy: \n",
272 | "\n",
273 | "
http://pandas.pydata.org/pandas-docs/version/0.15.0/basics.html#dt-accessor\n",
274 | "\n",
275 | "\n",
276 | "
"
277 | ]
278 | },
279 | {
280 | "cell_type": "code",
281 | "execution_count": null,
282 | "metadata": {},
283 | "outputs": [],
284 | "source": [
285 | "# make new columns for year, month, hour, and day of week. Here's how to make the year column:\n",
286 | "df['year']=df['Date'].dt.year"
287 | ]
288 | },
289 | {
290 | "cell_type": "markdown",
291 | "metadata": {},
292 | "source": [
293 | "\n",
294 | "\n",
295 | "
What is the most common hourly count?
\n",
296 | "
Make a histogram of hourly counts\n",
297 | "\n",
298 | "\n",
299 | "
"
300 | ]
301 | },
302 | {
303 | "cell_type": "code",
304 | "execution_count": null,
305 | "metadata": {},
306 | "outputs": [],
307 | "source": [
308 | "#make a histogram of the values in the total column"
309 | ]
310 | },
311 | {
312 | "cell_type": "markdown",
313 | "metadata": {},
314 | "source": [
315 | "\n",
316 | "\n",
317 | "
Find the busiest month for total crossings
\n",
318 | "
One approach is to use nested for-loops to search over all combinations of unique years and months, checking against the maximum value on each iteration\n",
319 | "\n",
320 | "\n",
321 | "
"
322 | ]
323 | },
324 | {
325 | "cell_type": "code",
326 | "execution_count": null,
327 | "metadata": {},
328 | "outputs": [],
329 | "source": [
330 | "#try writing a for-loop to do this. But don't try too hard - there's a one-line way of doing this instead!"
331 | ]
332 | },
333 | {
334 | "cell_type": "markdown",
335 | "metadata": {},
336 | "source": [
337 | "\n",
338 | "\n",
339 | "\n",
340 | "\n",
341 | "
Another approach is to use the Pandas \"groupby\" method\n",
342 | "\n",
343 | "\n",
344 | "
"
345 | ]
346 | },
347 | {
348 | "cell_type": "code",
349 | "execution_count": null,
350 | "metadata": {},
351 | "outputs": [],
352 | "source": [
353 | "#Instead of a for-loop, you can use the 'groupby' method, sorting by year and month"
354 | ]
355 | },
356 | {
357 | "cell_type": "code",
358 | "execution_count": null,
359 | "metadata": {
360 | "scrolled": false
361 | },
362 | "outputs": [],
363 | "source": [
364 | "#print the maximum month from the grouped dataframe"
365 | ]
366 | },
367 | {
368 | "cell_type": "markdown",
369 | "metadata": {},
370 | "source": [
371 | "\n",
372 | "\n",
373 | "
Make a bar plot showing crossings for each month
\n",
374 | "
Start with the \"groupby\" method\n",
375 | "\n",
376 | "\n",
377 | "
"
378 | ]
379 | },
380 | {
381 | "cell_type": "code",
382 | "execution_count": null,
383 | "metadata": {},
384 | "outputs": [],
385 | "source": [
386 | "#using the grouped dataframe, make a bar plot with the total crossings for each month"
387 | ]
388 | },
389 | {
390 | "cell_type": "markdown",
391 | "metadata": {},
392 | "source": [
393 | "\n",
394 | "\n",
395 | "\n",
396 | "
To gain a bit more control over the plot, make a temporary dataframe called \"monthdf\" that contains only the data we're interested in plotting\n",
397 | "\n",
398 | "
"
399 | ]
400 | },
401 | {
402 | "cell_type": "code",
403 | "execution_count": null,
404 | "metadata": {},
405 | "outputs": [],
406 | "source": [
407 | "monthdf = pd.DataFrame(columns=('month', 'year', 'total'))\n",
408 | "for year in df.year.unique():\n",
409 | " for month in df.month.unique():\n",
410 | " monthdf = monthdf.append(pd.DataFrame({'month':[month],\n",
411 | " 'year':[year],\n",
412 | " 'total':[df[(df.month==month) & (df.year==year)].total.sum()]}))"
413 | ]
414 | },
415 | {
416 | "cell_type": "markdown",
417 | "metadata": {},
418 | "source": [
419 | "\n",
420 | "\n",
421 | "\n",
422 | "
Now make another version of the plot where months are grouped and color coded by year\n",
423 | "
"
424 | ]
425 | },
426 | {
427 | "cell_type": "code",
428 | "execution_count": null,
429 | "metadata": {},
430 | "outputs": [],
431 | "source": [
432 | "# Make the plot here"
433 | ]
434 | },
435 | {
436 | "cell_type": "markdown",
437 | "metadata": {},
438 | "source": [
439 | "\n",
440 | "\n",
441 | "
Make a bar plot showing crossings by day of week, separated by year
\n",
442 | "
Again, make a temporary dataframe containing only the data we need for the plot\n",
443 | "\n",
444 | "
Make sure to normalize the sum by the total number of days in each year!\n",
445 | "
"
446 | ]
447 | },
448 | {
449 | "cell_type": "code",
450 | "execution_count": null,
451 | "metadata": {},
452 | "outputs": [],
453 | "source": [
454 | "#Try making another intermediate dataframe that contains data sorted by day"
455 | ]
456 | },
457 | {
458 | "cell_type": "markdown",
459 | "metadata": {},
460 | "source": [
461 | "\n",
462 | "\n",
463 | "\n",
464 | "
Make a bar plot where days of week are grouped and color coded by year. \n",
465 | "
Again, make a temporary dataframe containing only the data we need for the plot\n",
466 | "\n",
467 | "\n",
468 | "
"
469 | ]
470 | },
471 | {
472 | "cell_type": "code",
473 | "execution_count": null,
474 | "metadata": {},
475 | "outputs": [],
476 | "source": [
477 | "# make a similar plot below"
478 | ]
479 | }
480 | ],
481 | "metadata": {
482 | "kernelspec": {
483 | "display_name": "Python [default]",
484 | "language": "python",
485 | "name": "python2"
486 | },
487 | "language_info": {
488 | "codemirror_mode": {
489 | "name": "ipython",
490 | "version": 2
491 | },
492 | "file_extension": ".py",
493 | "mimetype": "text/x-python",
494 | "name": "python",
495 | "nbconvert_exporter": "python",
496 | "pygments_lexer": "ipython2",
497 | "version": "2.7.13"
498 | },
499 | "widgets": {
500 | "state": {},
501 | "version": "1.1.2"
502 | }
503 | },
504 | "nbformat": 4,
505 | "nbformat_minor": 1
506 | }
507 |
--------------------------------------------------------------------------------
/PythonBootcamp/10_glm_exercise.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "
\n",
8 | "\n",
9 | "Python Bootcamp
\n",
10 | "August 18-19, 2018
"
11 | ]
12 | },
13 | {
14 | "cell_type": "markdown",
15 | "metadata": {},
16 | "source": [
17 | "\n",
18 | "
Exercise \n",
19 | "
\n",
20 | "
Numpy, Scipy, Pandas\n",
21 | "
\n",
22 | "**Weisberg (1985) makes available a dataset of faculty salaries, along with several possible predictors. We will analyze these data using a general linear model**\n",
23 | "
\n",
24 | "
\n",
25 | "This exercise covers:\n",
26 | "
\n",
27 | " - Unexpected Formats
\n",
28 | " - Statistics with numpy and scipy
\n",
29 | " - Testing methods on dummy data
\n",
30 | "
\n",
31 | "\n",
32 | "
"
33 | ]
34 | },
35 | {
36 | "cell_type": "markdown",
37 | "metadata": {},
38 | "source": [
39 | "## Requirements\n",
40 | "\n",
41 | "* pandas\n",
42 | "* numpy\n",
43 | "* scipy stats\n",
44 | "\n",
45 | "You should also probably import division from \\__future__ - just to be safe."
46 | ]
47 | },
48 | {
49 | "cell_type": "code",
50 | "execution_count": 1,
51 | "metadata": {
52 | "collapsed": true
53 | },
54 | "outputs": [],
55 | "source": [
56 | "from __future__ import division\n",
57 | "\n",
58 | "import pandas as pd\n",
59 | "import numpy as np\n",
60 | "import scipy.stats"
61 | ]
62 | },
63 | {
64 | "cell_type": "markdown",
65 | "metadata": {},
66 | "source": [
67 | "## The Data\n",
68 | "\n",
69 | "These data come from a study of salaries among university faculty. The data file is [here](http://data.princeton.edu/wws509/datasets/salary.dat) and a description of the coding is [here](http://data.princeton.edu/wws509/datasets/#salary) (You should probably at least glance at this).\n",
70 | "\n",
71 | "Load these data into a pandas dataframe. Note - the delimiter is not a comma!"
72 | ]
73 | },
74 | {
75 | "cell_type": "code",
76 | "execution_count": 2,
77 | "metadata": {
78 | "collapsed": false
79 | },
80 | "outputs": [],
81 | "source": [
82 | "data = pd.read_csv('http://data.princeton.edu/wws509/datasets/salary.dat', sep='\\s+')"
83 | ]
84 | },
85 | {
86 | "cell_type": "markdown",
87 | "metadata": {},
88 | "source": [
89 | "## A fitting exercise\n",
90 | "\n",
91 | "We'll use a general linear model to analyze these data. In order to do this, we need to be able to fit such models. Fortunately, numpy's linalg module contains a method for least squares fitting. Learn how to use this by generating some noisy (gaussian) data from a toy linear model (try numpy's random module) and then recovering your coefficients.\n",
92 | "\n",
93 | "Note: functions are good."
94 | ]
95 | },
96 | {
97 | "cell_type": "code",
98 | "execution_count": 3,
99 | "metadata": {
100 | "collapsed": true
101 | },
102 | "outputs": [],
103 | "source": [
104 | "def make_test_data(nobs, true_coefs, sigma):\n",
105 | " \n",
106 | " npar = len(true_coefs)\n",
107 | " design = np.random.rand(nobs, npar)\n",
108 | " target = np.dot(design, true_coefs) + np.random.randn(nobs) * sigma\n",
109 | " \n",
110 | " return design, target"
111 | ]
112 | },
113 | {
114 | "cell_type": "code",
115 | "execution_count": 4,
116 | "metadata": {
117 | "collapsed": false
118 | },
119 | "outputs": [],
120 | "source": [
121 | "test_design, test_target = make_test_data(20, np.array([2, 3, 7]), 0.1)"
122 | ]
123 | },
124 | {
125 | "cell_type": "code",
126 | "execution_count": 5,
127 | "metadata": {
128 | "collapsed": true
129 | },
130 | "outputs": [],
131 | "source": [
132 | "coefficients, residuals, rank, sv = np.linalg.lstsq(test_design, test_target)"
133 | ]
134 | },
135 | {
136 | "cell_type": "code",
137 | "execution_count": 6,
138 | "metadata": {
139 | "collapsed": false
140 | },
141 | "outputs": [
142 | {
143 | "name": "stdout",
144 | "output_type": "stream",
145 | "text": [
146 | "[ 2.03368024 2.92780121 6.96839643]\n"
147 | ]
148 | }
149 | ],
150 | "source": [
151 | "print(coefficients)"
152 | ]
153 | },
154 | {
155 | "cell_type": "markdown",
156 | "metadata": {},
157 | "source": [
158 | "## Reformatting the data\n",
159 | "\n",
160 | "If you've taken a look at the data (hint), you probably know that it is not properly formatted for the method of least-squares fitting that we are using here. It has:\n",
161 | "\n",
162 | "* categorical variables in single columns\n",
163 | "* no distinction between the predictor and estimand columns\n",
164 | "* no way to specify an intercept\n",
165 | "\n",
166 | "Write a function to rectify this situation. Your function should have the following signature:\n",
167 | "\n",
168 | "```python\n",
169 | "def glm_data_reformat(dataframe, target_name, cont_pred=None, cat_pred=None, intercept=True):\n",
170 | " '''Sets up a dataframe for fitting with numpy (main effects only)\n",
171 | " \n",
172 | " Parameters\n",
173 | " ---------\n",
174 | " dataframe : pandas df\n",
175 | " contains mix of categorical and continuous predictors\n",
176 | " target_name : str\n",
177 | " column header of target variable (treated as continuous)\n",
178 | " cont_pred : list of str, optional\n",
179 | " column headers of continuous predictors, if any\n",
180 | " cat_pred : list of str, optional\n",
181 | " column headers of categorical predictors, if any\n",
182 | " intercept : bool, optional\n",
183 | " fit an intercept? Defaults to yes.\n",
184 | " \n",
185 | " Returns\n",
186 | " -------\n",
187 | " design : ndarray (n_observations x n_parameters)\n",
188 | " predictor data.\n",
189 | " target : ndarray (n_observations)\n",
190 | " estimand\n",
191 | " design_names : list of str\n",
192 | " names of parameters in design matrix columns\n",
193 | " \n",
194 | " '''\n",
195 | "\n",
196 | " # your code here\n",
197 | "\n",
198 | " return design, target, design_names\n",
199 | "```\n",
200 | "\n",
201 | "Note: You will need to code the continuous variables somehow. This will require spooling them out into multiple columns of the design matrix."
202 | ]
203 | },
204 | {
205 | "cell_type": "code",
206 | "execution_count": 7,
207 | "metadata": {
208 | "collapsed": true
209 | },
210 | "outputs": [],
211 | "source": [
212 | "def glm_data_reformat(dataframe, target_name, cont_pred=None, cat_pred=None, intercept=True):\n",
213 | " '''Sets up a dataframe for fitting with numpy (main effects only)\n",
214 | "\n",
215 | " Parameters\n",
216 | " ---------\n",
217 | " dataframe : pandas df\n",
218 | " contains mix of categorical and continuous predictors\n",
219 | " target_name : str\n",
220 | " column header of target variable (treated as continuous)\n",
221 | " cont_pred : list of str, optional\n",
222 | " column headers of continuous predictors, if any\n",
223 | " cat_pred : list of str, optional\n",
224 | " column headers of categorical predictors, if any\n",
225 | " intercept : bool, optional\n",
226 | " fit an intercept? Defaults to yes.\n",
227 | "\n",
228 | " Returns\n",
229 | " -------\n",
230 | " design : ndarray (n_observations x n_parameters)\n",
231 | " predictor data.\n",
232 | " target : ndarray (n_observations)\n",
233 | " estimand\n",
234 | " design_names : list of str\n",
235 | " names of parameters in design matrix columns\n",
236 | "\n",
237 | " '''\n",
238 | "\n",
239 | " if cont_pred is None: cont_pred = []\n",
240 | " if cat_pred is None: cat_pred = []\n",
241 | " \n",
242 | " design_names = []\n",
243 | " columns = []\n",
244 | " \n",
245 | " for var_name in cont_pred:\n",
246 | " columns.append(dataframe[var_name])\n",
247 | " design_names.append(var_name)\n",
248 | " \n",
249 | " for var_name in cat_pred:\n",
250 | " \n",
251 | " levels = dataframe[var_name].unique()\n",
252 | " nlevels = len(levels)\n",
253 | " \n",
254 | " if nlevels < 2:\n",
255 | " continue\n",
256 | " \n",
257 | " for ii, level in enumerate(levels):\n",
258 | " \n",
259 | " if ii == nlevels - 1 :\n",
260 | " break\n",
261 | " \n",
262 | " indicator = np.zeros(dataframe.shape[0])\n",
263 | " indicator[np.where(dataframe[var_name] == level)] = 1\n",
264 | " columns.append(indicator)\n",
265 | " design_names.append('{0}_as_{1}'.format(var_name, level))\n",
266 | " \n",
267 | " if intercept:\n",
268 | " columns.append(np.ones(dataframe.shape[0]))\n",
269 | " design_names.append('intercept')\n",
270 | " \n",
271 | "\n",
272 | " return np.array(columns).T, np.array(dataframe[target_name]), design_names\n"
273 | ]
274 | },
275 | {
276 | "cell_type": "code",
277 | "execution_count": 8,
278 | "metadata": {
279 | "collapsed": true
280 | },
281 | "outputs": [],
282 | "source": [
283 | "full_design, full_target, full_design_names = glm_data_reformat(\n",
284 | " data, target_name='sl', cont_pred=['yr', 'yd'], cat_pred=['dg', 'rk', 'sx'], intercept=True\n",
285 | " )"
286 | ]
287 | },
288 | {
289 | "cell_type": "markdown",
290 | "metadata": {},
291 | "source": [
292 | "If you have not already, test your function:"
293 | ]
294 | },
295 | {
296 | "cell_type": "code",
297 | "execution_count": null,
298 | "metadata": {
299 | "collapsed": true
300 | },
301 | "outputs": [],
302 | "source": []
303 | },
304 | {
305 | "cell_type": "markdown",
306 | "metadata": {},
307 | "source": [
308 | "Now use this function and the linalg module to format the data and fit a model of your choice."
309 | ]
310 | },
311 | {
312 | "cell_type": "code",
313 | "execution_count": 9,
314 | "metadata": {
315 | "collapsed": false
316 | },
317 | "outputs": [],
318 | "source": [
319 | "full_coefficients, residuals, rank, sv = np.linalg.lstsq(full_design, full_target)"
320 | ]
321 | },
322 | {
323 | "cell_type": "markdown",
324 | "metadata": {},
325 | "source": [
326 | "## Analysis\n",
327 | "\n",
328 | "You have a model, let's do something with it. In particular, we will investigate whether there is an effect of sex on salary in these data. We can use a sequential sum of squares f-test, where:\n",
329 | "\n",
330 | "$$\n",
331 | "f = \\frac{\\frac{SSE_{red} - SSE_{full}}{DFE_{red} - DFE_{full}}} {\\frac{SSE_{full}}{DFE_{full}}}\n",
332 | "$$\n",
333 | "Here SSE is the sum of squared errors (i.e. the sum of the squared residuals). DFE is the error degrees of freedom (number of observations - number of design matrix columns). The full model is exactly what it sounds like, while the red (reduced) model is just the same model sans one parameter.\n",
334 | "\n",
335 | "Fit a full and reduced model for a parameter of interest and generate an f-value."
336 | ]
337 | },
338 | {
339 | "cell_type": "code",
340 | "execution_count": 10,
341 | "metadata": {
342 | "collapsed": false
343 | },
344 | "outputs": [],
345 | "source": [
346 | "red_design, red_target, red_design_names = glm_data_reformat(\n",
347 | " data, target_name='sl', cont_pred=['yr', 'yd'], cat_pred=['dg', 'rk'], intercept=True\n",
348 | " )\n",
349 | "red_coefficients, _, _, _ = np.linalg.lstsq(red_design, red_target)\n"
350 | ]
351 | },
352 | {
353 | "cell_type": "code",
354 | "execution_count": 11,
355 | "metadata": {
356 | "collapsed": false
357 | },
358 | "outputs": [],
359 | "source": [
360 | "full_sse = ((np.dot(full_design, full_coefficients) - full_target)**2).sum()\n",
361 | "red_sse = ((np.dot(red_design, red_coefficients) - red_target)**2).sum()\n",
362 | "\n",
363 | "full_dfm = len(full_design_names) \n",
364 | "red_dfm = len(red_design_names)\n",
365 | "\n",
366 | "full_dfe = full_design.shape[0] - full_dfm\n",
367 | "red_dfe = red_design.shape[0] - red_dfm"
368 | ]
369 | },
370 | {
371 | "cell_type": "code",
372 | "execution_count": 12,
373 | "metadata": {
374 | "collapsed": false
375 | },
376 | "outputs": [],
377 | "source": [
378 | "fhat = ( (red_sse - full_sse) / (red_dfe - full_dfe) ) / (full_sse / full_dfe)"
379 | ]
380 | },
381 | {
382 | "cell_type": "code",
383 | "execution_count": 13,
384 | "metadata": {
385 | "collapsed": false
386 | },
387 | "outputs": [
388 | {
389 | "name": "stdout",
390 | "output_type": "stream",
391 | "text": [
392 | "1.58802561117\n"
393 | ]
394 | }
395 | ],
396 | "source": [
397 | "print(fhat)"
398 | ]
399 | },
400 | {
401 | "cell_type": "markdown",
402 | "metadata": {},
403 | "source": [
404 | "Now get a p-value by using the cdf of an f-distributed random variable. scipy.stats has a handy function for this.\n",
405 | "\n",
406 | "Note that your f-distribution's parameters should be:\n",
407 | "\n",
408 | "1. $DFM_{full} - DFM_{red}$ where DFM is the number of columns in a model's design matrix.\n",
409 | "2. $DFE_{full}$"
410 | ]
411 | },
412 | {
413 | "cell_type": "code",
414 | "execution_count": 14,
415 | "metadata": {
416 | "collapsed": false
417 | },
418 | "outputs": [],
419 | "source": [
420 | "fvar = scipy.stats.f\n",
421 | "\n",
422 | "pvalue = 1 - fvar.cdf(fhat, full_dfm - red_dfm, full_dfe)"
423 | ]
424 | },
425 | {
426 | "cell_type": "code",
427 | "execution_count": 15,
428 | "metadata": {
429 | "collapsed": false
430 | },
431 | "outputs": [
432 | {
433 | "name": "stdout",
434 | "output_type": "stream",
435 | "text": [
436 | "0.214104335593\n"
437 | ]
438 | }
439 | ],
440 | "source": [
441 | "print(pvalue)"
442 | ]
443 | },
444 | {
445 | "cell_type": "markdown",
446 | "metadata": {},
447 | "source": [
448 | "## Continuations\n",
449 | "\n",
450 | "* extend your glm_data_reformat to handle interactions\n",
451 | "* evaluate the model's performance using leave-one-out cross-validation"
452 | ]
453 | }
454 | ],
455 | "metadata": {
456 | "kernelspec": {
457 | "display_name": "Python 2",
458 | "language": "python",
459 | "name": "python2"
460 | },
461 | "language_info": {
462 | "codemirror_mode": {
463 | "name": "ipython",
464 | "version": 2
465 | },
466 | "file_extension": ".py",
467 | "mimetype": "text/x-python",
468 | "name": "python",
469 | "nbconvert_exporter": "python",
470 | "pygments_lexer": "ipython2",
471 | "version": "2.7.11"
472 | }
473 | },
474 | "nbformat": 4,
475 | "nbformat_minor": 0
476 | }
477 |
--------------------------------------------------------------------------------
/PythonBootcamp/11_Image_data.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "
\n",
8 | "\n",
9 | "Python Bootcamp
\n",
10 | "August 18-19, 2018, Seattle, WA
"
11 | ]
12 | },
13 | {
14 | "cell_type": "markdown",
15 | "metadata": {},
16 | "source": [
17 | "\n",
18 | "
Exercise: Numpy, Scipy, Matplotlib
\n",
19 | "\n",
20 | "
The following series of exercises are designed to give you more practice manipulating and plotting data using Numpy, Scipy, and Matplotlib\n",
21 | "
"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": null,
27 | "metadata": {
28 | "collapsed": true
29 | },
30 | "outputs": [],
31 | "source": [
32 | "import numpy as np\n",
33 | "import matplotlib.pyplot as plt\n",
34 | "import scipy.misc\n",
35 | "import scipy.ndimage\n",
36 | "from __future__ import print_function\n",
37 | "\n",
38 | "%matplotlib notebook"
39 | ]
40 | },
41 | {
42 | "cell_type": "markdown",
43 | "metadata": {},
44 | "source": [
45 | "\n",
46 | "\n",
47 | "
Scipy has a built in image called 'face' that we can use to do some image processing
\n",
48 | "
The exercises below will walk you through various basic operations on this image\n",
49 | "
First, make a variable called \"face\" that contains the image\n",
50 | "\n",
51 | "
"
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": null,
57 | "metadata": {
58 | "collapsed": false
59 | },
60 | "outputs": [],
61 | "source": [
62 | "face = scipy.misc.face()"
63 | ]
64 | },
65 | {
66 | "cell_type": "markdown",
67 | "metadata": {},
68 | "source": [
69 | "\n",
70 | "\n",
71 | "
What is the image?
\n",
72 | "
Show the image using matplotlib\n",
73 | "
"
74 | ]
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": null,
79 | "metadata": {
80 | "collapsed": false
81 | },
82 | "outputs": [],
83 | "source": [
84 | "# Use imshow"
85 | ]
86 | },
87 | {
88 | "cell_type": "markdown",
89 | "metadata": {},
90 | "source": [
91 | "-left: 3px solid #000; padding: 1px; padding-left: 10px; background: #F0FAFF; \">\n",
92 | "
What is the shape of the image?
\n",
93 | "
\n",
94 | "
"
95 | ]
96 | },
97 | {
98 | "cell_type": "code",
99 | "execution_count": null,
100 | "metadata": {
101 | "collapsed": false
102 | },
103 | "outputs": [],
104 | "source": []
105 | },
106 | {
107 | "cell_type": "markdown",
108 | "metadata": {},
109 | "source": [
110 | "\n",
111 | "\n",
112 | "
Make note of the fact that the first two dimensions are ROWS x COLUMNS. When indexing into this array, you'll need to use this convention, as opposed to thinking of X and Y position (which would actually be column and row, respectively).\n",
113 | "
"
114 | ]
115 | },
116 | {
117 | "cell_type": "markdown",
118 | "metadata": {},
119 | "source": [
120 | "\n",
121 | "
What is the intensity range for the pixel values?
\n",
122 | "
hint: use the 'flatten' method to make a 1-D array, then make a histogram\n",
123 | "
"
124 | ]
125 | },
126 | {
127 | "cell_type": "code",
128 | "execution_count": null,
129 | "metadata": {
130 | "collapsed": false,
131 | "scrolled": false
132 | },
133 | "outputs": [],
134 | "source": [
135 | "face_flat = face.flatten()\n",
136 | "#now make a histogram"
137 | ]
138 | },
139 | {
140 | "cell_type": "markdown",
141 | "metadata": {},
142 | "source": [
143 | "\n",
144 | "
The third image dimension refers to the number of color channels (R,G,B).
\n",
145 | "
Try making an array of equal size to the image, but that contains only the color at a given pixel (for example, find a pixel on a leaf, then display an array that contains only that color)\n",
146 | "
Below is an example of how the plot might look:\n",
147 | "
\n",
148 | "
\n"
149 | ]
150 | },
151 | {
152 | "cell_type": "code",
153 | "execution_count": null,
154 | "metadata": {
155 | "collapsed": false
156 | },
157 | "outputs": [],
158 | "source": []
159 | },
160 | {
161 | "cell_type": "markdown",
162 | "metadata": {},
163 | "source": [
164 | "\n",
165 | "
Convert the image to grayscale instead
\n",
166 | "
To start with, look at any (or all) of the color channels individually. Are they all the same?\n",
167 | "
Below is an example of what a plot of each individual color channel might look like. \n",
168 | "
Note that Matplotlib does not make it easy to put individual colorbars on subplots - scour the documentation or just check out the key to figure this out.\n",
169 | "
\n",
170 | "
\n"
171 | ]
172 | },
173 | {
174 | "cell_type": "code",
175 | "execution_count": null,
176 | "metadata": {
177 | "collapsed": false
178 | },
179 | "outputs": [],
180 | "source": []
181 | },
182 | {
183 | "cell_type": "markdown",
184 | "metadata": {
185 | "collapsed": true
186 | },
187 | "source": [
188 | "\n",
189 | "\n",
190 | "
To properly convert the color channels to gray (luminance), you need to account for our visual system's sensitivity to each individual color: https://en.wikipedia.org/wiki/Grayscale#Converting_color_to_grayscale. Try writing a function to do this, then applying it to the image.\n",
191 | "
"
192 | ]
193 | },
194 | {
195 | "cell_type": "code",
196 | "execution_count": null,
197 | "metadata": {
198 | "collapsed": true
199 | },
200 | "outputs": [],
201 | "source": [
202 | "#try making a function that converts each pixel from RGB to gray"
203 | ]
204 | },
205 | {
206 | "cell_type": "code",
207 | "execution_count": null,
208 | "metadata": {
209 | "collapsed": false
210 | },
211 | "outputs": [],
212 | "source": [
213 | "#apply the function, make a grayscale plot. Be sure to set the colormap appropriately."
214 | ]
215 | },
216 | {
217 | "cell_type": "markdown",
218 | "metadata": {},
219 | "source": [
220 | "\n",
221 | "
Try using scipy's gaussian filter method to smooth (blur) the grayscaled face image
\n",
222 | "
Hint: Look up documentation for scipy.ndimage.filters.gaussian_filter\n",
223 | "
Here's what a blurred image might look like:\n",
224 | "
\n",
225 | "
\n"
226 | ]
227 | },
228 | {
229 | "cell_type": "code",
230 | "execution_count": null,
231 | "metadata": {
232 | "collapsed": true
233 | },
234 | "outputs": [],
235 | "source": []
236 | },
237 | {
238 | "cell_type": "markdown",
239 | "metadata": {},
240 | "source": [
241 | "\n",
242 | "
In the grayscale image, find the pixel with the highest intensity
\n",
243 | "
plot an X over it in the image\n",
244 | "
Here's what it should look like:\n",
245 | "\n",
246 | "
\n",
247 | "
"
248 | ]
249 | },
250 | {
251 | "cell_type": "code",
252 | "execution_count": null,
253 | "metadata": {
254 | "collapsed": false
255 | },
256 | "outputs": [],
257 | "source": []
258 | },
259 | {
260 | "cell_type": "markdown",
261 | "metadata": {},
262 | "source": [
263 | "\n",
264 | "
In the grayscale image, choose a threshold, then make all values above that threshold white and all values below that threshold black
\n",
265 | "
**Bonus exercise: Use a sliderbar widget to make the threshold dynamically adjustable:**\n",
266 | "
see: http://ipywidgets.readthedocs.io/en/latest/examples/Using%20Interact.html\n",
267 | "
Here's what it should look like:\n",
268 | "
\n",
269 | "
\n"
270 | ]
271 | },
272 | {
273 | "cell_type": "code",
274 | "execution_count": null,
275 | "metadata": {
276 | "collapsed": false
277 | },
278 | "outputs": [],
279 | "source": [
280 | "# basic answer\n",
281 | "gray_face_thresholded = gray_face.copy() #make a copy of the data so you don't affect the original image\n",
282 | "\n",
283 | "threshold = 150\n",
284 | "gray_face_thresholded[gray_face_thresholded<=threshold] = 0\n",
285 | "gray_face_thresholded[gray_face_thresholded>threshold] = 1\n",
286 | "\n",
287 | "fig,ax=plt.subplots()\n",
288 | "\n",
289 | "ax.imshow(gray_face_thresholded,cmap='gray')"
290 | ]
291 | },
292 | {
293 | "cell_type": "code",
294 | "execution_count": null,
295 | "metadata": {
296 | "collapsed": false
297 | },
298 | "outputs": [],
299 | "source": [
300 | "# Answer with slider bar\n",
301 | "# have to run 'pip install ipywidgets' from the command line first\n",
302 | "from ipywidgets import interact\n",
303 | "@interact\n",
304 | "def show_thresholded(threshold=(0,255,1)):\n",
305 | " gray_face_thresholded = gray_face.copy() #make a copy of the data so you don't affect the original image\n",
306 | "\n",
307 | " gray_face_thresholded[gray_face_thresholded<=threshold] = 0\n",
308 | " gray_face_thresholded[gray_face_thresholded>threshold] = 1\n",
309 | "\n",
310 | " fig,ax=plt.subplots()\n",
311 | "\n",
312 | " ax.imshow(gray_face_thresholded,cmap='gray')"
313 | ]
314 | },
315 | {
316 | "cell_type": "markdown",
317 | "metadata": {},
318 | "source": [
319 | "\n",
320 | "\n",
321 | "
Try making plots of intensity values for an x,y cross section through the image
\n",
322 | "
For example, a plot might look like this:\n",
323 | "
\n",
324 | "
"
325 | ]
326 | },
327 | {
328 | "cell_type": "code",
329 | "execution_count": null,
330 | "metadata": {
331 | "collapsed": false
332 | },
333 | "outputs": [],
334 | "source": [
335 | "#hint: Use gridspec to define the size and locations of the three plots"
336 | ]
337 | }
338 | ],
339 | "metadata": {
340 | "kernelspec": {
341 | "display_name": "Python 2",
342 | "language": "python",
343 | "name": "python2"
344 | },
345 | "language_info": {
346 | "codemirror_mode": {
347 | "name": "ipython",
348 | "version": 2
349 | },
350 | "file_extension": ".py",
351 | "mimetype": "text/x-python",
352 | "name": "python",
353 | "nbconvert_exporter": "python",
354 | "pygments_lexer": "ipython2",
355 | "version": "2.7.13"
356 | }
357 | },
358 | "nbformat": 4,
359 | "nbformat_minor": 0
360 | }
361 |
--------------------------------------------------------------------------------
/PythonBootcamp/support_files/CrossingDailyBarPlot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/SWDB_2018/94c120088284b156f278e4758aa5691b68f0b76e/PythonBootcamp/support_files/CrossingDailyBarPlot.png
--------------------------------------------------------------------------------
/PythonBootcamp/support_files/CrossingMonthlyBarPlot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/SWDB_2018/94c120088284b156f278e4758aa5691b68f0b76e/PythonBootcamp/support_files/CrossingMonthlyBarPlot.png
--------------------------------------------------------------------------------
/PythonBootcamp/support_files/SampleWorkbook.csv:
--------------------------------------------------------------------------------
1 | Column 1,Column 2
2 | one,1
3 | two,2
4 | three,3
--------------------------------------------------------------------------------
/PythonBootcamp/support_files/blurred.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/SWDB_2018/94c120088284b156f278e4758aa5691b68f0b76e/PythonBootcamp/support_files/blurred.png
--------------------------------------------------------------------------------
/PythonBootcamp/support_files/cropped-SummerWorkshop_Header.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/SWDB_2018/94c120088284b156f278e4758aa5691b68f0b76e/PythonBootcamp/support_files/cropped-SummerWorkshop_Header.png
--------------------------------------------------------------------------------
/PythonBootcamp/support_files/cross_sections.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/SWDB_2018/94c120088284b156f278e4758aa5691b68f0b76e/PythonBootcamp/support_files/cross_sections.png
--------------------------------------------------------------------------------
/PythonBootcamp/support_files/gitkraken_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/SWDB_2018/94c120088284b156f278e4758aa5691b68f0b76e/PythonBootcamp/support_files/gitkraken_1.png
--------------------------------------------------------------------------------
/PythonBootcamp/support_files/gitkraken_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/SWDB_2018/94c120088284b156f278e4758aa5691b68f0b76e/PythonBootcamp/support_files/gitkraken_2.png
--------------------------------------------------------------------------------
/PythonBootcamp/support_files/gitkraken_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/SWDB_2018/94c120088284b156f278e4758aa5691b68f0b76e/PythonBootcamp/support_files/gitkraken_3.png
--------------------------------------------------------------------------------
/PythonBootcamp/support_files/gitkraken_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/SWDB_2018/94c120088284b156f278e4758aa5691b68f0b76e/PythonBootcamp/support_files/gitkraken_4.png
--------------------------------------------------------------------------------
/PythonBootcamp/support_files/grayscales.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/SWDB_2018/94c120088284b156f278e4758aa5691b68f0b76e/PythonBootcamp/support_files/grayscales.png
--------------------------------------------------------------------------------
/PythonBootcamp/support_files/leafplot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/SWDB_2018/94c120088284b156f278e4758aa5691b68f0b76e/PythonBootcamp/support_files/leafplot.png
--------------------------------------------------------------------------------
/PythonBootcamp/support_files/maxpixel.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/SWDB_2018/94c120088284b156f278e4758aa5691b68f0b76e/PythonBootcamp/support_files/maxpixel.png
--------------------------------------------------------------------------------
/PythonBootcamp/support_files/neuron.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/SWDB_2018/94c120088284b156f278e4758aa5691b68f0b76e/PythonBootcamp/support_files/neuron.jpg
--------------------------------------------------------------------------------
/PythonBootcamp/support_files/parallel_commits.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/SWDB_2018/94c120088284b156f278e4758aa5691b68f0b76e/PythonBootcamp/support_files/parallel_commits.png
--------------------------------------------------------------------------------
/PythonBootcamp/support_files/stinkbug.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/SWDB_2018/94c120088284b156f278e4758aa5691b68f0b76e/PythonBootcamp/support_files/stinkbug.png
--------------------------------------------------------------------------------
/PythonBootcamp/support_files/thresholdedimage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/SWDB_2018/94c120088284b156f278e4758aa5691b68f0b76e/PythonBootcamp/support_files/thresholdedimage.png
--------------------------------------------------------------------------------
/PythonBootcamp/support_files/topic_branches.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/SWDB_2018/94c120088284b156f278e4758aa5691b68f0b76e/PythonBootcamp/support_files/topic_branches.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # SWDB_2018
2 | 
3 |
4 | This is the repository for the course materials for the 2018 Summer Workshop on the Dynamic Brain.
5 |
6 | # Support Policy
7 |
8 | We are releasing this code as part of the 2018 Summer Workshop on the Dynamic Brain and will only be supporting and developing it for the context of this workshop. The community is welcome to submit issues, but you should not expect an active response outside of the context of the course.
9 |
10 | Copyright 2018 Allen Institute
11 |
--------------------------------------------------------------------------------
/resources/EphysObservatory/ecephys_manifest.csv:
--------------------------------------------------------------------------------
1 | nwb_filename,experiment_type,VISp,VISal,VISam,VISlm,VISpm,VISrl,locally_sparse_noise,gabor,drifting_gratings,static_gratings,natural_images,natural_movie_3,full_field_flashes
2 | nwb_M14_actual_ds2.nwb,single_probe,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,TRUE,FALSE,TRUE,TRUE,TRUE,TRUE,TRUE
3 | nwb_M15_actual_ds2.nwb,single_probe,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,TRUE,FALSE,TRUE,TRUE,TRUE,TRUE,TRUE
4 | nwb_M16_actual_ds2.nwb,single_probe,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,TRUE,FALSE,TRUE,TRUE,TRUE,TRUE,TRUE
5 | nwb_M39_actual_ds2.nwb,single_probe,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,TRUE,FALSE,TRUE,TRUE,TRUE,TRUE,TRUE
6 | nwb_M51_actual_ds2.nwb,single_probe,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,TRUE,FALSE,TRUE,TRUE,TRUE,TRUE,TRUE
7 | mouse372584_probe_surf_bob.nwb,multi_probe,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE
8 |
--------------------------------------------------------------------------------
/resources/EphysObservatory/neuropixels.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/SWDB_2018/94c120088284b156f278e4758aa5691b68f0b76e/resources/EphysObservatory/neuropixels.png
--------------------------------------------------------------------------------
/resources/Neocortical Interneurons.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/SWDB_2018/94c120088284b156f278e4758aa5691b68f0b76e/resources/Neocortical Interneurons.png
--------------------------------------------------------------------------------
/resources/change_detection_schematic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/SWDB_2018/94c120088284b156f278e4758aa5691b68f0b76e/resources/change_detection_schematic.png
--------------------------------------------------------------------------------
/resources/cropped-SummerWorkshop_Header.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenInstitute/SWDB_2018/94c120088284b156f278e4758aa5691b68f0b76e/resources/cropped-SummerWorkshop_Header.png
--------------------------------------------------------------------------------