├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── css ├── bootstrap.min.css ├── font-awesome.min.css ├── highlight.tomorrow-night.css └── main.css ├── fonts ├── FontAwesome.otf ├── fontawesome-webfont.eot ├── fontawesome-webfont.svg ├── fontawesome-webfont.ttf └── fontawesome-webfont.woff ├── getstarted.html ├── homework.html ├── hw ├── 2016_spring │ ├── hw1.zip │ └── hw1 │ │ ├── README.txt │ │ ├── matrix.py │ │ ├── matrix_sol.py │ │ ├── run.py │ │ └── util.py ├── 2018_fall │ ├── .gitignore │ ├── hw1 │ │ ├── hw1.pdf │ │ ├── hw1.tex │ │ ├── ipynb │ │ │ ├── hw1_gen.ipynb │ │ │ └── sbm.png │ │ ├── sbm.csv │ │ └── solution │ │ │ ├── hw1.py │ │ │ └── sbm.png │ └── hw2 │ │ ├── abalone.data │ │ ├── hw2.pdf │ │ ├── hw2.tex │ │ ├── ipynb │ │ ├── hw2.ipynb │ │ ├── knn.png │ │ ├── pred.png │ │ └── regression.png │ │ └── solution │ │ ├── hw2.py │ │ └── regression.png └── 2019_winter │ ├── CME193-Assignment1.ipynb │ └── CME193-Assignment2.ipynb ├── index.html ├── jupyter.html ├── lectures.html ├── nb ├── .gitignore ├── 2016_spring │ ├── lecture-1.ipynb │ ├── lecture-1.slides.html │ ├── lecture-2.ipynb │ ├── lecture-2.slides.html │ ├── lecture-3.ipynb │ ├── lecture-3.slides.html │ ├── lecture-4.ipynb │ ├── lecture-4.slides.html │ ├── lecture-5.ipynb │ ├── lecture-5.slides.html │ ├── lecture-6.ipynb │ ├── lecture-6.slides.html │ ├── lecture-7.ipynb │ ├── lecture-7.slides.html │ └── lecture-8.ipynb ├── 2018_fall │ ├── Lecture4-Optimization-Using-Python-SciPy-InClass.ipynb │ ├── Lecture4-Optimization-Using-Python-SciPy.ipynb │ ├── Lecture5-Pandas.ipynb │ ├── Lecture5-supplement-solution.ipynb │ ├── Lecture5-supplement.ipynb │ ├── Lecture6-Scikit-learn.ipynb │ ├── Lecture6-supplement-solution.ipynb │ ├── Lecture6-supplement.ipynb │ ├── Lecture7-Optimization-Using-Python-ORTools-InClass.ipynb │ ├── Lecture7-Optimization-Using-Python-ORTools.ipynb │ ├── Lecture8-solutions.ipynb │ ├── Lecture8.ipynb │ ├── Lecture_1.ipynb │ ├── Lecture_2.ipynb │ ├── Lecture_3_scipy.ipynb │ 
├── data │ │ ├── co2_mm_mlo.txt │ │ └── iris.csv │ ├── img │ │ ├── iris_knn.png │ │ └── splitApplyCombine.png │ ├── lecture_1 │ │ └── ex_1.md │ └── lecture_2 │ │ └── test_script.py ├── 2018_spring │ ├── Lecture1.ipynb │ ├── Lecture2.ipynb │ ├── Lecture3.ipynb │ ├── Lecture4.ipynb │ ├── Lecture5.ipynb │ ├── Lecture6.ipynb │ ├── Lecture7-climate.ipynb │ ├── Lecture7-face-classification.ipynb │ ├── Lecture7.ipynb │ ├── Lecture8.ipynb │ ├── Lecture8_old.ipynb │ └── Lecture9.ipynb ├── 2019_spring │ ├── CME193_Homework_1.ipynb │ ├── CME193_Homework_1_Solution.ipynb │ ├── CME193_Homework_2.ipynb │ ├── CME193_Homework_2_Solution.ipynb │ ├── Lecture_1.ipynb │ ├── Lecture_1_post.ipynb │ ├── Lecture_2.ipynb │ ├── Lecture_2_post.ipynb │ ├── Lecture_3.ipynb │ ├── Lecture_3_post.ipynb │ ├── Lecture_4.ipynb │ ├── Lecture_4_post.ipynb │ ├── Lecture_5.ipynb │ ├── Lecture_5_post.ipynb │ ├── Lecture_6.ipynb │ ├── Lecture_6_post.ipynb │ ├── Lecture_7.ipynb │ ├── Lecture_7_post.ipynb │ └── Lecture_8.ipynb ├── 2019_winter │ ├── Lecture4-Optimization-Using-Python-SciPy-InClass.ipynb │ ├── Lecture4-Optimization-Using-Python-SciPy.ipynb │ ├── Lecture5-Pandas.ipynb │ ├── Lecture5-supplement-solution.ipynb │ ├── Lecture5-supplement.ipynb │ ├── Lecture6-Scikit-learn.ipynb │ ├── Lecture6-supplement-solution.ipynb │ ├── Lecture6-supplement.ipynb │ ├── Lecture7-Numpy-revision-and-practice.ipynb │ ├── Lecture7-Optimization-Using-Python-ORTools-InClass.ipynb │ ├── Lecture7-Optimization-Using-Python-ORTools.ipynb │ ├── Lecture8-pytorch.ipynb │ ├── Lecture8-slides.pdf │ ├── Lecture8-solutions.ipynb │ ├── Lecture8.ipynb │ ├── Lecture_1.ipynb │ ├── Lecture_2.ipynb │ ├── Lecture_3.ipynb │ ├── Lecture_3_scipy.ipynb │ ├── Lecture_4.ipynb │ ├── data │ │ ├── co2_mm_mlo.txt │ │ └── iris.csv │ ├── img │ │ ├── iris_knn.png │ │ └── splitApplyCombine.png │ ├── lecture_1 │ │ └── ex_1.md │ └── lecture_2 │ │ └── test_script.py └── nb-assets │ └── img │ ├── broadcasting.png │ └── python.png ├── syllabus.html └── 
web ├── LICENSE ├── couscous.yml ├── css ├── bootstrap.min.css ├── font-awesome.min.css ├── highlight.tomorrow-night.css └── main.css ├── default.twig ├── fonts ├── FontAwesome.otf ├── fontawesome-webfont.eot ├── fontawesome-webfont.svg ├── fontawesome-webfont.ttf └── fontawesome-webfont.woff ├── getstarted.md ├── homework.md ├── index.md ├── jupyter.md ├── lectures.md └── syllabus.md /.gitignore: -------------------------------------------------------------------------------- 1 | web/.couscous/ 2 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "nb/reveal.js"] 2 | path = nb/reveal.js 3 | url = https://github.com/hakimel/reveal.js 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Couscous 2 | 3 | Copyright (C) Matthieu Napoli 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and 6 | associated documentation files (the "Software"), to deal in the Software without restriction, 7 | including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, 9 | subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in all copies or substantial 12 | portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT 15 | NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
16 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 17 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 18 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CME 193 - Scientific Python 2 | 3 | Please visit the course website: [http://cme193.stanford.edu](http://cme193.stanford.edu) 4 | -------------------------------------------------------------------------------- /css/highlight.tomorrow-night.css: -------------------------------------------------------------------------------- 1 | /* Tomorrow Night Theme */ 2 | /* http://jmblog.github.com/color-themes-for-google-code-highlightjs */ 3 | /* Original theme - https://github.com/chriskempson/tomorrow-theme */ 4 | /* http://jmblog.github.com/color-themes-for-google-code-highlightjs */ 5 | .tomorrow-comment, pre .comment, pre .title { 6 | color: #969896; 7 | } 8 | 9 | .tomorrow-red, pre .variable, pre .attribute, pre .tag, pre .regexp, pre .ruby .constant, pre .xml .tag .title, pre .xml .pi, pre .xml .doctype, pre .html .doctype, pre .css .id, pre .css .class, pre .css .pseudo { 10 | color: #cc6666; 11 | } 12 | 13 | .tomorrow-orange, pre .number, pre .preprocessor, pre .built_in, pre .literal, pre .params, pre .constant { 14 | color: #de935f; 15 | } 16 | 17 | .tomorrow-yellow, pre .class, pre .ruby .class .title, pre .css .rules .attribute { 18 | color: #f0c674; 19 | } 20 | 21 | .tomorrow-green, pre .string, pre .value, pre .inheritance, pre .header, pre .ruby .symbol, pre .xml .cdata { 22 | color: #b5bd68; 23 | } 24 | 25 | .tomorrow-aqua, pre .css .hexcolor { 26 | color: #8abeb7; 27 | } 28 | 29 | .tomorrow-blue, pre .function, pre .python .decorator, pre .python .title, pre .ruby .function .title, pre .ruby .title 
.keyword, pre .perl .sub, pre .javascript .title, pre .coffeescript .title { 30 | color: #81a2be; 31 | } 32 | 33 | .tomorrow-purple, pre .keyword, pre .javascript .function { 34 | color: #b294bb; 35 | } 36 | 37 | pre code { 38 | display: block; 39 | background: #1d1f21; 40 | color: #c5c8c6; 41 | font-family: Menlo, Monaco, Consolas, monospace; 42 | line-height: 1.5; 43 | border: 1px solid #ccc; 44 | padding: 10px; 45 | } 46 | -------------------------------------------------------------------------------- /css/main.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-size: 19px; 3 | } 4 | 5 | main { 6 | margin-top: 90px; 7 | } 8 | 9 | section { 10 | margin-bottom: 50px; 11 | } 12 | 13 | h1, h2, h3, h4 { 14 | color: #df691a; 15 | } 16 | h3 { 17 | font-size: 23px; 18 | } 19 | 20 | li { 21 | margin-bottom: 3px; 22 | } 23 | 24 | img { 25 | max-width: 100%; 26 | } 27 | 28 | header.navbar { 29 | opacity: 0.9; 30 | } 31 | .navbar .navbar-brand { 32 | font-size: 28px; 33 | height: auto; 34 | line-height: 50px; 35 | margin-left: 20px; 36 | color: #df691a; 37 | } 38 | .navbar a.navbar-brand:hover { 39 | color: #df691a; 40 | } 41 | .navbar .navbar-brand small { 42 | font-size: 18px; 43 | font-weight: 300; 44 | margin-left: 10px; 45 | color: white; 46 | } 47 | 48 | @media (min-width: 768px) { 49 | #sidebar { 50 | position:fixed; 51 | } 52 | } 53 | @media (max-width: 960px) { 54 | body { 55 | font-size: 17px; 56 | } 57 | pre { 58 | font-size: 12px; 59 | } 60 | } 61 | 62 | .page-header { 63 | margin-top: 0; 64 | } 65 | 66 | #sidebar .github-star { 67 | margin-top: 20px; 68 | margin-left: 50px; 69 | } 70 | 71 | #sidebar .text-muted { 72 | color: #859AAF; 73 | } 74 | 75 | pre { 76 | padding: 0; 77 | border-color: #3D5166; 78 | background-color: #1D2B3A; 79 | border-radius: 4px; 80 | margin: 15px; 81 | } 82 | pre code { 83 | border: none; 84 | background-color: #1D2B3A; 85 | } 86 | 87 | code { 88 | font-size: 85%; 89 | padding: 
4px 4px 1px; 90 | margin: 0 4px; 91 | border-radius: 3px; 92 | color: #c5c8c6; 93 | border: solid 1px #3D5166; 94 | background-color: #1D2B3A; 95 | white-space: pre-wrap; 96 | white-space: -moz-pre-wrap; 97 | word-wrap: break-word; 98 | } 99 | -------------------------------------------------------------------------------- /fonts/FontAwesome.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icme/cme193/3ed008f6e0951b80faf1d77c9542ae0dd925691d/fonts/FontAwesome.otf -------------------------------------------------------------------------------- /fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icme/cme193/3ed008f6e0951b80faf1d77c9542ae0dd925691d/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icme/cme193/3ed008f6e0951b80faf1d77c9542ae0dd925691d/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icme/cme193/3ed008f6e0951b80faf1d77c9542ae0dd925691d/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /homework.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | CME 193 - Scientific Python 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 27 | 28 |
29 |
30 | 31 | 32 | 78 | 79 | 80 |
81 |

Homework

82 |

Homework will be posted on this page when it is assigned.

83 |
84 |
    85 |
  1. Homework 1 - Due 2/1/2019 at 5PM [ipynb] [github] [soln]
  2. 86 |
  3. Homework 2 - Due 2/17/2019 at 5PM [ipynb] [github]
  4. 87 |
88 |
89 | 90 |
91 |
92 | 93 | 94 | 95 | 96 | 97 | 104 | 105 | 106 | 107 | -------------------------------------------------------------------------------- /hw/2016_spring/hw1.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icme/cme193/3ed008f6e0951b80faf1d77c9542ae0dd925691d/hw/2016_spring/hw1.zip -------------------------------------------------------------------------------- /hw/2016_spring/hw1/README.txt: -------------------------------------------------------------------------------- 1 | Instructions: 2 | -------------- 3 | 4 | Edit the code inside matrix.py and create a minimal matrix class. PLEASE do read the comments and post to Piazza with questions. 5 | 6 | To test your code, type: 7 | 8 | python run.py 9 | 10 | which imports and runs your matrix code. There is a minimal set of tests that you can take a look at in run.py 11 | 12 | Over the weekend, more tests will be released that will test addition and multiplication. 13 | 14 | Evaluation: 15 | ----------- 16 | 17 | You're evaluated on two criteria: 18 | 19 | 1) performance on the tests (10 pts for ten tests) 20 | 2) coding style and effort (10 pts) 21 | 22 | The tests will be very similar to the ones currently in run.py plus the ones that will be released over the weekend. 23 | 24 | -------------------------------------------------------------------------------- /hw/2016_spring/hw1/matrix.py: -------------------------------------------------------------------------------- 1 | ''' 2 | CME 193, HW #1 3 | ''' 4 | 5 | # Import things you might need here. 6 | # I recommend array... 7 | import array 8 | 9 | 10 | class Matrix(object): 11 | """ 12 | Instructions: fill out the sections of code marked with 13 | 14 | pass #IMPLEMENT 15 | 16 | and MAKE SURE and read all of the comments in the code here. 
17 | 18 | ***** 19 | DONT FORGET to remove all pass statements 20 | ***** 21 | 22 | """ 23 | 24 | def __init__(self, data=None, shape=None, dtype='f'): 25 | ''' 26 | data: can be some initial data that you pass in the form of 27 | something like a list of lists. 28 | 29 | shape: can be a tuple of length 2 describing (nb_rows, nb_columns) 30 | of your matrix. 31 | 32 | dtype: can be one of 'f' or 'i' for float or int 33 | 34 | logically, you can have three scenarios for instantiation: 35 | 36 | 1) someone passes data, but no shape. If the data is nested, i.e., a 37 | list of lists, it checks to make sure the dimensions make a 38 | rectangle. If this fails, raise a ValueError! 39 | If just a list, then makes a (nb_elements, 1) shaped matrix 40 | 41 | 2) someone passes data and a shape. We need to make sure that the 42 | dimensions of the data match the shape if the data is a list 43 | of lists. If the data is one list of numbers, we need to make 44 | sure that nb_elements = shape[0] * shape[1]. If anything fails, 45 | raise a ValueError 46 | 47 | 3) Someone passes just a shape. In this case, we'll assume we want 48 | to initialize the matrix with all zeros. 49 | 50 | Internally, you can represent your matrix with anything that is not a 51 | matrix class itself (i.e., no numpy!) I recommend using the array 52 | built-in from python. https://docs.python.org/2/library/array.html 53 | 54 | Read on for more specifications... 55 | ''' 56 | pass #IMPLEMENT 57 | 58 | def shape(self): 59 | ''' 60 | Return the shape of the current matrix 61 | ''' 62 | pass #IMPLEMENT 63 | 64 | def __getitem__(self, indices): 65 | ''' 66 | This method will implement A[i, j] of the matrix, and will return a 67 | copy of the element. Note that the perfect way of doing this would allow 68 | for slices, but we won't do that for simplicity. This is one of those 69 | special functions that python implicitly calls. 70 | 71 | Example: 72 | >>> M = Matrix(...) 
73 | >>> a = M[0, 3] 74 | 75 | REQUIRED: **Return** the i, j element. Raise an IndexError if i, j is not valid 76 | for the given matrix size. 77 | ''' 78 | assert len(indices) == 2 79 | i, j = indices # can unpack two indices! 80 | pass #IMPLEMENT 81 | 82 | def __setitem__(self, indices, value): 83 | ''' 84 | This method will implement the set method of A[i, j] of the matrix. 85 | Note that the perfect way of doing this would allow 86 | for slices, but we won't do that for simplicity. This is one of those 87 | special functions that python implicitly calls. 88 | 89 | Example: 90 | >>> M = Matrix(...) 91 | >>> M[0, 3] = value 92 | 93 | REQUIRED: **Set** the i, j element with value. Raise an 94 | IndexError if i, j is not valid for the given matrix size. 95 | ''' 96 | assert len(indices) == 2 97 | i, j = indices 98 | pass #IMPLEMENT 99 | 100 | def __add__(self, X): 101 | ''' 102 | Perform M + X, where X is either another matrix or a number. 103 | 104 | Raise a ValueError if the dimensions don'm match in the case 105 | of a Matrix 106 | ''' 107 | pass #IMPLEMENT 108 | 109 | def __radd__(self, X): 110 | ''' 111 | Perform x + M, where x is a number. 112 | ''' 113 | pass #IMPLEMENT 114 | 115 | def __mul__(self, X): 116 | ''' 117 | Perform M * X (a matrix product!), where X is either another 118 | matrix or a number. 119 | 120 | Raise a ValueError if the dimensions don'm match in the case 121 | of a Matrix Matrix multiplication. 122 | 123 | For documentation on how to do a matrix multiplication, consult 124 | with https://en.wikipedia.org/wiki/Matrix_multiplication 125 | ''' 126 | pass #IMPLEMENT 127 | 128 | def __rmul__(self, X): 129 | ''' 130 | Perform X * M where X is a number. 
131 | ''' 132 | pass #IMPLEMENT 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | -------------------------------------------------------------------------------- /hw/2016_spring/hw1/matrix_sol.py: -------------------------------------------------------------------------------- 1 | ''' 2 | CME 193, HW #1 3 | ''' 4 | 5 | # Import things you might need here. 6 | # I recommend array... 7 | import array 8 | 9 | 10 | class Matrix(object): 11 | """ 12 | Instructions: fill out the sections of code marked with 13 | 14 | pass #IMPLEMENT 15 | 16 | and MAKE SURE and read all of the comments in the code here. 17 | 18 | ***** 19 | DONT FORGET to remove all pass statements 20 | ***** 21 | 22 | """ 23 | 24 | def __init__(self, data=None, shape=None, dtype='f'): 25 | ''' 26 | data: can be some initial data that you pass in the form of 27 | something like a list of lists. 28 | 29 | shape: can be a tuple of length 2 describing (nb_rows, nb_columns) 30 | of your matrix. 31 | 32 | dtype: can be one of 'f' or 'i' for float or int 33 | 34 | logically, you can have three scenarios for instantiation: 35 | 36 | 1) someone passes data, but no shape. If the data is nested, i.e., a 37 | list of lists, it checks to make sure the dimensions make a 38 | rectangle. If this fails, raise a ValueError! 39 | If just a list, then makes a (nb_elements, 1) shaped matrix 40 | 41 | 2) someone passes data and a shape. We need to make sure that the 42 | dimensions of the data match the shape if the data is a list 43 | of lists. If the data is one list of numbers, we need to make 44 | sure that nb_elements = shape[0] * shape[1]. If anything fails, 45 | raise a ValueError 46 | 47 | 3) Someone passes just a shape. In this case, we'll assume we want 48 | to initialize the matrix with all zeros. 49 | 50 | Internally, you can represent your matrix with anything that is not a 51 | matrix class itself (i.e., no numpy!) I recommend using the array 52 | built-in from python. 
https://docs.python.org/2/library/array.html 53 | 54 | Read on for more specifications... 55 | ''' 56 | assert dtype in {'f', 'i'} 57 | if (shape is None) and (data is None): 58 | raise ValueError('Need one of data or shape') 59 | 60 | if data is not None and type(data[0]) is list: 61 | self._shape = len(data), len(data[0]) 62 | 63 | assert all(len(row) == self._shape[1] for row in data) 64 | 65 | if (shape is not None) and (shape != self._shape): 66 | raise ValueError('Bad shapes') 67 | 68 | self._data = array.array(dtype, [x for row in data for x in row]) 69 | else: 70 | self._shape = (len(data), 1) if shape is None else shape 71 | 72 | data = [0] * (shape[0] * shape[1]) if data is None else data 73 | 74 | assert self._shape[0] * self._shape[1] == len(data) 75 | 76 | self._data = array.array(dtype, data) 77 | 78 | 79 | def shape(self): 80 | ''' 81 | Return the shape of the current matrix 82 | ''' 83 | return self._shape 84 | 85 | def __getitem__(self, indices): 86 | ''' 87 | This method will implement A[i, j] of the matrix, and will return a 88 | copy of the element. Note that the perfect way of doing this would allow 89 | for slices, but we won't do that for simplicity. This is one of those 90 | special functions that python implicitly calls. 91 | 92 | Example: 93 | >>> M = Matrix(...) 94 | >>> a = M[0, 3] 95 | 96 | REQUIRED: **Return** the i, j element. Raise an IndexError if i, j is not valid 97 | for the given matrix size. 98 | ''' 99 | assert len(indices) == 2 100 | i, j = indices # can unpack two indices! 101 | assert i < self._shape[0] 102 | assert j < self._shape[1] 103 | 104 | return self._data[i * self._shape[1] + j] 105 | 106 | def __setitem__(self, indices, value): 107 | ''' 108 | This method will implement the set method of A[i, j] of the matrix. 109 | Note that the perfect way of doing this would allow 110 | for slices, but we won't do that for simplicity. This is one of those 111 | special functions that python implicitly calls. 
112 | 113 | Example: 114 | >>> M = Matrix(...) 115 | >>> M[0, 3] = value 116 | 117 | REQUIRED: **Set** the i, j element with value. Raise an 118 | IndexError if i, j is not valid for the given matrix size. 119 | ''' 120 | assert len(indices) == 2 121 | i, j = indices 122 | pass #IMPLEMENT 123 | 124 | def __add__(self, X): 125 | ''' 126 | Perform M + X, where X is either another matrix or a number. 127 | 128 | Raise a ValueError if the dimensions don'm match in the case 129 | of a Matrix 130 | ''' 131 | pass #IMPLEMENT 132 | 133 | def __radd__(self, X): 134 | ''' 135 | Perform x + M, where x is a number. 136 | ''' 137 | pass #IMPLEMENT 138 | 139 | def __mul__(self, X): 140 | ''' 141 | Perform M * X (a matrix product!), where X is either another 142 | matrix or a number. 143 | 144 | Raise a ValueError if the dimensions don'm match in the case 145 | of a Matrix Matrix multiplication. 146 | 147 | For documentation on how to do a matrix multiplication, consult 148 | with https://en.wikipedia.org/wiki/Matrix_multiplication 149 | ''' 150 | pass #IMPLEMENT 151 | 152 | def __rmul__(self, X): 153 | ''' 154 | Perform X * M where X is a number. 
155 | ''' 156 | pass #IMPLEMENT 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | -------------------------------------------------------------------------------- /hw/2016_spring/hw1/run.py: -------------------------------------------------------------------------------- 1 | from util import test, summary 2 | from matrix import Matrix 3 | 4 | if __name__ == '__main__': 5 | M = Matrix(shape=(4, 5)) 6 | 7 | test(M.shape() == (4, 5), 'M.shape() test') 8 | 9 | test(M[0, 0] == 0, 'M[0, 0] == 0 test') 10 | 11 | M[0, 0] = 3 12 | 13 | test(M[0, 0] == 3, 'M[0, 0] == 3 (setter) test') 14 | 15 | a = M[0, 0] 16 | 17 | test(a == 3, 'a == 3 (getter) test') 18 | 19 | summary() 20 | 21 | -------------------------------------------------------------------------------- /hw/2016_spring/hw1/util.py: -------------------------------------------------------------------------------- 1 | class _register: 2 | n_pass = 0 3 | n_fail = 0 4 | 5 | def test(statement, name): 6 | if statement: 7 | print '{}: PASS'.format(name) 8 | _register.n_pass += 1 9 | else: 10 | print '{}: FAIL'.format(name) 11 | _register.n_fail += 1 12 | 13 | def summary(): 14 | print '{} / {} tests passed.'.format( 15 | _register.n_pass, _register.n_pass + _register.n_fail) -------------------------------------------------------------------------------- /hw/2018_fall/.gitignore: -------------------------------------------------------------------------------- 1 | *.log 2 | *.out 3 | *.synctex.gz 4 | *.aux 5 | .ipynb_checkpoints 6 | -------------------------------------------------------------------------------- /hw/2018_fall/hw1/hw1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icme/cme193/3ed008f6e0951b80faf1d77c9542ae0dd925691d/hw/2018_fall/hw1/hw1.pdf -------------------------------------------------------------------------------- /hw/2018_fall/hw1/hw1.tex: 
-------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | \newcommand{\assgnnum}{1} 3 | \newcommand{\duedate}{October 16, 10:30 AM} 4 | 5 | \usepackage{amsmath} 6 | %\usepackage{fullpage} 7 | \usepackage{amssymb} 8 | %\usepackage{bbm} 9 | \usepackage{fancyhdr} 10 | %\usepackage{paralist} 11 | \usepackage{graphicx} 12 | \usepackage{caption} 13 | \usepackage{subcaption} 14 | \usepackage[pdftex,colorlinks=true, urlcolor = blue]{hyperref} 15 | \usepackage{listings} 16 | \usepackage{color} 17 | \usepackage{xcolor} 18 | 19 | \definecolor{thegreen}{rgb}{0,.5,0} 20 | \definecolor{comment-green}{rgb}{0,.3,0} 21 | \definecolor{theblue}{rgb}{0,0,.8} 22 | \definecolor{light-gray}{gray}{0.98} 23 | \definecolor{comment-color}{rgb}{0,0,.8} 24 | \definecolor{string-color}{rgb}{0,.75,0} 25 | \definecolor{border-blue}{rgb}{0,0,.6} 26 | 27 | \lstset{% use our version of highlighting 28 | language=python, % using python 29 | keywordstyle={\color{teal}\bfseries}, % keywords 30 | commentstyle=\color{comment-color}, % comments 31 | stringstyle=\color{string-color}, %strings 32 | } 33 | 34 | \lstset{ 35 | basicstyle={\ttfamily\normalsize}, % use font and smaller size 36 | basewidth={0.5em,0.5em}, 37 | showstringspaces=false, % do not emphasize spaces in strings 38 | tabsize=2, % number of spaces of a TAB 39 | aboveskip={0\baselineskip}, % a bit of space above 40 | columns=fixed, % nice spacing 41 | } 42 | 43 | 44 | \oddsidemargin 0in \evensidemargin 0in 45 | \topmargin -0.5in \headheight 0.25in \headsep 0.25in 46 | \textwidth 6.5in \textheight 9in 47 | \parskip 6pt \parindent 0in \footskip 20pt 48 | 49 | % set the header up 50 | \fancyhead{} 51 | \fancyhead[L]{CME193: Homework \assgnnum} 52 | \fancyhead[R]{Due: \duedate} 53 | %%%%%%%%%%%%%%%%%%%%%%%%%% 54 | \renewcommand\headrulewidth{0.4pt} 55 | \setlength\headheight{15pt} 56 | 57 | \newcommand{\p}{\ensuremath{\mathbf{P}}} 58 | \renewcommand{\Pr}[1]{\ensuremath{\p \left \{ #1 \right 
\}}} 59 | \newcommand{\nti}{\ensuremath{n \to \infty}} 60 | \newcommand{\I}{\ensuremath{\operatorname{I}}} 61 | \newcommand{\One}[1]{\ensuremath{\mathbbm{1}_{\left \{ #1 \right \}}}} 62 | \newcommand{\E}{\ensuremath{\mathbf{E}}} 63 | \newcommand{\Ex}[2][]{\ensuremath{\E_{#1} \left[ #2 \right]}} 64 | \newcommand{\var}{\ensuremath{\operatorname{Var}}} 65 | \newcommand{\cov}{\ensuremath{\operatorname{Cov}}} 66 | \newcommand{\F}{\ensuremath{\mathcal{F}}} 67 | \newcommand{\R}{\ensuremath{\mathbb{R}}} 68 | \newcommand{\C}{\ensuremath{\mathbb{C}}} 69 | \newcommand{\NormRV}[2]{\ensuremath{\operatorname{N}\left(#1, #2\right)}} 70 | \newcommand{\BetaRV}[2]{\ensuremath{\operatorname{Beta}\left(#1, #2\right)}} 71 | \newcommand{\argmax}{\operatornamewithlimits{argmax}} 72 | \newcommand{\x}{\mathbf{x}} 73 | \newcommand{\A}{\mathbf{A}} 74 | \newcommand{\bb}{\mathbf{b}} 75 | 76 | \newcounter{points} 77 | \setcounter{points}{0} 78 | \newcounter{bonuspoints} 79 | \setcounter{bonuspoints}{0} 80 | 81 | \newcommand\setpoints[1]{\addtocounter{points}{#1}(#1 points)} 82 | \newcommand\setpoint{\addtocounter{points}{1}(1 point)} 83 | \newcommand\setbonuspoints[1]{\addtocounter{bonuspoints}{#1}(#1 bonus points)} 84 | \newcommand\setbonuspoint{\addtocounter{bonuspoints}{1}(1 bonus point)} 85 | 86 | \newcommand\printpoints{Total number of points: \value{\thepoints}} 87 | 88 | \newcommand{\eqD}{\ensuremath{\overset{\mathcal{D}}{=}}} 89 | 90 | \setlength{\parindent}{0in} 91 | 92 | \begin{document} 93 | 94 | \pagestyle{fancy} 95 | %\vspace*{15pt} 96 | 97 | \section*{Overview} 98 | 99 | In this assignment, you'll read sparse matrices from files, implement a class, and implement a simple spectral embedding algorithm. 100 | 101 | All your code should be put in a script {\tt hw1.py} which will be submitted on Canvas. 
102 | 103 | Please submit the following files to Canvas: 104 | \begin{enumerate} 105 | \item {\tt hw1.py} - a script with all your code, which will generate the figure 106 | \item {\tt sbm.png} 107 | \end{enumerate} 108 | 109 | {\bf Please use the function, class and file names specified.} It will be easier to read your code if everyone uses the same conventions. Additionally, {\bf please include comments in your code} to explain what you're doing (doesn't have to be detailed, but should be clear). 110 | 111 | 112 | \section{Review of Graph Terminology} 113 | 114 | A graph $G(V,E)$ is a set of vertices $V$ and edges $E\subset V\times V$. We'll denote the number of vertices in a graph by $n$, and assign each vertex a number in $0,1,\dots, n-1$. 115 | 116 | The adjacency matrix of a graph is a $n\times n$ matrix $A$, where 117 | $$A_{i,j} = \begin{cases} 118 | 1 & (i,j) \in E\\ 119 | 0 & (i,j) \notin E 120 | \end{cases}$$ 121 | 122 | 123 | \section{Read A Sparse Matrix From a File} 124 | 125 | {\tt sbm.csv} contains the adjacency matrix of a graph in the following format: each row of the file contains the contents for a single non-zero of the adjacency matrix: 126 | 127 | {\tt 128 | row (int), column (int), value (float) 129 | } 130 | 131 | 132 | Write a function that will take a file name as input and return a {\tt scipy.sparse.coo\_matrix} 133 | 134 | {\bf call this function {\tt read\_coo} } 135 | 136 | If you prefer to work with another sparse matrix type, you can always convert once you have created a COO matrix. 137 | 138 | \section{Create A Sparse + Rank-1 Matrix Class} 139 | 140 | In the next part, it will be convenient to have a class to represent matrices of the form 141 | $$ S + \alpha uv^T$$ 142 | where $S$ is a sparse matrix, $u, v$ are vectors, and $\alpha$ is a scalar. This allows us to use the structure of the matrix to avoid forming a dense array. 
143 | 144 | {\bf use the class name {\tt sparse\_rank1} } 145 | 146 | Write a class definition that 147 | \begin{itemize} 148 | \item initializes an object given a sparse matrix $S$, numpy arrays $u$ and $v$ and a float $\alpha$ (store each of these inputs). Give the object another field {\tt shape}, which is set to be the same as {\tt S.shape} 149 | \item implements a {\tt dot} method, which performs matrix-vector multiplication 150 | \end{itemize} 151 | 152 | \section{Power Method} 153 | In lecture 2, you did an exercise in which you implemented power method for a matrix. Recall, this function finds the eigenpair $(\lambda, v)$ with largest $\lambda$ such that $Av = \lambda v$ for a matrix $A$. Note that the implementation only required that $A$ have a {\tt dot} method that performed matrix-vector multiplication, so you can use it with your new matrix class. 154 | 155 | Import this function to your script, and modify it if needed to work with your {\tt sparse\_rank1} matrix class. 156 | 157 | {\bf call this function {\tt power\_method} } 158 | 159 | \section{Spectral Embedding} 160 | 161 | {\tt sbm.csv} contains the adjacency matrix of a graph generated using the stochastic block model. Load this into a sparse matrix {\tt A}. 162 | 163 | A spectral embedding assigns vertices of a graph coordinates in Euclidean space that can be used to visualize the graph. One way to generate a spectral embedding is to calculate the top $k$ vectors of the adjacency matrix, and embed in $\mathbb{R}^k$. We'll do this for $k=2$. 164 | 165 | Find the top two eigenvectors of adjacency matrix using the power method using the following deflation algorithm: 166 | \begin{enumerate} 167 | \item Calculate the top eigenpair $(\lambda_1, v_1)$ of $A$ 168 | \item Calculate the top eigenpair $(\lambda_2, v_2)$ of $(A - \lambda_1 v_1 v_1^T)$ 169 | \end{enumerate} 170 | You can either wrap this in a function, or just execute it directly in the script. 
171 | 172 | Use PyPlot to generate a scatter plot of $v_1$ vs $v_2$. {\bf Save this plot as {\tt sbm.png} and submit it on Canvas.} 173 | 174 | Note that this is a very simplified version of a specific spectral clustering algorithm. If you want to know more about this spectral clustering setup, I recommend the paper: 175 | 176 | ``Robust and efficient multi-way spectral clustering'' by A. Damle, V. Minden, and L. Ying. (2017) \\ 177 | {\tt https://arxiv.org/abs/1609.08251} 178 | 179 | 180 | 181 | 182 | \section*{Hints} 183 | 184 | You don't need to use the hints to complete the assignment - it's ok if you want to use functions other than the ones mentioned. 185 | 186 | \begin{itemize} 187 | \item Reading a file: check out {\tt numpy.loadtxt} 188 | \item To specify a data type in a numpy array, pass in a type e.g. {\tt np.array(v, int)} 189 | \item $ (S + \alpha uv^T) x = Sx + \alpha u (v^T x)$ 190 | \item Save a figure: check out {\tt savefig} in pyplot 191 | \item You should see 2 clusters when you generate the figure 192 | \end{itemize} 193 | 194 | 195 | 196 | 197 | 198 | \end{document} 199 | -------------------------------------------------------------------------------- /hw/2018_fall/hw1/ipynb/hw1_gen.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import scipy\n", 11 | "import scipy.sparse" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "# SBM on 2 blocks of size n\n", 21 | "# p - intra-block probability of edge\n", 22 | "# q - inter-block probability of edge\n", 23 | "def SBM(n, p, q):\n", 24 | " I = []\n", 25 | " J = []\n", 26 | " V = []\n", 27 | " # block 1 edges\n", 28 | " for i in range(n):\n", 29 | " for j in range(i):\n", 30 | " if np.random.rand() < 
p:\n", 31 | " # i-j\n", 32 | " I.append(i)\n", 33 | " J.append(j)\n", 34 | " V.append(1.)\n", 35 | " # j-i\n", 36 | " I.append(j)\n", 37 | " J.append(i)\n", 38 | " V.append(1.)\n", 39 | " \n", 40 | " # block 2 edges\n", 41 | " for i in range(n, 2*n):\n", 42 | " for j in range(n, i):\n", 43 | " if np.random.rand() < p:\n", 44 | " # i-j\n", 45 | " I.append(i)\n", 46 | " J.append(j)\n", 47 | " V.append(1.)\n", 48 | " # j-i\n", 49 | " I.append(j)\n", 50 | " J.append(i)\n", 51 | " V.append(1.)\n", 52 | " \n", 53 | " # intra-block edges\n", 54 | " for i in range(n):\n", 55 | " for j in range(n, 2*n):\n", 56 | " if np.random.rand() < q:\n", 57 | " # i-j\n", 58 | " I.append(i)\n", 59 | " J.append(j)\n", 60 | " V.append(1.)\n", 61 | " # j-i\n", 62 | " I.append(j)\n", 63 | " J.append(i)\n", 64 | " V.append(1.)\n", 65 | " \n", 66 | " return I, J, V\n", 67 | " \n", 68 | "I, J, V = SBM(1000, 0.05, 0.01)" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "X = np.array([I, J, V]).T" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "# np.savetxt(\"sbm.csv\", X, delimiter=',')\n", 87 | "np.savetxt(\"sbm.csv\", X, fmt=\"%d, %d, %f\")" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "def read_coo(fname):\n", 97 | " Y = np.loadtxt(fname, delimiter=',')\n", 98 | " I = np.array(Y[:,0], int)\n", 99 | " J = np.array(Y[:,1], int)\n", 100 | " V = Y[:,2]\n", 101 | " return scipy.sparse.coo_matrix((np.array(V), (I, J)))\n", 102 | "\n", 103 | "A = read_coo(\"sbm.csv\")" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "class sparse_rank1(object):\n", 113 | " def __init__(self, S, alpha, u, v):\n", 114 | " self.S = S\n", 115 
| " self.alpha = alpha\n", 116 | " self.u = u\n", 117 | " self.v = v\n", 118 | " self.shape = S.shape\n", 119 | " \n", 120 | " def dot(self, x):\n", 121 | " return self.S.dot(x) + self.alpha*self.u*self.v.dot(x)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "\n", 131 | "\n", 132 | "# compute power method\n", 133 | "# tol is a key-word argument for convergence tolerance\n", 134 | "def power_method(A, tol=1e-8):\n", 135 | " \n", 136 | " # rayleigh quotient\n", 137 | " # returns v^T*Av\n", 138 | " def rq(v, A):\n", 139 | " return v.dot(A.dot(v))\n", 140 | " \n", 141 | " n = A.shape[1]\n", 142 | " # generate random vector with unit length\n", 143 | " v = np.random.normal(0, 1, n)\n", 144 | " v /= np.linalg.norm(v)\n", 145 | " \n", 146 | " rqs = [] # keep track of rayleigh quotients as we progress\n", 147 | " rqs.append(rq(v, A))\n", 148 | " converged = False\n", 149 | " \n", 150 | " while True:\n", 151 | " \n", 152 | " # v <- A*v\n", 153 | " v = A.dot(v)\n", 154 | " # normalize v\n", 155 | " v /= np.linalg.norm(v)\n", 156 | " \n", 157 | " rqs.append(rq(v,A))\n", 158 | " # check if rayleigh quotient has converged\n", 159 | " if np.abs(rqs[-1] - rqs[-2]) < tol:\n", 160 | " break\n", 161 | " \n", 162 | " # set eigenvalue\n", 163 | " lam = rqs[-1]\n", 164 | " \n", 165 | " return v, lam" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "v, lam = power_method(A)\n", 175 | "\n", 176 | "B = sparse_rank1(A, -lam, v, v)\n", 177 | "v2, lam2 = power_method(B)" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [ 186 | "import matplotlib.pyplot as plt\n", 187 | "plt.scatter(v, v2)\n", 188 | "plt.savefig('sbm.png')\n", 189 | "plt.show()" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 
194 | "execution_count": null, 195 | "metadata": {}, 196 | "outputs": [], 197 | "source": [] 198 | } 199 | ], 200 | "metadata": { 201 | "kernelspec": { 202 | "display_name": "Python [conda env:cme193]", 203 | "language": "python", 204 | "name": "conda-env-cme193-py" 205 | }, 206 | "language_info": { 207 | "codemirror_mode": { 208 | "name": "ipython", 209 | "version": 3 210 | }, 211 | "file_extension": ".py", 212 | "mimetype": "text/x-python", 213 | "name": "python", 214 | "nbconvert_exporter": "python", 215 | "pygments_lexer": "ipython3", 216 | "version": "3.6.6" 217 | } 218 | }, 219 | "nbformat": 4, 220 | "nbformat_minor": 2 221 | } 222 | -------------------------------------------------------------------------------- /hw/2018_fall/hw1/ipynb/sbm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icme/cme193/3ed008f6e0951b80faf1d77c9542ae0dd925691d/hw/2018_fall/hw1/ipynb/sbm.png -------------------------------------------------------------------------------- /hw/2018_fall/hw1/solution/hw1.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy 3 | import scipy.sparse 4 | import matplotlib.pyplot as plt 5 | 6 | 7 | # part 2 - read sparse matrix from csv file 8 | def read_coo(fname): 9 | Y = np.loadtxt(fname, delimiter=',') 10 | rows = np.array(Y[:, 0], int) 11 | cols = np.array(Y[:, 1], int) 12 | V = Y[:, 2] 13 | return scipy.sparse.coo_matrix((np.array(V), (rows, cols))) 14 | 15 | 16 | A = read_coo("../sbm.csv") 17 | 18 | 19 | # part 3 - create sparse + Rank-1 class 20 | class sparse_rank1(object): 21 | def __init__(self, S, alpha, u, v): 22 | self.S = S 23 | self.alpha = alpha 24 | self.u = u 25 | self.v = v 26 | self.shape = S.shape 27 | 28 | def dot(self, x): 29 | return self.S.dot(x) + self.alpha*self.u*self.v.dot(x) 30 | 31 | 32 | # part 4 - power method 33 | 34 | # compute power method 35 | # tol is a key-word argument for 
convergence tolerance 36 | def power_method(A, tol=1e-8): 37 | 38 | # rayleigh quotient 39 | # returns v^T*Av 40 | def rq(v, A): 41 | return v.dot(A.dot(v)) 42 | 43 | n = A.shape[1] 44 | # generate random vector with unit length 45 | v = np.random.normal(0, 1, n) 46 | v /= np.linalg.norm(v) 47 | 48 | rqs = [] # keep track of rayleigh quotients as we progress 49 | rqs.append(rq(v, A)) 50 | 51 | while True: 52 | 53 | # v <- A*v 54 | v = A.dot(v) 55 | # normalize v 56 | v /= np.linalg.norm(v) 57 | 58 | rqs.append(rq(v, A)) 59 | # check if rayleigh quotient has converged 60 | if np.abs(rqs[-1] - rqs[-2]) < tol: 61 | break 62 | 63 | # set eigenvalue 64 | lam = rqs[-1] 65 | 66 | return v, lam 67 | 68 | 69 | # part 5 - spectral embedding 70 | v, lam = power_method(A) 71 | 72 | B = sparse_rank1(A, -lam, v, v) 73 | v2, lam2 = power_method(B) 74 | 75 | fig, ax = plt.subplots(1, 1, figsize=(10, 8)) 76 | 77 | ax.scatter(v, v2) 78 | ax.set_xlabel(r'$v_1$') 79 | ax.set_ylabel(r'$v_2$') 80 | ax.set_title("Spectral Embedding") 81 | 82 | 83 | plt.savefig('sbm.png') 84 | plt.show() 85 | -------------------------------------------------------------------------------- /hw/2018_fall/hw1/solution/sbm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icme/cme193/3ed008f6e0951b80faf1d77c9542ae0dd925691d/hw/2018_fall/hw1/solution/sbm.png -------------------------------------------------------------------------------- /hw/2018_fall/hw2/hw2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icme/cme193/3ed008f6e0951b80faf1d77c9542ae0dd925691d/hw/2018_fall/hw2/hw2.pdf -------------------------------------------------------------------------------- /hw/2018_fall/hw2/hw2.tex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | \newcommand{\assgnnum}{2} 3 | \newcommand{\duedate}{October 25, 10:30 
AM} 4 | 5 | \usepackage{amsmath} 6 | %\usepackage{fullpage} 7 | \usepackage{amssymb} 8 | %\usepackage{bbm} 9 | \usepackage{fancyhdr} 10 | %\usepackage{paralist} 11 | \usepackage{graphicx} 12 | \usepackage{caption} 13 | \usepackage{subcaption} 14 | \usepackage[pdftex,colorlinks=true, urlcolor = blue]{hyperref} 15 | \usepackage{listings} 16 | \usepackage{color} 17 | \usepackage{xcolor} 18 | 19 | \definecolor{thegreen}{rgb}{0,.5,0} 20 | \definecolor{comment-green}{rgb}{0,.3,0} 21 | \definecolor{theblue}{rgb}{0,0,.8} 22 | \definecolor{light-gray}{gray}{0.98} 23 | \definecolor{comment-color}{rgb}{0,0,.8} 24 | \definecolor{string-color}{rgb}{0,.75,0} 25 | \definecolor{border-blue}{rgb}{0,0,.6} 26 | 27 | \lstset{% use our version of highlighting 28 | language=python, % using python 29 | keywordstyle={\color{teal}\bfseries}, % keywords 30 | commentstyle=\color{comment-color}, % comments 31 | stringstyle=\color{string-color}, %strings 32 | } 33 | 34 | \lstset{ 35 | basicstyle={\ttfamily\normalsize}, % use font and smaller size 36 | basewidth={0.5em,0.5em}, 37 | showstringspaces=false, % do not emphasize spaces in strings 38 | tabsize=2, % number of spaces of a TAB 39 | aboveskip={0\baselineskip}, % a bit of space above 40 | columns=fixed, % nice spacing 41 | } 42 | 43 | 44 | \oddsidemargin 0in \evensidemargin 0in 45 | \topmargin -0.5in \headheight 0.25in \headsep 0.25in 46 | \textwidth 6.5in \textheight 9in 47 | \parskip 6pt \parindent 0in \footskip 20pt 48 | 49 | % set the header up 50 | \fancyhead{} 51 | \fancyhead[L]{CME193: Homework \assgnnum} 52 | \fancyhead[R]{Due: \duedate} 53 | %%%%%%%%%%%%%%%%%%%%%%%%%% 54 | \renewcommand\headrulewidth{0.4pt} 55 | \setlength\headheight{15pt} 56 | 57 | \newcommand{\p}{\ensuremath{\mathbf{P}}} 58 | \renewcommand{\Pr}[1]{\ensuremath{\p \left \{ #1 \right \}}} 59 | \newcommand{\nti}{\ensuremath{n \to \infty}} 60 | \newcommand{\I}{\ensuremath{\operatorname{I}}} 61 | \newcommand{\One}[1]{\ensuremath{\mathbbm{1}_{\left \{ #1 \right \}}}} 
62 | \newcommand{\E}{\ensuremath{\mathbf{E}}} 63 | \newcommand{\Ex}[2][]{\ensuremath{\E_{#1} \left[ #2 \right]}} 64 | \newcommand{\var}{\ensuremath{\operatorname{Var}}} 65 | \newcommand{\cov}{\ensuremath{\operatorname{Cov}}} 66 | \newcommand{\F}{\ensuremath{\mathcal{F}}} 67 | \newcommand{\R}{\ensuremath{\mathbb{R}}} 68 | \newcommand{\C}{\ensuremath{\mathbb{C}}} 69 | \newcommand{\NormRV}[2]{\ensuremath{\operatorname{N}\left(#1, #2\right)}} 70 | \newcommand{\BetaRV}[2]{\ensuremath{\operatorname{Beta}\left(#1, #2\right)}} 71 | \newcommand{\argmax}{\operatornamewithlimits{argmax}} 72 | \newcommand{\x}{\mathbf{x}} 73 | \newcommand{\A}{\mathbf{A}} 74 | \newcommand{\bb}{\mathbf{b}} 75 | 76 | \newcounter{points} 77 | \setcounter{points}{0} 78 | \newcounter{bonuspoints} 79 | \setcounter{bonuspoints}{0} 80 | 81 | \newcommand\setpoints[1]{\addtocounter{points}{#1}(#1 points)} 82 | \newcommand\setpoint{\addtocounter{points}{1}(1 point)} 83 | \newcommand\setbonuspoints[1]{\addtocounter{bonuspoints}{#1}(#1 bonus points)} 84 | \newcommand\setbonuspoint{\addtocounter{bonuspoints}{1}(1 bonus point)} 85 | 86 | \newcommand\printpoints{Total number of points: \value{\thepoints}} 87 | 88 | \newcommand{\eqD}{\ensuremath{\overset{\mathcal{D}}{=}}} 89 | 90 | \setlength{\parindent}{0in} 91 | 92 | \begin{document} 93 | 94 | \pagestyle{fancy} 95 | %\vspace*{15pt} 96 | 97 | \section*{Overview} 98 | 99 | In this assignment, you'll practice some data manipulation in pandas, and perform a simple regression task using the Abalone data set from the UCI repository 100 | 101 | Abalone are a type of mollusk (you may have eaten one before). Each row of this data set is an individual abalone, with a variety of measurements. You can learn about the age of an abalone by counting rings in its shell (kind of like counting tree rings). 102 | 103 | https://archive.ics.uci.edu/ml/datasets/Abalone 104 | 105 | This link describes the data. 
Take a look at the attribute information so you know what each column is referring to. 106 | 107 | Please put all your code into a single script {\tt hw2.py}, which will generate the image {\tt regression.png}. Submit both files to canvas. 108 | 109 | Please submit the following files to Canvas: 110 | \begin{enumerate} 111 | \item {\tt hw2.py} - a script with all your code, which will generate the figure 112 | \item {\tt regression.png} 113 | \end{enumerate} 114 | 115 | Additionally, {\bf please include comments in your code} to explain what you're doing (doesn't have to be detailed, but should be clear). 116 | 117 | 118 | \section{Read the data into a DataFrame} 119 | 120 | use pandas to read the data at this link into a DataFrame:\\ https://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data 121 | 122 | Hints: 123 | \begin{itemize} 124 | \item decide if {\tt pd.read\_table} or {\tt pd.read\_csv} is more appropriate. 125 | \item you can pass the url provided as a string (like in the Lecture 5 exercise) 126 | \item You'll want to pass in at least 2 keyword arguments: 127 | \begin{itemize} 128 | \item {\tt names} - a list of column names ({\tt `sex', `length', `diam', `height', `wt\_whole', `wt\_shucked', `wt\_viscera', `wt\_shell', `rings'}) 129 | \item {\tt index\_col=False} - indicates that pandas should just create an index for each entry (we don't have ids for the data) 130 | \end{itemize} 131 | \end{itemize} 132 | 133 | \section{Set up the data for a regression problem} 134 | 135 | The problem we're going to try to solve is to predict the number of rings in an abalone shell (the {\tt `rings'} column) from the other features. This is a proxy for the age of the animal. 136 | 137 | This means we want a response vector $y$ that contains the data in the `rings' column, and a design matrix $X$ that contains all the data we'll use to predict the response (the other columns). 
138 | 139 | Use the {\tt patsy} library's {\tt dmatrices} function to form your data and response matrices (see lecture 6 for an example). 140 | 141 | Hints: 142 | \begin{itemize} 143 | \item Since `sex' is categorical you'll want to use {\tt `C(sex)'} in your model specification. 144 | \end{itemize} 145 | 146 | You'll see that {\tt X} has a column called `Intercept'. We will not need this column, so remove it from the dataframe: 147 | 148 | {\tt X.drop(X[["Intercept"]], axis=1, inplace=True)} 149 | 150 | \section{Split the data into train and test sets} 151 | 152 | Now, we're going to start using Scikit learn. 153 | 154 | Split your data into train and test sets (See lecture 6 for an example) 155 | 156 | Set your test size to be 30\% of the data 157 | 158 | \section{Fit a Linear Regression model to the data} 159 | 160 | \begin{enumerate} 161 | \item Use Scikit learn's linear regression class to fit the model: 162 | 163 | {\tt from sklearn.linear\_model import LinearRegression} 164 | \item Use your training data to fit the model 165 | \item Predict the number of rings in your test data using the {\tt predict} method. 166 | \item create a scatter plot of your prediction vs. the true number of rings. Save this figure as {\tt regression.png} and submit it with your homework 167 | \end{enumerate} 168 | 169 | \section{(Bonus) Try another regression model} 170 | 171 | Pick another regression model (e.g., try ridge, lasso, decision trees, nearest neighbors, ...) and repeat parts 3 and 4. 
If you do this, name your image after the classifier you used e.g., {\tt lasso.png} 172 | 173 | 174 | 175 | \end{document} 176 | -------------------------------------------------------------------------------- /hw/2018_fall/hw2/ipynb/knn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icme/cme193/3ed008f6e0951b80faf1d77c9542ae0dd925691d/hw/2018_fall/hw2/ipynb/knn.png -------------------------------------------------------------------------------- /hw/2018_fall/hw2/ipynb/pred.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icme/cme193/3ed008f6e0951b80faf1d77c9542ae0dd925691d/hw/2018_fall/hw2/ipynb/pred.png -------------------------------------------------------------------------------- /hw/2018_fall/hw2/ipynb/regression.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icme/cme193/3ed008f6e0951b80faf1d77c9542ae0dd925691d/hw/2018_fall/hw2/ipynb/regression.png -------------------------------------------------------------------------------- /hw/2018_fall/hw2/solution/hw2.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | from patsy import dmatrices 4 | from sklearn.model_selection import train_test_split 5 | from sklearn.linear_model import LinearRegression 6 | 7 | # part 1 8 | print("part 1") 9 | fname = "https://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data" 10 | fname = "../abalone.data" 11 | df = pd.read_csv(fname, 12 | names=["sex", "length", "diam", "height", "wt_whole", "wt_shucked", "wt_viscera", "wt_shell", "rings"], 13 | index_col=False) 14 | 15 | # part 2 16 | print("part 2") 17 | y, X = dmatrices("rings ~ C(sex) + length + diam + height + wt_whole + wt_shucked + wt_viscera + wt_shell", 18 | df, return_type="dataframe") 19 | 20 
| X.drop(X[['Intercept']], axis=1, inplace=True) 21 | 22 | # part 3 23 | print("part 3") 24 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3) 25 | 26 | # part 4 27 | print("part 4") 28 | model = LinearRegression() 29 | model.fit(X_train, y_train) 30 | 31 | predicted = model.predict(X_test) 32 | 33 | fig, ax = plt.subplots(1, 1, figsize=(10, 8)) 34 | 35 | ax.scatter(predicted, y_test) 36 | 37 | ax.set_xlabel(r'predicted') 38 | ax.set_ylabel(r'true') 39 | ax.set_title("Number of Rings in Abalone") 40 | 41 | 42 | plt.savefig('regression.png') 43 | -------------------------------------------------------------------------------- /hw/2018_fall/hw2/solution/regression.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icme/cme193/3ed008f6e0951b80faf1d77c9542ae0dd925691d/hw/2018_fall/hw2/solution/regression.png -------------------------------------------------------------------------------- /hw/2019_winter/CME193-Assignment2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# CME193 Assignment 2\n", 8 | "\n", 9 | "### Due Sunday 17th Feb 5PM\n", 10 | "\n", 11 | "In this assignment you will implement some machine learning algorithms on the [Congressional Voting Records Dataset](https://archive.ics.uci.edu/ml/datasets/congressional+voting+records). The goal of the assignment is to write a python script that reads in the dataset from the internet, process it and build a few models and output some graphs.\n", 12 | "\n", 13 | "You can use this notebook to write code and check that it works but once you are sure that everything works you will put all your code in a script, that can be called from the command line. 
It is always a good habit to convert the code you write in notebooks into clean scripts so that it can be used with relative use later on.\n", 14 | "\n", 15 | "Note : Most programming courses always have starter code to help students in completing the assignments, this is done so that students do not waste time coding up boilerplate code and also to help graders by standardising the code they have to read, but unfortunately this leaves many students with only the ability to fill in code while they lack confidence in creating a project from scratch. It is in this interest that only minimal starter code is provided in this assignment and you are required to submit a script.\n", 16 | "\n", 17 | "Make sure you refer to the lecture notebooks in case you forgot how to do any of the operations mentioned below." 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "## Dataset\n", 25 | "\n", 26 | "The dataset we will be working with on this assignment is the [Congressional Voting Records Dataset](https://archive.ics.uci.edu/ml/datasets/congressional+voting+records) for 1984, open the link and read the description of the dataset, make sure you understand what the columns and rows represent.\n", 27 | "\n", 28 | "The following code will quickly download the dataset into a pandas dataframe" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 1, 34 | "metadata": {}, 35 | "outputs": [ 36 | { 37 | "data": { 38 | "text/html": [ 39 | "
\n", 40 | "\n", 53 | "\n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | "
partyVote_0Vote_1Vote_2Vote_3Vote_4Vote_5Vote_6Vote_7Vote_8Vote_9Vote_10Vote_11Vote_12Vote_13Vote_14Vote_15
0republicannynyyynnny?yyyny
1republicannynyyynnnnnyyyn?
2democrat?yy?yynnnnynyynn
3democratnyyn?ynnnnynynny
4democratyyynyynnnny?yyyy
\n", 179 | "
" 180 | ], 181 | "text/plain": [ 182 | " party Vote_0 Vote_1 Vote_2 Vote_3 Vote_4 Vote_5 Vote_6 Vote_7 Vote_8 \\\n", 183 | "0 republican n y n y y y n n n \n", 184 | "1 republican n y n y y y n n n \n", 185 | "2 democrat ? y y ? y y n n n \n", 186 | "3 democrat n y y n ? y n n n \n", 187 | "4 democrat y y y n y y n n n \n", 188 | "\n", 189 | " Vote_9 Vote_10 Vote_11 Vote_12 Vote_13 Vote_14 Vote_15 \n", 190 | "0 y ? y y y n y \n", 191 | "1 n n y y y n ? \n", 192 | "2 n y n y y n n \n", 193 | "3 n y n y n n y \n", 194 | "4 n y ? y y y y " 195 | ] 196 | }, 197 | "execution_count": 1, 198 | "metadata": {}, 199 | "output_type": "execute_result" 200 | } 201 | ], 202 | "source": [ 203 | "import pandas as pd\n", 204 | "fname = \"https://archive.ics.uci.edu/ml/machine-learning-databases/voting-records/house-votes-84.data\"\n", 205 | "df = pd.read_csv(fname, names = [\"party\"]+[\"Vote_%d\"% i for i in range(16)])\n", 206 | "df.head()" 207 | ] 208 | }, 209 | { 210 | "cell_type": "markdown", 211 | "metadata": {}, 212 | "source": [ 213 | "## Build Models\n", 214 | "\n", 215 | "Using the `dmatrices` function from the `patsy` package, create a design matrix to predict the political party of the member of congress based on the votes cast by each of the members. Remember to treat the votes as a categorical variable as there are three possibilities for each vote (y,n,?).\n", 216 | "\n", 217 | "Next split the dataset into training set and test set, with 30% reserved for the test set.\n", 218 | "\n", 219 | "Train two different models on the training set.\n", 220 | "1. A Logisitic Regresssion Model\n", 221 | "2. A Support Vector Machine (SVM)\n", 222 | " \n", 223 | "You can use default parameters for both the models.\n", 224 | "\n", 225 | "Output the accuracy of each of the model on the test set." 
226 | ] 227 | }, 228 | { 229 | "cell_type": "markdown", 230 | "metadata": {}, 231 | "source": [ 232 | "## Compare Models\n", 233 | "\n", 234 | "Now we will compare the predictions of the models to each other and the true values. The approach we will use is to use a scatter plot of the predicted probabilites.\n", 235 | "\n", 236 | "First compute the predicted probabilites, from both the models, for the political party being a specific value (say democrat). Now we can use one model as the X axis and the other as the Y axis of the scatter plot. Also colour the dots based on the true political party, i.e. red dots for republicans and blue dots for democrats.\n", 237 | "\n", 238 | "If both the models are accurate and consistent, you should see all the blue dots in one corner and red dots in the other corner, with some sparse points in the middle of both colours.\n", 239 | "\n", 240 | "Save the scatter plot in a file called \"scatter.png\"." 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": {}, 246 | "source": [ 247 | "## Python Script\n", 248 | "\n", 249 | "Copy all your code into a python script and make sure you add some comments describing your code.\n", 250 | "Save the python script as `assign2.py`.\n", 251 | "\n", 252 | "Test run your script by typing `python assign2.py` in your terminal. The code should output the accuracy of both models on the test set and save the graph `scatter.png` in the current directory. (Make sure you have activated your environment when you run the script)" 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": {}, 258 | "source": [ 259 | "## Submission Requirements\n", 260 | "\n", 261 | "Submit the following on canvas\n", 262 | "1. A python script (assign2.py file) which will load the dataset, fit both the models and save the graph \"scatter.png\"\n", 263 | "2. The \"scatter.png\" that you produced." 
264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": null, 269 | "metadata": {}, 270 | "outputs": [], 271 | "source": [] 272 | } 273 | ], 274 | "metadata": { 275 | "kernelspec": { 276 | "display_name": "Python (3.6-cme193_new)", 277 | "language": "python", 278 | "name": "cme193_new" 279 | }, 280 | "language_info": { 281 | "codemirror_mode": { 282 | "name": "ipython", 283 | "version": 3 284 | }, 285 | "file_extension": ".py", 286 | "mimetype": "text/x-python", 287 | "name": "python", 288 | "nbconvert_exporter": "python", 289 | "pygments_lexer": "ipython3", 290 | "version": "3.6.8" 291 | } 292 | }, 293 | "nbformat": 4, 294 | "nbformat_minor": 2 295 | } 296 | -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | CME 193 - Scientific Python 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 27 | 28 |
29 |
30 | 31 | 32 | 78 | 79 | 80 |
81 |

Course description

82 |

This course is recommended for students who are familiar with programming at least at the level of CS106A and want to translate their programming knowledge to Python with the goal of becoming proficient in the scientific computing and data science stack. Lectures will be interactive with a focus on real world applications of scientific computing. Technologies covered include Numpy, SciPy, Pandas, Scikit-learn, and others. Topics will be chosen from Linear Algebra, Optimization, Machine Learning, and Data Science. Prior knowledge of programming will be assumed, and some familiarity with Python is helpful, but not mandatory.

83 |

Course information

84 |

This course starts the second week of the quarter on Tuesday 1/15

85 |

CME 193 - Introduction to Scientific Python - Winter 2019

86 | 92 |

Instructor: 93 | Anjan Dwaraknath (anjandn {at} stanford {dot} edu)

94 |

Office hours:

95 |
    96 |
  • Anjan - Thurs 10:20-12:00 PM (Huang Basement), or by appointment
  • 97 |
98 |

Prerequisites

99 |

Programming

100 |

There are no formal prerequisites. This means we won't check your previous programming experience.

101 |

However, the course material will assume prior programming experience. Ideally, you already are comfortable programming in at least one language (C, C++, fortran, Julia, Matlab, R, Java, ...), and perhaps have seen some basic Python before.

102 |

If you haven't worked with Python in the past, you may wish to complete an introduction to Python on Codecademy 103 | and/or Udacity.

104 |

Scientific Computing

105 |

This is a course on scientific computing with Python. This will assume you

106 |
    107 |
  • Have at least a basic familiarity with linear algebra, optimization, and statistics
  • 108 |
  • Have some familiarity with a scientific computing application (simulations, machine learning, etc.)
  • 109 |
110 |

Format

111 |

This short course runs for four weeks of the quarter (starting 1/15) and is offered each quarter during the academic year.

112 |

Lectures will be interactive using Jupyter Notebooks with a focus on learning by example, and assignments will be application-driven.

113 |

We'll typically devote some time during class to working on exercises, so you can ask for help if you're stuck.

114 |

Grading

115 |

This a 1-unit workshop style course, offered on a credit/no-credit basis. To receive credit:

116 |
    117 |
  • Come to class and participate (we know you may have occasional conflicts)
  • 118 |
  • Make a reasonable attempt at the assignments (3 or 4 total)
  • 119 |
120 |

The goal is to give you some practice and experience with the content of the course, without overwhelming you with work.

121 |

Stanford Policies

122 |

Honor Code

123 |

This course is intended to be collaborative. You can (and should) work with other students in class and on homework. You should turn in your own solutions (don't copy others). If you worked closely with someone or found an answer on the web, please acknowledge the source of your solution.

124 |

Students with Documented Disabilities

125 |

Students with Documented Disabilities: Students who may need an academic accommodation based on the impact of a disability must initiate the request with the Office of Accessible Education (OAE). Professional staff will evaluate the request with required documentation, recommend reasonable accommodations, and prepare an Accommodation Letter for faculty. Unless the student has a temporary disability, Accommodation letters are issued for the entire academic year. Students should contact the OAE as soon as possible since timely notice is needed to coordinate accommodations. The OAE is located at 563 Salvatierra Walk (phone: 723-1066, URL: https://oae.stanford.edu/).

126 |
127 | 128 |
129 |
130 | 131 | 132 | 133 | 134 | 135 | 142 | 143 | 144 | 145 | -------------------------------------------------------------------------------- /jupyter.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | CME 193 - Scientific Python 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 27 | 28 |
29 |
30 | 31 | 32 | 78 | 79 | 80 |
81 |

Note: all commands on this page are invoked from a bash shell (terminal), not from an interactive Python session.

82 |

Jupyter Notebooks

83 |

Jupyter notebooks are used to mix code and markdown (for exposition) in a single place.

84 |

Jupyter comes bundled with Anaconda, but you can download it for whatever Python you are using, using pip (it doesn't have to be Anaconda Python).

85 |

From a terminal:

86 |
pip install jupyter
87 |

Note that Jupyter is not Python. You can use python in a variety of other ways (e.g., through the command line). Additionally, you can use Jupyter notebooks with other programming languages.

88 |

Launching Jupyter

89 |

Once you have Jupyter installed, you can launch a notebook server.

90 |

From a terminal:

91 |
jupyter notebook
92 |

This should launch a notebook server on your computer, and open a tab on your browser. You can then navigate to the folder holding the notebook you'd like to run. Alternatively, you first navigate to the folder you want to be in, then launch the notebook server.

93 |

Note: You can launch Jupyter from the Anaconda launcher. This may work, but I advise that you don't rely on the launcher and instead do it from a terminal.

94 |

Using Jupyter with a Conda virtual environment

95 |

You don't need to install Jupyter in every virtual environment. However, you do need to install an ipykernel for every virtual environment. This is what lets Jupyter know how to run this version of Python.

96 |

First, you may wish to install nb_conda

97 |

Next, you need to install an ipykernel for your virtual environment. This looks like the following:

98 |

From a terminal:

99 |
conda install nb_conda
100 | source activate cme193 # cme193 virtual env
101 | conda install ipykernel # installs a python kernel for this environment
102 |

Now, when you launch a Jupyter notebook server (even without your environment activated), you should see a Python [conda env:cme193] option in the kernel menu.

103 |
104 | 105 |
106 |
107 | 108 | 109 | 110 | 111 | 112 | 119 | 120 | 121 | 122 | -------------------------------------------------------------------------------- /lectures.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | CME 193 - Scientific Python 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 27 | 28 |
29 |
30 | 31 | 32 | 78 | 79 | 80 |
81 |

Lectures

82 |

Disclaimer: these lecture notebooks are subject to change and can be updated any time. 83 | Please check regularly, as lectures are added as we progress.

84 |
    85 |
  1. Intro to Python, NumPy [ipynb] [github]
  2. 86 |
  3. NumPy and Plotting [ipynb][github]
  4. 87 |
  5. Implementing an algorithm using NumPy, Classes and Objects [ipynb][github]
  6. 88 |
  7. SciPy [ipynb][github]
  8. 89 |
  9. Pandas [ipynb][github][exercise][exercise-ipynb][exercise-soln][exercise-soln-ipynb]
  10. 90 |
  11. Scikit-Learn [ipynb][github][exercise][exercise-ipynb][exercise-soln][exercise-ipynb-soln]
  12. 91 |
  13. NumPy Revision and Practice [ipynb][github]
  14. 92 |
  15. Deep Learning and PyTorch [slides][ipynb][github]
  16. 93 |
94 |
95 |

Content from previous offerings of the course (as-is)

96 | 101 |
102 | 103 |
104 |
105 | 106 | 107 | 108 | 109 | 110 | 117 | 118 | 119 | 120 | -------------------------------------------------------------------------------- /nb/.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints/ -------------------------------------------------------------------------------- /nb/2016_spring/lecture-8.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "# CME 193 - Scientific Python\n", 12 | "### Lecture 8 (5/5)\n", 13 | "Spring 2016, Stanford University" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": { 19 | "slideshow": { 20 | "slide_type": "subslide" 21 | } 22 | }, 23 | "source": [ 24 | "### Last time\n", 25 | "* Building some predictive models" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": { 31 | "slideshow": { 32 | "slide_type": "subslide" 33 | } 34 | }, 35 | "source": [ 36 | "### Today\n", 37 | "* We'll learn to recognize handwriting!" 
38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": { 44 | "collapsed": false 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "%matplotlib inline\n", 49 | "import matplotlib.pyplot as plt" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": { 56 | "collapsed": true 57 | }, 58 | "outputs": [], 59 | "source": [ 60 | "from sklearn import datasets, metrics" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": { 67 | "collapsed": true 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "# import NIST digits data set (1797 8x8 images)\n", 72 | "digits = datasets.load_digits()" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": { 79 | "collapsed": false, 80 | "scrolled": true, 81 | "slideshow": { 82 | "slide_type": "subslide" 83 | } 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "plt.figure(figsize=(16, 6))\n", 88 | "for i in range(10):\n", 89 | " plt.subplot(1, 10, i + 1)\n", 90 | " plt.imshow(digits['images'][i], cmap='gist_gray_r',interpolation='none')" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": { 97 | "collapsed": true 98 | }, 99 | "outputs": [], 100 | "source": [ 101 | "X = digits['images']" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": { 108 | "collapsed": false 109 | }, 110 | "outputs": [], 111 | "source": [ 112 | "X.shape" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": { 119 | "collapsed": false 120 | }, 121 | "outputs": [], 122 | "source": [ 123 | "y = digits['target']" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "We'll need to do some data transformation here! How do we make this a regular matrix?" 
131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": null, 136 | "metadata": { 137 | "collapsed": false 138 | }, 139 | "outputs": [], 140 | "source": [ 141 | "X = X.reshape(X.shape[0], -1)\n", 142 | "print X.shape" 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": { 148 | "collapsed": true, 149 | "slideshow": { 150 | "slide_type": "fragment" 151 | } 152 | }, 153 | "source": [ 154 | "Last time, we talked about PCA -- let's use a real PCA library!\n", 155 | "\n", 156 | "Let's visualize how handwritten digits look in a lower dimensional space" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": { 163 | "collapsed": true 164 | }, 165 | "outputs": [], 166 | "source": [ 167 | "from sklearn.decomposition import PCA" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": null, 173 | "metadata": { 174 | "collapsed": false 175 | }, 176 | "outputs": [], 177 | "source": [ 178 | "pca = PCA()\n", 179 | "pca.fit(X)" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": { 186 | "collapsed": false 187 | }, 188 | "outputs": [], 189 | "source": [ 190 | "plt.plot(range(X.shape[-1]), pca.explained_variance_ratio_.cumsum(), '-')\n", 191 | "plt.xlabel('Number of components retained')\n", 192 | "plt.ylabel('% of variance explained')" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "Let's visualize what PCA does for handwritten digits!" 
200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": { 206 | "collapsed": false 207 | }, 208 | "outputs": [], 209 | "source": [ 210 | "plt.figure(figsize=(10, 10))\n", 211 | "for i in range(64):\n", 212 | " plt.subplot(8, 8, i + 1)\n", 213 | " plt.imshow(pca.components_[i].reshape(8, 8), interpolation='none', cmap='gist_gray_r')" 214 | ] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "metadata": {}, 219 | "source": [ 220 | "What do you notice?" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": null, 226 | "metadata": { 227 | "collapsed": true 228 | }, 229 | "outputs": [], 230 | "source": [ 231 | "R = pca.transform(X)" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": null, 237 | "metadata": { 238 | "collapsed": true 239 | }, 240 | "outputs": [], 241 | "source": [ 242 | "cols = ['pc_%s' % i for i in range(R.shape[-1])]" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": null, 248 | "metadata": { 249 | "collapsed": true 250 | }, 251 | "outputs": [], 252 | "source": [ 253 | "import pandas as pd" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": null, 259 | "metadata": { 260 | "collapsed": false 261 | }, 262 | "outputs": [], 263 | "source": [ 264 | "pc = pd.DataFrame(R, columns=cols)\n", 265 | "pc['digit'] = [str(l) for l in y]" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": null, 271 | "metadata": { 272 | "collapsed": false 273 | }, 274 | "outputs": [], 275 | "source": [ 276 | "plt.figure(figsize=(10, 10))\n", 277 | "colors = ['#1abc9c', '#ff0084', '#3498db', '#9b59b6', \n", 278 | " '#f1c40f', '#e67e22', '#e74c3c', '#34495e']\n", 279 | "\n", 280 | "for color, (lab, x) in zip(colors, pc.groupby('digit')):\n", 281 | " plt.plot(x.pc_0, x.pc_1, 'o', label=lab, color=color)\n", 282 | "plt.xlabel('First PC')\n", 283 | "plt.ylabel('Second PC')\n", 284 | "plt.legend()\n", 285 | " " 286 | 
] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": null, 291 | "metadata": { 292 | "collapsed": true 293 | }, 294 | "outputs": [], 295 | "source": [ 296 | "from sklearn.linear_model import LogisticRegressionCV\n", 297 | "from sklearn.cross_validation import train_test_split" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": null, 303 | "metadata": { 304 | "collapsed": true 305 | }, 306 | "outputs": [], 307 | "source": [ 308 | "model = LogisticRegressionCV()" 309 | ] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": null, 314 | "metadata": { 315 | "collapsed": false 316 | }, 317 | "outputs": [], 318 | "source": [ 319 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)\n", 320 | "model.fit(X_train, y_train)\n", 321 | "yhat = model.predict(X_test)\n", 322 | "print 'accuracy at {}%'.format(100 * (yhat == y_test).mean())" 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": null, 328 | "metadata": { 329 | "collapsed": true 330 | }, 331 | "outputs": [], 332 | "source": [ 333 | "pc.drop('digit', axis=1, inplace=True)" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": null, 339 | "metadata": { 340 | "collapsed": false 341 | }, 342 | "outputs": [], 343 | "source": [ 344 | "P = pc.values" 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "execution_count": null, 350 | "metadata": { 351 | "collapsed": false 352 | }, 353 | "outputs": [], 354 | "source": [ 355 | "!pip install tqdm" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": null, 361 | "metadata": { 362 | "collapsed": true 363 | }, 364 | "outputs": [], 365 | "source": [ 366 | "import tqdm" 367 | ] 368 | }, 369 | { 370 | "cell_type": "code", 371 | "execution_count": null, 372 | "metadata": { 373 | "collapsed": false 374 | }, 375 | "outputs": [], 376 | "source": [ 377 | "acc = []\n", 378 | "for i in tqdm.tqdm(xrange(P.shape[-1])):\n", 379 | " 
X_train, X_test, y_train, y_test = train_test_split(P[:, :(i + 1)], y, test_size=0.5)\n", 380 | " model = LogisticRegressionCV()\n", 381 | " model.fit(X_train, y_train)\n", 382 | " yhat = model.predict(X_test)\n", 383 | " acc.append(100 * (yhat == y_test).mean())" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": null, 389 | "metadata": { 390 | "collapsed": false 391 | }, 392 | "outputs": [], 393 | "source": [ 394 | "plt.plot(range(P.shape[-1]), acc, '-')\n", 395 | "plt.xlabel('Number of components retained')\n", 396 | "plt.ylabel('Predictive Accuracy')" 397 | ] 398 | }, 399 | { 400 | "cell_type": "code", 401 | "execution_count": null, 402 | "metadata": { 403 | "collapsed": true 404 | }, 405 | "outputs": [], 406 | "source": [] 407 | } 408 | ], 409 | "metadata": { 410 | "kernelspec": { 411 | "display_name": "Python 2", 412 | "language": "python", 413 | "name": "python2" 414 | }, 415 | "language_info": { 416 | "codemirror_mode": { 417 | "name": "ipython", 418 | "version": 2 419 | }, 420 | "file_extension": ".py", 421 | "mimetype": "text/x-python", 422 | "name": "python", 423 | "nbconvert_exporter": "python", 424 | "pygments_lexer": "ipython2", 425 | "version": "2.7.10" 426 | }, 427 | "name": "_merged_merged_merged_merged_merged_merged_merged" 428 | }, 429 | "nbformat": 4, 430 | "nbformat_minor": 0 431 | } 432 | -------------------------------------------------------------------------------- /nb/2018_fall/Lecture5-supplement.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# CME 193 - Pandas Exercise Supplement\n", 8 | "\n", 9 | "In this extended exercise, you'll load and play with CO2 data collected at the Mauna Loa observatory over the last 60 years. 
\n", 10 | "\n", 11 | "* NOAA Website: https://www.esrl.noaa.gov/gmd/ccgg/trends/full.html\n", 12 | "* NOAA data: https://www.esrl.noaa.gov/gmd/ccgg/trends/data.html\n", 13 | "\n", 14 | "The monthly data can be found at this [link](ftp://aftp.cmdl.noaa.gov/products/trends/co2/co2_mm_mlo.txt)" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import numpy as np\n", 24 | "import scipy\n", 25 | "import pandas as pd\n", 26 | "import matplotlib.pyplot as plt\n", 27 | "%matplotlib inline\n", 28 | "\n", 29 | "np.random.seed(0)" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "Reads the data from the ftp server directly." 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "df = pd.read_table('ftp://aftp.cmdl.noaa.gov/products/trends/co2/co2_mm_mlo.txt', \n", 46 | " delim_whitespace=True, \n", 47 | " comment='#',\n", 48 | " names=[\"year\", \"month\", \"decdate\", \"co2\", \"co2interp\", \"trend\", \"days\"],\n", 49 | " index_col=False)" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": { 56 | "scrolled": false 57 | }, 58 | "outputs": [], 59 | "source": [ 60 | "pd.set_option('display.max_rows', 10)\n", 61 | "df" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "# copies the original data.\n", 71 | "orig = df.copy()" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "## Part 1 - Normalize the Date\n", 79 | "\n", 80 | "1. 
create a new column for the dataframe called 'day' that is set to be 1 in every entry" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "# your code here" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "2. The dataframe now has columns for 'day', 'month', and 'year'. Use `pd.to_datetime()` to create a new series of dates \n", 97 | "\n", 98 | "`dates = pd.to_datetime(...)`" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "# your code here" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "3. set a new column of the dataframe to hold this series. Call the column `'date'`" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "# your code here" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "4. set the index of the dataframe to be the `'date'` column using the `set_index()` method." 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": null, 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [ 139 | "# your code here" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": {}, 145 | "source": [ 146 | "5. Now let's remove the old columns with date information. Use the `drop()` method to remove the 'day', 'month', 'year', and 'decdate' columns. Hint: `df.drop(..., axis=1, inplace=True)`\n", 147 | "\n", 148 | "5a. Go ahead and drop the 'days' column as well, since we're not going to use it." 
149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "# your code here" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": [ 164 | "## Part 2 - deal with missing values\n", 165 | "\n", 166 | "1. First, use the `plot()` method to visualize the contents of your dataframe. What do you see?" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "# your code here" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "if you read the header for the file we used to load the dataframe, you'll see that missing values take the value -99.99.\n", 183 | "\n", 184 | "2. Set values that are `-99.99` to `None` (this indicates a missing value in Pandas).\n", 185 | "\n", 186 | "Hint: use the `applymap()` method, and the lambda function\n", 187 | "```python\n", 188 | "lambda x: None if x == -99.99 else x\n", 189 | "```\n", 190 | "If you're familiar with [ternary operators](https://en.wikipedia.org/wiki/%3F:), this is the equivalent of\n", 191 | "```\n", 192 | "x == -99.99 ? None : x\n", 193 | "```\n", 194 | "Note that you may need to make a new assignment e.g., `df = df.applymap(...)`" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": null, 200 | "metadata": {}, 201 | "outputs": [], 202 | "source": [ 203 | "# your code here" 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": {}, 209 | "source": [ 210 | "3. Plot your dataframe again. What do you see now?\n", 211 | "\n", 212 | "3a. Try plotting just the 'co2' series. What do you see?" 
213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "metadata": {}, 219 | "outputs": [], 220 | "source": [ 221 | "# your code here" 222 | ] 223 | }, 224 | { 225 | "cell_type": "markdown", 226 | "metadata": {}, 227 | "source": [ 228 | "## Part 3 - Create New DataFrames with rows that meet conditions" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 | "source": [ 235 | "1. Create new dataframe called `recent` that contains all rows of the previous dataframe since 2007. Plot it." 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "metadata": {}, 242 | "outputs": [], 243 | "source": [ 244 | "# your code here" 245 | ] 246 | }, 247 | { 248 | "cell_type": "markdown", 249 | "metadata": {}, 250 | "source": [ 251 | "2. Create a new dataframe called `old` that contains all rows of the dataframe before 1990. Plot it." 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": null, 257 | "metadata": {}, 258 | "outputs": [], 259 | "source": [ 260 | "# your code here" 261 | ] 262 | }, 263 | { 264 | "cell_type": "markdown", 265 | "metadata": {}, 266 | "source": [ 267 | "##### At this point, by inspection, you might be convinced there is further analysis to be done" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": null, 273 | "metadata": {}, 274 | "outputs": [], 275 | "source": [ 276 | "np.var(old['trend']), np.var(recent['trend'])" 277 | ] 278 | }, 279 | { 280 | "cell_type": "markdown", 281 | "metadata": {}, 282 | "source": [ 283 | "#### Seaborn color palettes\n", 284 | "http://seaborn.pydata.org/tutorial/color_palettes.html" 285 | ] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": {}, 290 | "source": [ 291 | "## Part 4 - Create some groups\n", 292 | "\n", 293 | "Let's go back to the original data that we loaded" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": null, 299 | "metadata": 
{}, 300 | "outputs": [], 301 | "source": [ 302 | "df = orig\n", 303 | "df" 304 | ] 305 | }, 306 | { 307 | "cell_type": "markdown", 308 | "metadata": {}, 309 | "source": [ 310 | "Suppose that we want to look at co2 averages by year instead of by month.\n", 311 | "\n", 312 | "1. drop rows with missing values\n", 313 | "\n", 314 | "1a. apply the map that sends -99.99 to none\n", 315 | "\n", 316 | "1b. use the `dropna()` method to remove rows with missing values: `df = df.dropna()`" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": null, 322 | "metadata": {}, 323 | "outputs": [], 324 | "source": [ 325 | "# your code here" 326 | ] 327 | }, 328 | { 329 | "cell_type": "markdown", 330 | "metadata": {}, 331 | "source": [ 332 | "2. Create a group for each year (use key 'year')" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": null, 338 | "metadata": {}, 339 | "outputs": [], 340 | "source": [ 341 | "# your code here" 342 | ] 343 | }, 344 | { 345 | "cell_type": "markdown", 346 | "metadata": {}, 347 | "source": [ 348 | "3. Aggregate the groups into a new dataframe, `df2`, using `np.mean`\n", 349 | "\n", 350 | "3a. you can drop all the columns except `'co2'` if you'd like" 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": null, 356 | "metadata": {}, 357 | "outputs": [], 358 | "source": [ 359 | "# your code here" 360 | ] 361 | }, 362 | { 363 | "cell_type": "markdown", 364 | "metadata": {}, 365 | "source": [ 366 | "4. 
make a plot of the `'co2'` series" 367 | ] 368 | }, 369 | { 370 | "cell_type": "code", 371 | "execution_count": null, 372 | "metadata": {}, 373 | "outputs": [], 374 | "source": [ 375 | "# your code here" 376 | ] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "execution_count": null, 381 | "metadata": {}, 382 | "outputs": [], 383 | "source": [] 384 | } 385 | ], 386 | "metadata": { 387 | "kernelspec": { 388 | "display_name": "Python [default]", 389 | "language": "python", 390 | "name": "python3" 391 | }, 392 | "language_info": { 393 | "codemirror_mode": { 394 | "name": "ipython", 395 | "version": 3 396 | }, 397 | "file_extension": ".py", 398 | "mimetype": "text/x-python", 399 | "name": "python", 400 | "nbconvert_exporter": "python", 401 | "pygments_lexer": "ipython3", 402 | "version": "3.6.5" 403 | } 404 | }, 405 | "nbformat": 4, 406 | "nbformat_minor": 2 407 | } 408 | -------------------------------------------------------------------------------- /nb/2018_fall/data/iris.csv: -------------------------------------------------------------------------------- 1 | sepal_length,sepal_width,petal_length,petal_width,name 2 | 5.1,3.5,1.4,0.2,setosa 3 | 4.9,3.0,1.4,0.2,setosa 4 | 4.7,3.2,1.3,0.2,setosa 5 | 4.6,3.1,1.5,0.2,setosa 6 | 5.0,3.6,1.4,0.2,setosa 7 | 5.4,3.9,1.7,0.4,setosa 8 | 4.6,3.4,1.4,0.3,setosa 9 | 5.0,3.4,1.5,0.2,setosa 10 | 4.4,2.9,1.4,0.2,setosa 11 | 4.9,3.1,1.5,0.1,setosa 12 | 5.4,3.7,1.5,0.2,setosa 13 | 4.8,3.4,1.6,0.2,setosa 14 | 4.8,3.0,1.4,0.1,setosa 15 | 4.3,3.0,1.1,0.1,setosa 16 | 5.8,4.0,1.2,0.2,setosa 17 | 5.7,4.4,1.5,0.4,setosa 18 | 5.4,3.9,1.3,0.4,setosa 19 | 5.1,3.5,1.4,0.3,setosa 20 | 5.7,3.8,1.7,0.3,setosa 21 | 5.1,3.8,1.5,0.3,setosa 22 | 5.4,3.4,1.7,0.2,setosa 23 | 5.1,3.7,1.5,0.4,setosa 24 | 4.6,3.6,1.0,0.2,setosa 25 | 5.1,3.3,1.7,0.5,setosa 26 | 4.8,3.4,1.9,0.2,setosa 27 | 5.0,3.0,1.6,0.2,setosa 28 | 5.0,3.4,1.6,0.4,setosa 29 | 5.2,3.5,1.5,0.2,setosa 30 | 5.2,3.4,1.4,0.2,setosa 31 | 4.7,3.2,1.6,0.2,setosa 32 | 4.8,3.1,1.6,0.2,setosa 33 | 
5.4,3.4,1.5,0.4,setosa 34 | 5.2,4.1,1.5,0.1,setosa 35 | 5.5,4.2,1.4,0.2,setosa 36 | 4.9,3.1,1.5,0.1,setosa 37 | 5.0,3.2,1.2,0.2,setosa 38 | 5.5,3.5,1.3,0.2,setosa 39 | 4.9,3.1,1.5,0.1,setosa 40 | 4.4,3.0,1.3,0.2,setosa 41 | 5.1,3.4,1.5,0.2,setosa 42 | 5.0,3.5,1.3,0.3,setosa 43 | 4.5,2.3,1.3,0.3,setosa 44 | 4.4,3.2,1.3,0.2,setosa 45 | 5.0,3.5,1.6,0.6,setosa 46 | 5.1,3.8,1.9,0.4,setosa 47 | 4.8,3.0,1.4,0.3,setosa 48 | 5.1,3.8,1.6,0.2,setosa 49 | 4.6,3.2,1.4,0.2,setosa 50 | 5.3,3.7,1.5,0.2,setosa 51 | 5.0,3.3,1.4,0.2,setosa 52 | 7.0,3.2,4.7,1.4,versicolor 53 | 6.4,3.2,4.5,1.5,versicolor 54 | 6.9,3.1,4.9,1.5,versicolor 55 | 5.5,2.3,4.0,1.3,versicolor 56 | 6.5,2.8,4.6,1.5,versicolor 57 | 5.7,2.8,4.5,1.3,versicolor 58 | 6.3,3.3,4.7,1.6,versicolor 59 | 4.9,2.4,3.3,1.0,versicolor 60 | 6.6,2.9,4.6,1.3,versicolor 61 | 5.2,2.7,3.9,1.4,versicolor 62 | 5.0,2.0,3.5,1.0,versicolor 63 | 5.9,3.0,4.2,1.5,versicolor 64 | 6.0,2.2,4.0,1.0,versicolor 65 | 6.1,2.9,4.7,1.4,versicolor 66 | 5.6,2.9,3.6,1.3,versicolor 67 | 6.7,3.1,4.4,1.4,versicolor 68 | 5.6,3.0,4.5,1.5,versicolor 69 | 5.8,2.7,4.1,1.0,versicolor 70 | 6.2,2.2,4.5,1.5,versicolor 71 | 5.6,2.5,3.9,1.1,versicolor 72 | 5.9,3.2,4.8,1.8,versicolor 73 | 6.1,2.8,4.0,1.3,versicolor 74 | 6.3,2.5,4.9,1.5,versicolor 75 | 6.1,2.8,4.7,1.2,versicolor 76 | 6.4,2.9,4.3,1.3,versicolor 77 | 6.6,3.0,4.4,1.4,versicolor 78 | 6.8,2.8,4.8,1.4,versicolor 79 | 6.7,3.0,5.0,1.7,versicolor 80 | 6.0,2.9,4.5,1.5,versicolor 81 | 5.7,2.6,3.5,1.0,versicolor 82 | 5.5,2.4,3.8,1.1,versicolor 83 | 5.5,2.4,3.7,1.0,versicolor 84 | 5.8,2.7,3.9,1.2,versicolor 85 | 6.0,2.7,5.1,1.6,versicolor 86 | 5.4,3.0,4.5,1.5,versicolor 87 | 6.0,3.4,4.5,1.6,versicolor 88 | 6.7,3.1,4.7,1.5,versicolor 89 | 6.3,2.3,4.4,1.3,versicolor 90 | 5.6,3.0,4.1,1.3,versicolor 91 | 5.5,2.5,4.0,1.3,versicolor 92 | 5.5,2.6,4.4,1.2,versicolor 93 | 6.1,3.0,4.6,1.4,versicolor 94 | 5.8,2.6,4.0,1.2,versicolor 95 | 5.0,2.3,3.3,1.0,versicolor 96 | 5.6,2.7,4.2,1.3,versicolor 97 | 5.7,3.0,4.2,1.2,versicolor 
98 | 5.7,2.9,4.2,1.3,versicolor 99 | 6.2,2.9,4.3,1.3,versicolor 100 | 5.1,2.5,3.0,1.1,versicolor 101 | 5.7,2.8,4.1,1.3,versicolor 102 | 6.3,3.3,6.0,2.5,virginica 103 | 5.8,2.7,5.1,1.9,virginica 104 | 7.1,3.0,5.9,2.1,virginica 105 | 6.3,2.9,5.6,1.8,virginica 106 | 6.5,3.0,5.8,2.2,virginica 107 | 7.6,3.0,6.6,2.1,virginica 108 | 4.9,2.5,4.5,1.7,virginica 109 | 7.3,2.9,6.3,1.8,virginica 110 | 6.7,2.5,5.8,1.8,virginica 111 | 7.2,3.6,6.1,2.5,virginica 112 | 6.5,3.2,5.1,2.0,virginica 113 | 6.4,2.7,5.3,1.9,virginica 114 | 6.8,3.0,5.5,2.1,virginica 115 | 5.7,2.5,5.0,2.0,virginica 116 | 5.8,2.8,5.1,2.4,virginica 117 | 6.4,3.2,5.3,2.3,virginica 118 | 6.5,3.0,5.5,1.8,virginica 119 | 7.7,3.8,6.7,2.2,virginica 120 | 7.7,2.6,6.9,2.3,virginica 121 | 6.0,2.2,5.0,1.5,virginica 122 | 6.9,3.2,5.7,2.3,virginica 123 | 5.6,2.8,4.9,2.0,virginica 124 | 7.7,2.8,6.7,2.0,virginica 125 | 6.3,2.7,4.9,1.8,virginica 126 | 6.7,3.3,5.7,2.1,virginica 127 | 7.2,3.2,6.0,1.8,virginica 128 | 6.2,2.8,4.8,1.8,virginica 129 | 6.1,3.0,4.9,1.8,virginica 130 | 6.4,2.8,5.6,2.1,virginica 131 | 7.2,3.0,5.8,1.6,virginica 132 | 7.4,2.8,6.1,1.9,virginica 133 | 7.9,3.8,6.4,2.0,virginica 134 | 6.4,2.8,5.6,2.2,virginica 135 | 6.3,2.8,5.1,1.5,virginica 136 | 6.1,2.6,5.6,1.4,virginica 137 | 7.7,3.0,6.1,2.3,virginica 138 | 6.3,3.4,5.6,2.4,virginica 139 | 6.4,3.1,5.5,1.8,virginica 140 | 6.0,3.0,4.8,1.8,virginica 141 | 6.9,3.1,5.4,2.1,virginica 142 | 6.7,3.1,5.6,2.4,virginica 143 | 6.9,3.1,5.1,2.3,virginica 144 | 5.8,2.7,5.1,1.9,virginica 145 | 6.8,3.2,5.9,2.3,virginica 146 | 6.7,3.3,5.7,2.5,virginica 147 | 6.7,3.0,5.2,2.3,virginica 148 | 6.3,2.5,5.0,1.9,virginica 149 | 6.5,3.0,5.2,2.0,virginica 150 | 6.2,3.4,5.4,2.3,virginica 151 | 5.9,3.0,5.1,1.8,virginica 152 | -------------------------------------------------------------------------------- /nb/2018_fall/img/iris_knn.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/icme/cme193/3ed008f6e0951b80faf1d77c9542ae0dd925691d/nb/2018_fall/img/iris_knn.png -------------------------------------------------------------------------------- /nb/2018_fall/img/splitApplyCombine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icme/cme193/3ed008f6e0951b80faf1d77c9542ae0dd925691d/nb/2018_fall/img/splitApplyCombine.png -------------------------------------------------------------------------------- /nb/2018_fall/lecture_1/ex_1.md: -------------------------------------------------------------------------------- 1 | # Exercise 1 solutions 2 | 3 | 1. 4 | ```python3 5 | print("Hello, world!") 6 | ``` 7 | 2. 8 | ```python3 9 | import os 10 | os.path.abspath('.') 11 | ``` 12 | 3. 13 | ```python3 14 | x = 1 15 | x+=1 16 | print(x*2) 17 | ``` 18 | 4. 19 | ```python3 20 | for i in range(1,10): 21 | print(i) 22 | ``` 23 | 5. 24 | ```python3 25 | x = 1 26 | while x < 10000: 27 | print(x) 28 | x = x*2 29 | ``` 30 | 6. 31 | ```python3 32 | def testfn(a,b): 33 | return a+2*b 34 | 35 | testfn(1,2) 36 | ``` 37 | 7. 38 | ```python3 39 | "Hello," + " " + World!" 40 | ``` 41 | 8. 42 | ```python3 43 | str1 = "float %.2f" % 1.0 44 | print(str1) 45 | str2 = "integer %d" % 2 46 | print(str2) 47 | ``` 48 | 9. 
49 | -------------------------------------------------------------------------------- /nb/2018_fall/lecture_2/test_script.py: -------------------------------------------------------------------------------- 1 | # this is a test script that runs some basic python code 2 | 3 | print("Hello, world!") 4 | 5 | def square(x): 6 | return x*x 7 | 8 | 9 | a = 5 10 | print("%d squared is %d" % (a, square(a))) 11 | -------------------------------------------------------------------------------- /nb/2019_spring/CME193_Homework_2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# CME193 - Homework 2\n", 8 | "\n", 9 | "In this assignment, you'll be analyzing an interesting dataset: the passengers of the ship *Titanic*. As you are probably familiar, the *Titanic* was a ship that collided with an iceberg on April 15, 1912 and sank. About one-third of the passengers survived. We are interested in analyzing what factors are correlated with whether a person survived (whether the person was traveling with family, the person's sex, the person's age, etc.)." 
10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "## Question 1: Loading the dataset \n", 17 | "\n", 18 | "The dataset contains the following columns:\n", 19 | "- `Survived`: `1` if the person survived, `0` if not\n", 20 | "- `Pclass`: the ticket class the person was travelling under, `1` for 1st class, `2` for 2nd class, `3` for 3rd class\n", 21 | "- `Name`: name\n", 22 | "- `Sex`: sex, `male` for male, `female` for female\n", 23 | "- `Age`: age\n", 24 | "- `Siblings/Spouses Aboard`: the number of siblings and spouses aboard\n", 25 | "- `Parents/Children Aboard`: the number of parents and children aboard\n", 26 | "- `Fare`: the amount paid for the ticket, in pounds\n", 27 | "\n", 28 | "Eventually, we would like to use a classification algorithm to predict the `Survived` column from the other columns (besides `Name`). This means we need to convert the non-numeric columns into numeric columns (or boolean columns, for which `True` and `False` can be interpreted as `1` and `0` respectively).\n", 29 | "\n", 30 | "You can find the dataset at the following URL (from CS 109, originally from Kaggle):\n", 31 | "\n", 32 | " http://web.stanford.edu/class/archive/cs/cs109/cs109.1166/stuff/titanic.csv\n", 33 | "\n", 34 | "Load this CSV file into a Pandas dataframe and look at the data. Convert `Pclass` and `Sex` into boolean columns; that is, create four new boolean columns, `Female`, `1st Class`, `2nd Class`, and `3rd Class`, with the appropriate values. For example, `Female` would be `True` if the person is female, `False` if the person is male.\n", 35 | "\n" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "# YOUR CODE HERE" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "## Question 2: Building the dataset\n", 52 | "Next, let's convert our dataset into NumPy arrays. 
Create a NumPy array `X` derived from the Pandas dataframe with the numerical and boolean columns, that is: `['Age', 'Siblings/Spouses Aboard', 'Parents/Children Aboard', 'Fare', 'Female', '1st Class', '2nd Class', '3rd Class']`. Create a NumPy vector `y` derived from the `Survived` column. Ensure that the entries of both `X` and `y` are floating point numbers. (Hint: if `a` is a NumPy array, then `a.astype(float)` converts the entries to floats.)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "# YOUR CODE HERE" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "## Question 3: Logistic regression\n", 69 | "Logistic regression is a classification algorithm that comes with scikit-learn. In this case, we would like to predict one of two classes, `1` if the person survived, `0` if the person did not. Normally, we would split our dataset into a training set and a test set, but for simplicity we will not do that here; instead we will train on our entire dataset.\n", 70 | "\n", 71 | "Using scikit-learn, fit a logistic regression model to the dataset we created in Question 2. What percentage of the passengers' outcomes does the model correctly predict? What does the model think about the fate of a 30-year-old male travelling alone who paid 50 pounds for his 2nd-class ticket?" 
72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "import numpy as np\n", 81 | "from sklearn.linear_model import LogisticRegression\n", 82 | "# YOUR CODE HERE" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": null, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "# YOUR CODE HERE" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "# YOUR CODE HERE" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "## Question 4: Defining the model\n", 108 | "As an exercise, we will now do logistic regression again, but this time \"manually\" -- without the use of scikit-learn -- by solving the underlying optimization problem. To do so, we'll make use of SciPy's `special` and `optimize` packages.\n", 109 | "\n", 110 | "Consider a single observation $x$ (a single passenger), represented by a vector of length $k$. Logistic regression defines a model with two parameters, $\\alpha$ and $\\beta$, where $\\alpha$ is a number and $\\beta$ is a vector of length $k$. 
Assuming we know $\\alpha$ and $\\beta$, the probability that $x$ survives is the number\n", 111 | "$$ \\text{probability that }x\\text{ survives} = \\frac{1}{1 + \\exp(-(\\alpha + x^T \\beta)) }.$$\n", 112 | "Our eventual goal is to find the values for $\\alpha$ and $\\beta$ that results in a probability that best matches the observed outcome for $x$.\n", 113 | "\n", 114 | "Define a function `probability_of_surviving(alpha, beta, X)` that computes probabilities of the passengers in `X` surviving,\n", 115 | "$$ \\frac{1}{1 + \\exp(-(\\alpha + X \\beta )) },$$\n", 116 | "where:\n", 117 | "- `alpha` is a number $\\alpha$\n", 118 | "- `beta` is a vector $\\beta$ of length $k$\n", 119 | "- `X` is an $n$-by-$k$ matrix $X$, where each of the $n$ rows corresponds to an observation (a passenger), and each column corresponds to a feature.\n", 120 | "\n", 121 | "This function should output a vector of length $n$, with each entry being the probability that each person survives, assuming we know $\\alpha$ and $\\beta$. Note that $X\\beta$ should be interpreted as matrix multiplication, but all other operations (addition, exponential, division) operate elementwise. (Hint: check out SciPy's `special.expit`.)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "import scipy.special as special\n", 131 | "\n", 132 | "def probability_of_surviving(alpha, beta, X):\n", 133 | " # YOUR CODE HERE\n", 134 | " pass" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "## Question 5: Defining the loss\n", 142 | "Our goal is to find the $\\alpha$ and $\\beta$ values that best match the observed data $(X, y)$. To do this, we will construct a loss function that, when given $\\alpha$ and $\\beta$, characterizes how good our predictions $\\hat{y}$ are compared to the ground truth $y$. 
\n", 143 | "\n", 144 | "Mathematically, our loss function will be $$\n", 145 | " L(\\alpha, \\beta, X, y) = \\text{sum}(\\text{KL}(y, \\hat{y})) + \\frac{1}{2}\\beta^T \\beta,\n", 146 | "$$ where:\n", 147 | "- $\\hat{y}$ is the vector of predicted probabilities (from Question 4), which is computed from $\\alpha$, $\\beta$, and $X$\n", 148 | "- $\\text{KL}(y, \\hat{y})$ is the *Kullback-Leibler divergence*, which measures how different the ground truth $y$ is from the predicted probabilities $\\hat{y}$\n", 149 | "- $\\text{sum}(\\cdot)$ sums up the entries of a vector, in this case adding up the loss for each passenger in $X$\n", 150 | "- $\\frac{1}{2}\\beta^T \\beta$ is a *regularization term* that prevents overfitting.\n", 151 | "\n", 152 | "Define a function `logistic_regression_loss(alpha_beta, X, y)` that computes $L(\\alpha, \\beta, X, y)$, where\n", 153 | "- `alpha_beta` is a vector of length $1+ k$ that contains $\\alpha$ in its first entry and $\\beta$ in the remaining entries\n", 154 | "- `X` is an $n$-by-$k$ matrix $X$, where each of the $n$ rows corresponds to an observation (a passenger), and each column corresponds to a feature\n", 155 | "- `y` is a vector $y$ of length $n$, that is 1 if the passenger survived and 0 if they did not.\n", 156 | "\n", 157 | "(Hint: check out SciPy's `special.kl_div`.) " 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [ 166 | "def logistic_regression_loss(alpha_beta, X, y):\n", 167 | " # YOUR CODE HERE\n", 168 | " pass" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": {}, 174 | "source": [ 175 | "## Question 6: Logistic regression (the hard way)\n", 176 | "Use SciPy's `optimize.minimize` to find the $\\alpha$ and $\\beta$ that best explain the data $(X,y)$. In other words, find $\\alpha$ and $\\beta$ that minimizes the function `logistic_regression_loss`. 
Use an initial guess of $\\alpha = 0$, $\\beta = 0$. Compare your result to the $\\alpha$ and $\\beta$ computed by scikit-learn, given by `model.intercept_` and `model.coef_`. (It will not match exactly but should be somewhat close.)\n", 177 | "\n", 178 | "(Hint: you'll need to make use of the `args` argument for `optimize.minimize` to pass in `X` and `y`. Also, you may get a warning that the desired error was not achieved, but you can ignore this.)\n", 179 | "\n" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "import scipy.optimize as optimize\n", 189 | "# YOUR CODE HERE" 190 | ] 191 | }, 192 | { 193 | "cell_type": "markdown", 194 | "metadata": {}, 195 | "source": [ 196 | "## Question 7: Predictions\n", 197 | "\n", 198 | "With what probability does the model learned in Question 6 think a 30-year-old male travelling alone who paid 50 pounds for his 2nd-class ticket will survive? What percentage of the passengers' outcomes does the model correctly predict, if we say that the model predicts survival if the probability of survival is greater than 0.5?" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "# YOUR CODE HERE" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": null, 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "# YOUR CODE HERE" 217 | ] 218 | }, 219 | { 220 | "cell_type": "markdown", 221 | "metadata": {}, 222 | "source": [ 223 | "# Submission instructions\n", 224 | "\n", 225 | "Save this notebook (`CME193_Homework_2.ipynb`), and submit it on Canvas." 
226 | ] 227 | } 228 | ], 229 | "metadata": { 230 | "kernelspec": { 231 | "display_name": "Python 3", 232 | "language": "python", 233 | "name": "python3" 234 | }, 235 | "language_info": { 236 | "codemirror_mode": { 237 | "name": "ipython", 238 | "version": 3 239 | }, 240 | "file_extension": ".py", 241 | "mimetype": "text/x-python", 242 | "name": "python", 243 | "nbconvert_exporter": "python", 244 | "pygments_lexer": "ipython3", 245 | "version": "3.7.2" 246 | } 247 | }, 248 | "nbformat": 4, 249 | "nbformat_minor": 2 250 | } 251 | -------------------------------------------------------------------------------- /nb/2019_spring/Lecture_2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# CME 193 - Lecture 2" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Example: Rational Numbers\n", 15 | "\n", 16 | "Let's continue with our example of rational numbers (fractions), that is, numbers of the form\n", 17 | "$$r = \\frac{p}{q}$$\n", 18 | "where $p$ and $q$ are integers. 
Let's make it support addition using the formula:\n", 19 | "$$ \\frac{p_1}{q_1} + \\frac{p_2}{q_2} = \\frac{p_1 q_2 + p_2 q_1}{q_1 q_2}$$" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import math\n", 29 | "\n", 30 | "class Rational:\n", 31 | " def __init__(self, p, q=1):\n", 32 | " \n", 33 | " if q == 0:\n", 34 | " raise ValueError('Denominator must not be zero')\n", 35 | " if not isinstance(p, int):\n", 36 | " raise TypeError('Numerator must be an integer')\n", 37 | " if not isinstance(q, int):\n", 38 | " raise TypeError('Denominator must be an integer')\n", 39 | " \n", 40 | " g = math.gcd(p, q)\n", 41 | " \n", 42 | " self.p = p // g\n", 43 | " self.q = q // g\n", 44 | " \n", 45 | " # method to convert rational to float\n", 46 | " def __float__(self):\n", 47 | " return float(self.p) / float(self.q) \n", 48 | " \n", 49 | " # method to convert rational to string for printing\n", 50 | " def __str__(self):\n", 51 | " return '%d / %d' % (self.p, self.q)\n", 52 | " \n", 53 | " # method to add two rationals - interprets self + other\n", 54 | " def __add__(self, other):\n", 55 | " if isinstance(other, Rational):\n", 56 | " return Rational(self.p * other.q + other.p * self.q, self.q * other.q)\n", 57 | " # -- if it's an integer...\n", 58 | " elif isinstance(other, int):\n", 59 | " return Rational(self.p + other * self.q, self.q)\n", 60 | " # -- otherwise, we assume it will be a float\n", 61 | " return float(self) + float(other)\n", 62 | " \n", 63 | " def __radd__(self, other): # interprets other + self\n", 64 | " return self + other # addition commutes!\n", 65 | " " 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "r = Rational(3)\n", 75 | "print(r)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": { 82 | "scrolled": true 83 | }, 84 | 
"outputs": [], 85 | "source": [ 86 | "r = Rational(3, 2)\n", 87 | "print('Integer adding:')\n", 88 | "print('right add')\n", 89 | "print(r + 4)\n", 90 | "print(float(r + 4))" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "print('left add')\n", 100 | "print(4 + r)\n", 101 | "print(float(4 + r))" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "# Exercise 3\n", 109 | "\n", 110 | "### Add more operations to `Rational`\n", 111 | "You can read about the available operations that you can overload [here](https://docs.python.org/3.7/reference/datamodel.html#emulating-numeric-types)\n", 112 | "\n", 113 | "Add the following operations to the `Rational` class:\n", 114 | "* `*` - use `__mul__`\n", 115 | "* `/` - use `__truediv__`\n", 116 | "* `-` - use `__sub__`\n", 117 | "\n", 118 | "You only need to define these operations between two `Rational` types - use an `if isinstance(other, Rational):` block.\n", 119 | "\n", 120 | "Make a few examples to convince yourself that this works.\n", 121 | "\n" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "class Rational:\n", 131 | " def __init__(self, p, q=1):\n", 132 | " \n", 133 | " if q == 0:\n", 134 | " raise ValueError('Denominator must not be zero')\n", 135 | " if not isinstance(p, int):\n", 136 | " raise TypeError('Numerator must be an integer')\n", 137 | " if not isinstance(q, int):\n", 138 | " raise TypeError('Denominator must be an integer')\n", 139 | " \n", 140 | " g = math.gcd(p, q)\n", 141 | " \n", 142 | " self.p = p // g\n", 143 | " self.q = q // g\n", 144 | " \n", 145 | " # method to convert rational to float\n", 146 | " def __float__(self):\n", 147 | " return float(self.p) / float(self.q) \n", 148 | " \n", 149 | " # method to convert rational to string for printing\n", 150 | 
" def __str__(self):\n", 151 | " return '%d / %d' % (self.p, self.q)\n", 152 | " \n", 153 | " # method to add two rationals - interprets self + other\n", 154 | " def __add__(self, other):\n", 155 | " if isinstance(other, Rational):\n", 156 | " return Rational(self.p * other.q + other.p * self.q, self.q * other.q)\n", 157 | " # -- if it's an integer...\n", 158 | " elif isinstance(other, int):\n", 159 | " return Rational(self.p + other * self.q, self.q)\n", 160 | " # -- otherwise, we assume it will be a float\n", 161 | " return float(self) + float(other)\n", 162 | " \n", 163 | " def __radd__(self, other): # interprets other + self\n", 164 | " return self + other # addition commutes!\n", 165 | " \n", 166 | " # subtraction\n", 167 | " def __sub__(self, other):\n", 168 | " raise NotImplementedError('Subtraction not implemented yet')\n", 169 | " \n", 170 | " # multiplication\n", 171 | " def __mul__(self, other):\n", 172 | " raise NotImplementedError('Multiplication not implemented yet')\n", 173 | " \n", 174 | " # division\n", 175 | " def __truediv__(self, other):\n", 176 | " raise NotImplementedError('Division not implemented yet')\n", 177 | " " 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [ 186 | "# Write some examples to test your code" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": {}, 192 | "source": [ 193 | "# Exercise 4\n", 194 | "## Square root of rationals using the Babylonian method\n", 195 | "\n", 196 | "Implement the [Babylonian Method](https://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Babylonian_method) for computing the square root of a number $S$." 
197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": null, 202 | "metadata": {}, 203 | "outputs": [], 204 | "source": [ 205 | "def babylonian(S, num_iters=5):\n", 206 | " raise NotImplementedError('Not implemented yet')" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "math.sqrt(24)" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": null, 221 | "metadata": {}, 222 | "outputs": [], 223 | "source": [ 224 | "babylonian(24)" 225 | ] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": {}, 230 | "source": [ 231 | "# NumPy\n", 232 | "This is a good segue into NumPy. Python provides only a handful of numeric types: ints, longs, floats, and complex numbers. We just declared a class that implements rational numbers. NumPy implements one very useful numeric type: multidimensional arrays." 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": null, 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [ 241 | "# Quick note on importing\n", 242 | "import math\n", 243 | "math.sin(5)" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": {}, 250 | "outputs": [], 251 | "source": [ 252 | "import math as m\n", 253 | "m.sin(5)" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": null, 259 | "metadata": {}, 260 | "outputs": [], 261 | "source": [ 262 | "import numpy as np" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": null, 268 | "metadata": {}, 269 | "outputs": [], 270 | "source": [ 271 | "x = np.array([[0, 1], [1, 5]])\n", 272 | "x" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": null, 278 | "metadata": {}, 279 | "outputs": [], 280 | "source": [ 281 | "y = np.array([[4, 0], [0, 4]])\n", 282 | "y" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | 
"execution_count": null, 288 | "metadata": {}, 289 | "outputs": [], 290 | "source": [ 291 | "x + y" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": null, 297 | "metadata": {}, 298 | "outputs": [], 299 | "source": [ 300 | "x ** 2" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": null, 306 | "metadata": {}, 307 | "outputs": [], 308 | "source": [ 309 | "x @ y # Matrix multiplication" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": null, 315 | "metadata": {}, 316 | "outputs": [], 317 | "source": [ 318 | "np.sum(x)" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "metadata": {}, 325 | "outputs": [], 326 | "source": [] 327 | } 328 | ], 329 | "metadata": { 330 | "kernelspec": { 331 | "display_name": "Python 3", 332 | "language": "python", 333 | "name": "python3" 334 | }, 335 | "language_info": { 336 | "codemirror_mode": { 337 | "name": "ipython", 338 | "version": 3 339 | }, 340 | "file_extension": ".py", 341 | "mimetype": "text/x-python", 342 | "name": "python", 343 | "nbconvert_exporter": "python", 344 | "pygments_lexer": "ipython3", 345 | "version": "3.7.2" 346 | } 347 | }, 348 | "nbformat": 4, 349 | "nbformat_minor": 2 350 | } 351 | -------------------------------------------------------------------------------- /nb/2019_spring/Lecture_4.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# CME 193 - Lecture 4" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Matrix multiplication clarification" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import numpy as np\n", 24 | "A = np.array([[1, 2], [3, 4]])\n", 25 | "b = np.array([1, 2])" 26 | ] 27 | }, 28 | { 29 | "cell_type": 
"code", 30 | "execution_count": null, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "b # This is a just a vector, neither a column nor a row vector" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "A @ b # Matrix-vector product: A b" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "A @ A # Matrix-matrix product: A A" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "b @ b # Vector-vector product: b^T b" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "b @ A # Vector-matrix: b^T A" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": { 77 | "scrolled": true 78 | }, 79 | "outputs": [], 80 | "source": [ 81 | "# To treat a vector as a row or column vector, we must explicitly reshape it\n", 82 | "bb = b.reshape(2, 1) # a column vector (really a narrow matrix)\n", 83 | "print(bb)\n", 84 | "bb @ bb.T # outer product" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "## Array indexing" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "x = np.random.randn(4, 4)\n", 101 | "x" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "x[2,3]" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "x[2,3] = 0\n", 120 | "x" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | 
"outputs": [], 128 | "source": [ 129 | "# Caution! Assigning to a new variable doesn't make a copy\n", 130 | "y = x\n", 131 | "y[2,3] = 1\n", 132 | "x[2,3]" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "print(x)\n", 142 | "x[2] # Picks out a row" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "print(x)\n", 152 | "x[:,2] # Picks out a column" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "print(x)\n", 162 | "x[1:3,1:3] # Picks out a portion" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "x[1:3,1:3] = np.ones((2, 2))\n", 172 | "x" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "print(x)\n", 182 | "x[[3,0,1]] # Picks 3rd, 0th, 1st row" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "print(x)\n", 192 | "x[[3,0,1],[3,0,1]] # Picks (3,3), (0,0), (1,1)" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [ 201 | "print(x)\n", 202 | "x > 0" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": null, 208 | "metadata": {}, 209 | "outputs": [], 210 | "source": [ 211 | "x[x > 0] = 0 # Sets all entries greater than 0 to 0\n", 212 | "x" 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "## Example: Conway's game of life\n", 220 | "See [here](https://bitstorm.org/gameoflife/) for a demonstration." 
221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": null, 226 | "metadata": { 227 | "scrolled": true 228 | }, 229 | "outputs": [], 230 | "source": [ 231 | "X = np.random.rand(10, 10) > 0.5\n", 232 | "X" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": null, 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [ 241 | "np.zeros(X.shape, dtype=int)" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": null, 247 | "metadata": { 248 | "scrolled": true 249 | }, 250 | "outputs": [], 251 | "source": [ 252 | "def evolve(X):\n", 253 | " counts = np.zeros(X.shape, dtype=int)\n", 254 | " counts[:, :-1] += X[:, 1:]\n", 255 | " counts[:, 1:] += X[:, :-1]\n", 256 | " counts[:-1, :] += X[1:, :]\n", 257 | " counts[1:, :] += X[:-1, :]\n", 258 | " counts[1:, 1:] += X[:-1, :-1]\n", 259 | " counts[:-1, :-1] += X[1:, 1:]\n", 260 | " counts[1:, :-1] += X[:-1, 1:]\n", 261 | " counts[:-1, 1:] += X[1:, :-1]\n", 262 | " \n", 263 | " return (X == 1 and (counts == 2 or counts == 3)) or (X == 0 and counts == 3)\n", 264 | "# return np.logical_or(np.logical_and(X == 1, np.logical_or(counts == 2, counts == 3)),\n", 265 | "# np.logical_and(X == 0, counts == 3))" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": null, 271 | "metadata": {}, 272 | "outputs": [], 273 | "source": [ 274 | "X == 1 and X == 0" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": null, 280 | "metadata": {}, 281 | "outputs": [], 282 | "source": [ 283 | "import matplotlib.pyplot as plt" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": null, 289 | "metadata": { 290 | "scrolled": true 291 | }, 292 | "outputs": [], 293 | "source": [ 294 | "X = evolve(X)\n", 295 | "plt.imshow(X)\n", 296 | "plt.show()" 297 | ] 298 | }, 299 | { 300 | "cell_type": "markdown", 301 | "metadata": {}, 302 | "source": [ 303 | "# Exercise 5\n", 304 | "Let $A$ be a symmetric matrix, and recall 
that a pair $(v, \\lambda)$ is called an eigenvector-eigenvalue pair if $Av = \\lambda v$. Use the power method to find the eigenvalue $\\lambda$ with the largest absolute value, and the corresponding eigenvector $v$. Compare with the result returned by `np.linalg.eigh`.\n", 305 | "\n", 306 | "Recall that the power method uses repeated matrix multiplication and is given by the following pseudocode:\n", 307 | "\n", 308 | " input) A: an n x n symmetric matrix\n", 309 | " outputs) l: the eigenvalue with the largest absolute value\n", 310 | " v: the corresponding eigenvector\n", 311 | "\n", 312 | " v <- random vector of length n\n", 313 | " while l has not converged:\n", 314 | " v <- A v\n", 315 | " v <- v / ||v||_2\n", 316 | " l <- v^T A v\n", 317 | "\n", 318 | "Once your code works, package it in a nice function called `power_method(A)`." 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "metadata": {}, 325 | "outputs": [], 326 | "source": [ 327 | "tmp = np.random.randn(5, 5)\n", 328 | "A = tmp + tmp.T\n", 329 | "A" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": null, 335 | "metadata": {}, 336 | "outputs": [], 337 | "source": [ 338 | "np.linalg.eigh(A) # Returns an array of eigenvalues, and a matrix of eigenvectors" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": null, 344 | "metadata": {}, 345 | "outputs": [], 346 | "source": [] 347 | } 348 | ], 349 | "metadata": { 350 | "kernelspec": { 351 | "display_name": "Python 3", 352 | "language": "python", 353 | "name": "python3" 354 | }, 355 | "language_info": { 356 | "codemirror_mode": { 357 | "name": "ipython", 358 | "version": 3 359 | }, 360 | "file_extension": ".py", 361 | "mimetype": "text/x-python", 362 | "name": "python", 363 | "nbconvert_exporter": "python", 364 | "pygments_lexer": "ipython3", 365 | "version": "3.7.2" 366 | } 367 | }, 368 | "nbformat": 4, 369 | "nbformat_minor": 2 370 | } 371 | 
-------------------------------------------------------------------------------- /nb/2019_spring/Lecture_6.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# CME 193 - Lecture 6 - Pandas" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "[Pandas](https://pandas.pydata.org/) is a Python library for dealing with data. The main thing you'll hear people talk about is the DataFrame object (inspired by R), which is designed to hold tabular data." 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import pandas as pd\n", 24 | "import numpy as np" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "## Creating and combining dataframes" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": { 38 | "scrolled": true 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "# Creating a dataframe\n", 43 | "df = pd.DataFrame({\n", 44 | " 'name': ['Alice', 'Bob', 'Carol', 'Dan', 'Eve', 'Frank', 'Grace'],\n", 45 | " 'num_cats': [1, 2, 4, 8, 16, 32, None],\n", 46 | " 'num_dogs': [7, None, 0, 0, 3, 2, 1],\n", 47 | " 'location': ['NY', None, 'FL', 'HI', 'AK', 'AK', 'CA'],\n", 48 | "})\n", 49 | "df" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "df2 = pd.DataFrame({\n", 59 | " 'name': ['Alice', 'Alex', 'Bob', 'Carl', 'Carol', 'Dan', 'David'],\n", 60 | " 'favorite_animal': ['dog', 'dog', 'cat', 'dog', 'cat', 'turtle', 'dog']\n", 61 | "})\n", 62 | "df2" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": { 69 | "scrolled": true 70 | }, 71 | "outputs": [], 72 | "source": [ 73 | "df3 = pd.merge(df, df2, 
how='outer', on='name')\n", 74 | "df3" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "## Working with columns" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": { 88 | "scrolled": true 89 | }, 90 | "outputs": [], 91 | "source": [ 92 | "# Selecting a single column\n", 93 | "df3['name']" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "# Selecting multiple columns\n", 103 | "df3[['name', 'num_cats', 'num_dogs']]" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": { 110 | "scrolled": true 111 | }, 112 | "outputs": [], 113 | "source": [ 114 | "# Making a new column\n", 115 | "df3['num_legs'] = 4 * (df['num_cats'] + df['num_dogs'])\n", 116 | "df3" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": { 123 | "scrolled": true 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "# Calculating summaries\n", 128 | "print(df3['num_legs'].sum())\n", 129 | "print(df3['num_legs'].median())" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "df3['location'].value_counts()" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "# Deleting columns by name\n", 148 | "df3.drop(['location'], axis=1)" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "## Selecting rows" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": { 162 | "scrolled": true 163 | }, 164 | "outputs": [], 165 | "source": [ 166 | "# Selecting first five rows\n", 167 | "df3.head(5)" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | 
"execution_count": null, 173 | "metadata": { 174 | "scrolled": false 175 | }, 176 | "outputs": [], 177 | "source": [ 178 | "# Selecting last five rows\n", 179 | "df3.tail(5)" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "# Selecting by criteria: those who have more cats than dogs\n", 189 | "df3[df3['num_cats'] > df3['num_dogs']]" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": {}, 196 | "outputs": [], 197 | "source": [ 198 | "# Selecting by index requires setting an index\n", 199 | "df3 = df3.set_index('name')\n", 200 | "df3" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "metadata": {}, 207 | "outputs": [], 208 | "source": [ 209 | "# Now we can access by name by indexing into dataframe.loc\n", 210 | "df3.loc['Dan']" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "# Can even slice\n", 220 | "df3.loc['Dan':'Grace']" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": null, 226 | "metadata": {}, 227 | "outputs": [], 228 | "source": [ 229 | "# Deleting rows by index\n", 230 | "df3.drop(['Bob', 'Grace'])" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": null, 236 | "metadata": {}, 237 | "outputs": [], 238 | "source": [ 239 | "# Important note: Pandas always makes copies\n", 240 | "df3 # Bob and Grace are still there!\n", 241 | "\n", 242 | "# To save changes, assign it to the same variable, like:\n", 243 | "#df3 = df3.drop(['Bob', 'Grace'])" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": {}, 250 | "outputs": [], 251 | "source": [ 252 | "# Deletes all rows with missing data\n", 253 | "df3.dropna(subset=['num_dogs'])" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 
258 | "execution_count": null, 259 | "metadata": { 260 | "scrolled": true 261 | }, 262 | "outputs": [], 263 | "source": [ 264 | "# Sorts rows by a column\n", 265 | "df3.sort_values('num_legs')" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": null, 271 | "metadata": { 272 | "scrolled": true 273 | }, 274 | "outputs": [], 275 | "source": [ 276 | "# Grouping by a column and then performing an aggregation\n", 277 | "df3.groupby('favorite_animal').sum()" 278 | ] 279 | }, 280 | { 281 | "cell_type": "markdown", 282 | "metadata": {}, 283 | "source": [ 284 | "## Exercise 8\n", 285 | "In this exercise, you'll download and play with [CO2 data collected at the Mauna Loa observatory](https://www.esrl.noaa.gov/gmd/ccgg/trends/data.html) over the last 60 years. Use the following code to load the dataset, and complete the cells below." 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": null, 291 | "metadata": { 292 | "scrolled": false 293 | }, 294 | "outputs": [], 295 | "source": [ 296 | "df = pd.read_csv('ftp://aftp.cmdl.noaa.gov/products/trends/co2/co2_mm_mlo.txt', \n", 297 | " delim_whitespace=True, \n", 298 | " comment='#',\n", 299 | " names=[\"year\", \"month\", \"decdate\", \"co2\", \"co2interp\", \"trend\", \"days\"])\n", 300 | "\n", 301 | "pd.set_option('display.max_rows', 10)\n", 302 | "df" 303 | ] 304 | }, 305 | { 306 | "cell_type": "markdown", 307 | "metadata": {}, 308 | "source": [ 309 | "The dataframe currently contains columns called `year` and `month`. The goal is to introduce a new column called `date` that combines the year and month into one column. To do this, we will use a Pandas built-in function called `pd.to_datetime`, which accepts as its first argument a dataframe with three columns, `month`, `day`, and `year`, and outputs a dataframe with a single column corresponding to the date. \n", 310 | "\n", 311 | "First, create a column called `day` in the dataframe `df`, and set it equal to `1` for all rows. 
(This is so that, for example, the row with year 1958 and month 3 will be treated as March 1, 1958.)" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": null, 317 | "metadata": {}, 318 | "outputs": [], 319 | "source": [ 320 | "# YOUR CODE HERE (1 lines, but feel free to also print df if you'd like)" 321 | ] 322 | }, 323 | { 324 | "cell_type": "markdown", 325 | "metadata": {}, 326 | "source": [ 327 | "Next, select the three columns `month`, `day`, and `year` from `df` and pass the resulting three-column dataframe to `pd.to_datetime`. Create a new column `date` in `df` and set it equal to the output of `pd.to_datetime` to create a new column corresponding to the date." 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": null, 333 | "metadata": {}, 334 | "outputs": [], 335 | "source": [ 336 | "# YOUR CODE HERE (1 line)" 337 | ] 338 | }, 339 | { 340 | "cell_type": "markdown", 341 | "metadata": {}, 342 | "source": [ 343 | "From this point onward, we only care about the columns `date`, `co2`, and `trend`. Select only these columns and discard everything else." 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": null, 349 | "metadata": {}, 350 | "outputs": [], 351 | "source": [ 352 | "# YOUR CODE HERE (1 line)" 353 | ] 354 | }, 355 | { 356 | "cell_type": "markdown", 357 | "metadata": {}, 358 | "source": [ 359 | "Set the index to `date` (use `df.set_index`)." 360 | ] 361 | }, 362 | { 363 | "cell_type": "code", 364 | "execution_count": null, 365 | "metadata": { 366 | "scrolled": false 367 | }, 368 | "outputs": [], 369 | "source": [ 370 | "# YOUR CODE HERE (1 line)" 371 | ] 372 | }, 373 | { 374 | "cell_type": "markdown", 375 | "metadata": {}, 376 | "source": [ 377 | "Plot the data by using `df.plot()`. What do you notice?" 
378 | ] 379 | }, 380 | { 381 | "cell_type": "code", 382 | "execution_count": null, 383 | "metadata": { 384 | "scrolled": false 385 | }, 386 | "outputs": [], 387 | "source": [ 388 | "# YOUR CODE HERE (1 line)" 389 | ] 390 | }, 391 | { 392 | "cell_type": "markdown", 393 | "metadata": {}, 394 | "source": [ 395 | "The dataset uses -99.99 in the `co2` column to denote missing data. Set these values to `None` instead." 396 | ] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "execution_count": null, 401 | "metadata": {}, 402 | "outputs": [], 403 | "source": [ 404 | "# YOUR CODE HERE (1 line)" 405 | ] 406 | }, 407 | { 408 | "cell_type": "markdown", 409 | "metadata": {}, 410 | "source": [ 411 | "Actually, we don't want these rows anyway. Delete these rows (`df.dropna()`)." 412 | ] 413 | }, 414 | { 415 | "cell_type": "code", 416 | "execution_count": null, 417 | "metadata": {}, 418 | "outputs": [], 419 | "source": [ 420 | "# YOUR CODE HERE (1 line)" 421 | ] 422 | }, 423 | { 424 | "cell_type": "markdown", 425 | "metadata": {}, 426 | "source": [ 427 | "Plot the data again (`df.plot()`). Is the problem solved?" 428 | ] 429 | }, 430 | { 431 | "cell_type": "code", 432 | "execution_count": null, 433 | "metadata": { 434 | "scrolled": false 435 | }, 436 | "outputs": [], 437 | "source": [ 438 | "# YOUR CODE HERE (1 line)" 439 | ] 440 | }, 441 | { 442 | "cell_type": "markdown", 443 | "metadata": {}, 444 | "source": [ 445 | "Now plot only the data since 2008 (by selecting only the rows after 2008)." 
446 | ] 447 | }, 448 | { 449 | "cell_type": "code", 450 | "execution_count": null, 451 | "metadata": {}, 452 | "outputs": [], 453 | "source": [ 454 | "# YOUR CODE HERE (1 line)" 455 | ] 456 | } 457 | ], 458 | "metadata": { 459 | "kernelspec": { 460 | "display_name": "Python 3", 461 | "language": "python", 462 | "name": "python3" 463 | }, 464 | "language_info": { 465 | "codemirror_mode": { 466 | "name": "ipython", 467 | "version": 3 468 | }, 469 | "file_extension": ".py", 470 | "mimetype": "text/x-python", 471 | "name": "python", 472 | "nbconvert_exporter": "python", 473 | "pygments_lexer": "ipython3", 474 | "version": "3.7.2" 475 | } 476 | }, 477 | "nbformat": 4, 478 | "nbformat_minor": 2 479 | } 480 | -------------------------------------------------------------------------------- /nb/2019_spring/Lecture_8.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Lecture 8 - CME 193 - Python scripts\n", 8 | "So far, we've been working in Jupyter notebooks, which are nice and interactive. However, they can be clunky if you have lots of code. It can be annoying to have to run one cell at a time. For large codebases, most people work with Python scripts. The code in the cells below are meant to be run in Python scripts, not Jupyter notebooks. That means that they are saved in files that end in `.py` and run via the command line.\n", 9 | "\n", 10 | "As a first example, save the following code in a file called `main.py`." 
11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "# main.py\n", 20 | "import numpy as np\n", 21 | "print(np.pi)" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "To run this script, open a terminal, change into the directory with `main.py`, and run\n", 29 | "\n", 30 | " python main.py\n", 31 | " \n", 32 | "If all goes well, then you should see $\\pi$ printed!\n", 33 | "\n", 34 | "Running Python scripts is very similar to running a cell in a notebook, where Python executes each line in the file in a sequence. One difference is that anything you want to output needs to be explicitly `print`ed. Otherwise, you won't see any output.\n", 35 | "\n", 36 | "One difference is that with Python scripts, it's sometimes useful to accept command line arguments. For example, if your script saves its output to a file, it might be useful to accept a filename, like:\n", 37 | "\n", 38 | " python main.py output.txt\n", 39 | "\n", 40 | "To access the command line arguments from a Python script, you can use `sys.argv`, which is a list, in this case `['main.py', 'output.txt']`. We can access `output.txt` using the standard indexing notation, like `sys.argv[1]`." 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "# main.py\n", 50 | "import sys\n", 51 | "print(sys.argv)\n", 52 | "print(sys.argv[1])" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "Reading elements of this list works fine if you only have one or two arguments. For anything more complicated, Python comes with a built-in library called `argparse`. This is beyond the scope of this class, but here is an example that accepts a command line argument called `num_iters`, which defaults to `100` if nothing is passed. 
You can pass a different value like this:\n", 60 | "\n", 61 | " python main.py --num_iters 10000" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": { 68 | "scrolled": true 69 | }, 70 | "outputs": [], 71 | "source": [ 72 | "# main.py\n", 73 | "import argparse\n", 74 | "parser = argparse.ArgumentParser()\n", 75 | "parser.add_argument('--num_iters', default=100, type=int, help='number of iterations')\n", 76 | "args = parser.parse_args()\n", 77 | "print(args.num_iters)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "One thing that is easy in Jupyter notebooks but tricky in Python scripts is plotting. If you're lucky, the plot will still pop up, but one thing you can always do is save the plot to a file, as below." 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "# main.py\n", 94 | "import pandas as pd\n", 95 | "\n", 96 | "# The following two lines might be necessary, depending on your operating system. 
If you get\n", 97 | "# an error message initially, you can uncomment these lines to see if it works.\n", 98 | "# import matplotlib\n", 99 | "# matplotlib.use('Agg')\n", 100 | "\n", 101 | "import matplotlib.pyplot as plt\n", 102 | "\n", 103 | "# Load the abalone dataset from lecture 7.\n", 104 | "df = pd.read_csv('http://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data',\n", 105 | " header=None, names=['sex', 'length', 'diameter', 'height', 'weight', 'shucked_weight',\n", 106 | " 'viscera_weight', 'shell_weight', 'rings'])\n", 107 | "\n", 108 | "df.plot('weight', 'rings', kind='scatter')\n", 109 | "\n", 110 | "# This line might pop open a plot, but it might not on some computers.\n", 111 | "plt.show()\n", 112 | "\n", 113 | "# An alternative to showing the plot is to save it as an image.\n", 114 | "plt.savefig('figure.png')" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "Sometimes it's useful to save things to disk, after we've done a long calculation for example. Here are several ways to do so:" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "# Save a Pandas dataframe.\n", 131 | "df.to_csv('data.csv')\n", 132 | "\n", 133 | "# Save a NumPy array (note the signature is np.save(filename, array)).\n", 134 | "np.save('data.npy', arr)\n", 135 | "\n", 136 | "# Write text to a file. Warning: the 'w' means overwrite, which deletes anything already\n", 137 | "# existing in that file! To append to the file instead, use 'a' instead of 'w'.\n", 138 | "f = open('test.txt', 'w')\n", 139 | "f.write('hello world')\n", 140 | "f.close()\n", 141 | "\n", 142 | "# This is how to read a file (you can treat f as a list of lines that we can iterate over).\n", 143 | "f = open('test.txt', 'r')\n", 144 | "for line in f:\n", 145 | " print(line)\n", 146 | "f.close()\n", 147 | "\n", 148 | "# Closing a file when we're done saves memory. 
This is alternate syntax that automatically\n", 149 | "# closes the file after executing all the indented code inside the with block.\n", 150 | "with open('test.txt', 'r') as f:\n", 151 | " for line in f:\n", 152 | " print(line)" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "Often it makes sense to split Python scripts into multiple files. For example, suppose we have another file called `other.py`, where we define useful functions." 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "# other.py\n", 169 | "import numpy as np\n", 170 | "\n", 171 | "def compute_pi():\n", 172 | " return np.pi" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "You can access the function using the `import` command, just like other libraries." 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "# main.py\n", 189 | "import other\n", 190 | "print(other.compute_pi())\n", 191 | "\n", 192 | "# Alternatively, import the function directly:\n", 193 | "from other import compute_pi\n", 194 | "print(compute_pi())\n", 195 | "\n", 196 | "# To import a file in a subdirectory, for example, computations/other.py, we would use:\n", 197 | "import computations.other as other\n", 198 | "print(other.compute_pi())" 199 | ] 200 | }, 201 | { 202 | "cell_type": "markdown", 203 | "metadata": {}, 204 | "source": [ 205 | "You might encounter this weird block of code, `if __name__ == '__main__':`, in other people's Python code. It denotes code that is executed only if the file is run directly (`python other.py`), and not if it is merely imported. That way, `other.py` operates as both a standalone script that can be run or as a library file that can be imported." 
206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": null, 211 | "metadata": {}, 212 | "outputs": [], 213 | "source": [ 214 | "# other.py\n", 215 | "import numpy as np\n", 216 | "\n", 217 | "def compute_pi():\n", 218 | " return np.pi\n", 219 | "\n", 220 | "# Note that this will get executed even if this file is imported.\n", 221 | "print('Computing pi...')\n", 222 | "\n", 223 | "if __name__ == '__main__':\n", 224 | " # This only gets executed if this file is run with `python other.py`.\n", 225 | " print('Computing pi...')" 226 | ] 227 | } 228 | ], 229 | "metadata": { 230 | "kernelspec": { 231 | "display_name": "Python 3", 232 | "language": "python", 233 | "name": "python3" 234 | }, 235 | "language_info": { 236 | "codemirror_mode": { 237 | "name": "ipython", 238 | "version": 3 239 | }, 240 | "file_extension": ".py", 241 | "mimetype": "text/x-python", 242 | "name": "python", 243 | "nbconvert_exporter": "python", 244 | "pygments_lexer": "ipython3", 245 | "version": "3.7.2" 246 | } 247 | }, 248 | "nbformat": 4, 249 | "nbformat_minor": 2 250 | } 251 | -------------------------------------------------------------------------------- /nb/2019_winter/Lecture5-supplement.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# CME 193 - Pandas Exercise Supplement\n", 8 | "\n", 9 | "In this extended exercise, you'll load and play with CO2 data collected at the Mauna Loa observatory over the last 60 years. 
\n", 10 | "\n", 11 | "* NOAA Website: https://www.esrl.noaa.gov/gmd/ccgg/trends/full.html\n", 12 | "* NOAA data: https://www.esrl.noaa.gov/gmd/ccgg/trends/data.html\n", 13 | "\n", 14 | "The monthly data can be found at this [link](ftp://aftp.cmdl.noaa.gov/products/trends/co2/co2_mm_mlo.txt)" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import numpy as np\n", 24 | "import scipy\n", 25 | "import pandas as pd\n", 26 | "import matplotlib.pyplot as plt" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "Reads the data from the ftp server directly." 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "df = pd.read_csv('ftp://aftp.cmdl.noaa.gov/products/trends/co2/co2_mm_mlo.txt', \n", 43 | " delim_whitespace=True, \n", 44 | " comment='#',\n", 45 | " names=[\"year\", \"month\", \"decdate\", \"co2\", \"co2interp\", \"trend\", \"days\"],\n", 46 | " index_col=False)" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": { 53 | "scrolled": false 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "pd.set_option('display.max_rows', 10)\n", 58 | "df" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "# copies the original data.\n", 68 | "orig = df.copy()" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "## Part 1 - Normalize the Date\n", 76 | "\n", 77 | "1. 
create a new column for the dataframe called 'day' that is set to be 1 in every entry" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "# your code here" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "2. The dataframe now has columns for 'day', 'month', and 'year'. Use `pd.to_datetime()` to create a new series of dates \n", 94 | "\n", 95 | "`dates = pd.to_datetime(...)`" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "# your code here" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "3. set a new column of the dataframe to hold this series. Call the column `'date'`" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "# your code here" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "4. set the index of the dataframe to be the `'date'` column using the `set_index()` method." 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "# your code here" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "5. Now let's remove the old columns with date information. Use the `drop()` method to remove the 'day', 'month', 'year', and 'decdate' columns. Hint: `df.drop(..., axis=1, inplace=True)`\n", 144 | "\n", 145 | "5a. Go ahead and drop the 'days' column as well, since we're not going to use it." 
146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "# your code here" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "## Part 2 - deal with missing values\n", 162 | "\n", 163 | "1. First, use the `plot()` method to visualize the contents of your dataframe. What do you see?" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": {}, 170 | "outputs": [], 171 | "source": [ 172 | "# your code here" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "if you read the header for the file we used to load the dataframe, you'll see that missing values take the value -99.99.\n", 180 | "\n", 181 | "2. Set values that are `-99.99` to `None` (this indicates a missing value in Pandas).\n", 182 | "\n", 183 | "Hint: use the `applymap()` method, and the lambda function\n", 184 | "```python\n", 185 | "lambda x: None if x == -99.99 else x\n", 186 | "```\n", 187 | "If you're familiar with [ternary operators](https://en.wikipedia.org/wiki/%3F:), this is the equivalent of\n", 188 | "```\n", 189 | "x == -99.99 ? None : x\n", 190 | "```\n", 191 | "Note that you may need to make a new assignment e.g., `df = df.applymap(...)`" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": {}, 198 | "outputs": [], 199 | "source": [ 200 | "# your code here" 201 | ] 202 | }, 203 | { 204 | "cell_type": "markdown", 205 | "metadata": {}, 206 | "source": [ 207 | "3. Plot your dataframe again. What do you see now?\n", 208 | "\n", 209 | "3a. Try plotting just the 'co2' series. What do you see?" 
210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": null, 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [ 218 | "# your code here" 219 | ] 220 | }, 221 | { 222 | "cell_type": "markdown", 223 | "metadata": {}, 224 | "source": [ 225 | "## Part 3 - Create New DataFrames with rows that meet conditions" 226 | ] 227 | }, 228 | { 229 | "cell_type": "markdown", 230 | "metadata": {}, 231 | "source": [ 232 | "1. Create new dataframe called `recent` that contains all rows of the previous dataframe since 2007. Plot it." 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": null, 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [ 241 | "# your code here" 242 | ] 243 | }, 244 | { 245 | "cell_type": "markdown", 246 | "metadata": {}, 247 | "source": [ 248 | "2. Create a new dataframe called `old` that contains all rows of the dataframe before 1990. Plot it." 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": null, 254 | "metadata": {}, 255 | "outputs": [], 256 | "source": [ 257 | "# your code here" 258 | ] 259 | }, 260 | { 261 | "cell_type": "markdown", 262 | "metadata": {}, 263 | "source": [ 264 | "##### At this point, by inspection, you might be convinced there is further analysis to be done" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": null, 270 | "metadata": {}, 271 | "outputs": [], 272 | "source": [ 273 | "np.var(old['trend']), np.var(recent['trend'])" 274 | ] 275 | }, 276 | { 277 | "cell_type": "markdown", 278 | "metadata": {}, 279 | "source": [ 280 | "## Part 4 - Create some groups\n", 281 | "\n", 282 | "Let's go back to the original data that we loaded" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": null, 288 | "metadata": {}, 289 | "outputs": [], 290 | "source": [ 291 | "df = orig\n", 292 | "df" 293 | ] 294 | }, 295 | { 296 | "cell_type": "markdown", 297 | "metadata": {}, 298 | "source": [ 299 | "Suppose that we 
want to look at co2 averages by year instead of by month.\n", 300 | "\n", 301 | "1. drop rows with missing values\n", 302 | "\n", 303 | "1a. apply the map that sends -99.99 to none\n", 304 | "\n", 305 | "1b. use the `dropna()` method to remove rows with missing values: `df = df.dropna()`" 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": null, 311 | "metadata": {}, 312 | "outputs": [], 313 | "source": [ 314 | "# your code here" 315 | ] 316 | }, 317 | { 318 | "cell_type": "markdown", 319 | "metadata": {}, 320 | "source": [ 321 | "2. Create a group for each year (use key 'year')" 322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "execution_count": null, 327 | "metadata": {}, 328 | "outputs": [], 329 | "source": [ 330 | "# your code here" 331 | ] 332 | }, 333 | { 334 | "cell_type": "markdown", 335 | "metadata": {}, 336 | "source": [ 337 | "3. Aggregate the groups into a new dataframe, `df2`, using `np.mean`\n", 338 | "\n", 339 | "3a. you can drop all the columns except `'co2'` if you'd like" 340 | ] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": null, 345 | "metadata": {}, 346 | "outputs": [], 347 | "source": [ 348 | "# your code here" 349 | ] 350 | }, 351 | { 352 | "cell_type": "markdown", 353 | "metadata": {}, 354 | "source": [ 355 | "4. 
make a plot of the `'co2'` series" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": null, 361 | "metadata": {}, 362 | "outputs": [], 363 | "source": [ 364 | "# your code here" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": null, 370 | "metadata": {}, 371 | "outputs": [], 372 | "source": [] 373 | } 374 | ], 375 | "metadata": { 376 | "kernelspec": { 377 | "display_name": "Python (3.6-cme193)", 378 | "language": "python", 379 | "name": "cme193" 380 | }, 381 | "language_info": { 382 | "codemirror_mode": { 383 | "name": "ipython", 384 | "version": 3 385 | }, 386 | "file_extension": ".py", 387 | "mimetype": "text/x-python", 388 | "name": "python", 389 | "nbconvert_exporter": "python", 390 | "pygments_lexer": "ipython3", 391 | "version": "3.6.8" 392 | } 393 | }, 394 | "nbformat": 4, 395 | "nbformat_minor": 2 396 | } 397 | -------------------------------------------------------------------------------- /nb/2019_winter/Lecture8-slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icme/cme193/3ed008f6e0951b80faf1d77c9542ae0dd925691d/nb/2019_winter/Lecture8-slides.pdf -------------------------------------------------------------------------------- /nb/2019_winter/data/iris.csv: -------------------------------------------------------------------------------- 1 | sepal_length,sepal_width,petal_length,petal_width,name 2 | 5.1,3.5,1.4,0.2,setosa 3 | 4.9,3.0,1.4,0.2,setosa 4 | 4.7,3.2,1.3,0.2,setosa 5 | 4.6,3.1,1.5,0.2,setosa 6 | 5.0,3.6,1.4,0.2,setosa 7 | 5.4,3.9,1.7,0.4,setosa 8 | 4.6,3.4,1.4,0.3,setosa 9 | 5.0,3.4,1.5,0.2,setosa 10 | 4.4,2.9,1.4,0.2,setosa 11 | 4.9,3.1,1.5,0.1,setosa 12 | 5.4,3.7,1.5,0.2,setosa 13 | 4.8,3.4,1.6,0.2,setosa 14 | 4.8,3.0,1.4,0.1,setosa 15 | 4.3,3.0,1.1,0.1,setosa 16 | 5.8,4.0,1.2,0.2,setosa 17 | 5.7,4.4,1.5,0.4,setosa 18 | 5.4,3.9,1.3,0.4,setosa 19 | 5.1,3.5,1.4,0.3,setosa 20 | 5.7,3.8,1.7,0.3,setosa 21 | 
5.1,3.8,1.5,0.3,setosa 22 | 5.4,3.4,1.7,0.2,setosa 23 | 5.1,3.7,1.5,0.4,setosa 24 | 4.6,3.6,1.0,0.2,setosa 25 | 5.1,3.3,1.7,0.5,setosa 26 | 4.8,3.4,1.9,0.2,setosa 27 | 5.0,3.0,1.6,0.2,setosa 28 | 5.0,3.4,1.6,0.4,setosa 29 | 5.2,3.5,1.5,0.2,setosa 30 | 5.2,3.4,1.4,0.2,setosa 31 | 4.7,3.2,1.6,0.2,setosa 32 | 4.8,3.1,1.6,0.2,setosa 33 | 5.4,3.4,1.5,0.4,setosa 34 | 5.2,4.1,1.5,0.1,setosa 35 | 5.5,4.2,1.4,0.2,setosa 36 | 4.9,3.1,1.5,0.1,setosa 37 | 5.0,3.2,1.2,0.2,setosa 38 | 5.5,3.5,1.3,0.2,setosa 39 | 4.9,3.1,1.5,0.1,setosa 40 | 4.4,3.0,1.3,0.2,setosa 41 | 5.1,3.4,1.5,0.2,setosa 42 | 5.0,3.5,1.3,0.3,setosa 43 | 4.5,2.3,1.3,0.3,setosa 44 | 4.4,3.2,1.3,0.2,setosa 45 | 5.0,3.5,1.6,0.6,setosa 46 | 5.1,3.8,1.9,0.4,setosa 47 | 4.8,3.0,1.4,0.3,setosa 48 | 5.1,3.8,1.6,0.2,setosa 49 | 4.6,3.2,1.4,0.2,setosa 50 | 5.3,3.7,1.5,0.2,setosa 51 | 5.0,3.3,1.4,0.2,setosa 52 | 7.0,3.2,4.7,1.4,versicolor 53 | 6.4,3.2,4.5,1.5,versicolor 54 | 6.9,3.1,4.9,1.5,versicolor 55 | 5.5,2.3,4.0,1.3,versicolor 56 | 6.5,2.8,4.6,1.5,versicolor 57 | 5.7,2.8,4.5,1.3,versicolor 58 | 6.3,3.3,4.7,1.6,versicolor 59 | 4.9,2.4,3.3,1.0,versicolor 60 | 6.6,2.9,4.6,1.3,versicolor 61 | 5.2,2.7,3.9,1.4,versicolor 62 | 5.0,2.0,3.5,1.0,versicolor 63 | 5.9,3.0,4.2,1.5,versicolor 64 | 6.0,2.2,4.0,1.0,versicolor 65 | 6.1,2.9,4.7,1.4,versicolor 66 | 5.6,2.9,3.6,1.3,versicolor 67 | 6.7,3.1,4.4,1.4,versicolor 68 | 5.6,3.0,4.5,1.5,versicolor 69 | 5.8,2.7,4.1,1.0,versicolor 70 | 6.2,2.2,4.5,1.5,versicolor 71 | 5.6,2.5,3.9,1.1,versicolor 72 | 5.9,3.2,4.8,1.8,versicolor 73 | 6.1,2.8,4.0,1.3,versicolor 74 | 6.3,2.5,4.9,1.5,versicolor 75 | 6.1,2.8,4.7,1.2,versicolor 76 | 6.4,2.9,4.3,1.3,versicolor 77 | 6.6,3.0,4.4,1.4,versicolor 78 | 6.8,2.8,4.8,1.4,versicolor 79 | 6.7,3.0,5.0,1.7,versicolor 80 | 6.0,2.9,4.5,1.5,versicolor 81 | 5.7,2.6,3.5,1.0,versicolor 82 | 5.5,2.4,3.8,1.1,versicolor 83 | 5.5,2.4,3.7,1.0,versicolor 84 | 5.8,2.7,3.9,1.2,versicolor 85 | 6.0,2.7,5.1,1.6,versicolor 86 | 5.4,3.0,4.5,1.5,versicolor 87 | 
6.0,3.4,4.5,1.6,versicolor 88 | 6.7,3.1,4.7,1.5,versicolor 89 | 6.3,2.3,4.4,1.3,versicolor 90 | 5.6,3.0,4.1,1.3,versicolor 91 | 5.5,2.5,4.0,1.3,versicolor 92 | 5.5,2.6,4.4,1.2,versicolor 93 | 6.1,3.0,4.6,1.4,versicolor 94 | 5.8,2.6,4.0,1.2,versicolor 95 | 5.0,2.3,3.3,1.0,versicolor 96 | 5.6,2.7,4.2,1.3,versicolor 97 | 5.7,3.0,4.2,1.2,versicolor 98 | 5.7,2.9,4.2,1.3,versicolor 99 | 6.2,2.9,4.3,1.3,versicolor 100 | 5.1,2.5,3.0,1.1,versicolor 101 | 5.7,2.8,4.1,1.3,versicolor 102 | 6.3,3.3,6.0,2.5,virginica 103 | 5.8,2.7,5.1,1.9,virginica 104 | 7.1,3.0,5.9,2.1,virginica 105 | 6.3,2.9,5.6,1.8,virginica 106 | 6.5,3.0,5.8,2.2,virginica 107 | 7.6,3.0,6.6,2.1,virginica 108 | 4.9,2.5,4.5,1.7,virginica 109 | 7.3,2.9,6.3,1.8,virginica 110 | 6.7,2.5,5.8,1.8,virginica 111 | 7.2,3.6,6.1,2.5,virginica 112 | 6.5,3.2,5.1,2.0,virginica 113 | 6.4,2.7,5.3,1.9,virginica 114 | 6.8,3.0,5.5,2.1,virginica 115 | 5.7,2.5,5.0,2.0,virginica 116 | 5.8,2.8,5.1,2.4,virginica 117 | 6.4,3.2,5.3,2.3,virginica 118 | 6.5,3.0,5.5,1.8,virginica 119 | 7.7,3.8,6.7,2.2,virginica 120 | 7.7,2.6,6.9,2.3,virginica 121 | 6.0,2.2,5.0,1.5,virginica 122 | 6.9,3.2,5.7,2.3,virginica 123 | 5.6,2.8,4.9,2.0,virginica 124 | 7.7,2.8,6.7,2.0,virginica 125 | 6.3,2.7,4.9,1.8,virginica 126 | 6.7,3.3,5.7,2.1,virginica 127 | 7.2,3.2,6.0,1.8,virginica 128 | 6.2,2.8,4.8,1.8,virginica 129 | 6.1,3.0,4.9,1.8,virginica 130 | 6.4,2.8,5.6,2.1,virginica 131 | 7.2,3.0,5.8,1.6,virginica 132 | 7.4,2.8,6.1,1.9,virginica 133 | 7.9,3.8,6.4,2.0,virginica 134 | 6.4,2.8,5.6,2.2,virginica 135 | 6.3,2.8,5.1,1.5,virginica 136 | 6.1,2.6,5.6,1.4,virginica 137 | 7.7,3.0,6.1,2.3,virginica 138 | 6.3,3.4,5.6,2.4,virginica 139 | 6.4,3.1,5.5,1.8,virginica 140 | 6.0,3.0,4.8,1.8,virginica 141 | 6.9,3.1,5.4,2.1,virginica 142 | 6.7,3.1,5.6,2.4,virginica 143 | 6.9,3.1,5.1,2.3,virginica 144 | 5.8,2.7,5.1,1.9,virginica 145 | 6.8,3.2,5.9,2.3,virginica 146 | 6.7,3.3,5.7,2.5,virginica 147 | 6.7,3.0,5.2,2.3,virginica 148 | 6.3,2.5,5.0,1.9,virginica 149 | 
6.5,3.0,5.2,2.0,virginica 150 | 6.2,3.4,5.4,2.3,virginica 151 | 5.9,3.0,5.1,1.8,virginica 152 | -------------------------------------------------------------------------------- /nb/2019_winter/img/iris_knn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icme/cme193/3ed008f6e0951b80faf1d77c9542ae0dd925691d/nb/2019_winter/img/iris_knn.png -------------------------------------------------------------------------------- /nb/2019_winter/img/splitApplyCombine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icme/cme193/3ed008f6e0951b80faf1d77c9542ae0dd925691d/nb/2019_winter/img/splitApplyCombine.png -------------------------------------------------------------------------------- /nb/2019_winter/lecture_1/ex_1.md: -------------------------------------------------------------------------------- 1 | # Exercise 1 solutions 2 | 3 | 1. 4 | ```python3 5 | print("Hello, world!") 6 | ``` 7 | 2. 8 | ```python3 9 | import os 10 | os.path.abspath('.') 11 | ``` 12 | 3. 13 | ```python3 14 | x = 1 15 | x+=1 16 | print(x*2) 17 | ``` 18 | 4. 19 | ```python3 20 | for i in range(1,10): 21 | print(i) 22 | ``` 23 | 5. 24 | ```python3 25 | x = 1 26 | while x < 10000: 27 | print(x) 28 | x = x*2 29 | ``` 30 | 6. 31 | ```python3 32 | def testfn(a,b): 33 | return a+2*b 34 | 35 | testfn(1,2) 36 | ``` 37 | 7. 38 | ```python3 39 | "Hello," + " " + "World!" 40 | ``` 41 | 8. 42 | ```python3 43 | str1 = "float %.2f" % 1.0 44 | print(str1) 45 | str2 = "integer %d" % 2 46 | print(str2) 47 | ``` 48 | 9. 
49 | -------------------------------------------------------------------------------- /nb/2019_winter/lecture_2/test_script.py: -------------------------------------------------------------------------------- 1 | # this is a test script that runs some basic python code 2 | 3 | print("Hello, world!") 4 | 5 | def square(x): 6 | return x*x 7 | 8 | 9 | a = 5 10 | print("%d squared is %d" % (a, square(a))) 11 | -------------------------------------------------------------------------------- /nb/nb-assets/img/broadcasting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icme/cme193/3ed008f6e0951b80faf1d77c9542ae0dd925691d/nb/nb-assets/img/broadcasting.png -------------------------------------------------------------------------------- /nb/nb-assets/img/python.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icme/cme193/3ed008f6e0951b80faf1d77c9542ae0dd925691d/nb/nb-assets/img/python.png -------------------------------------------------------------------------------- /syllabus.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | CME 193 - Scientific Python 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 27 | 28 |
29 |
30 | 31 | 32 | 78 | 79 | 80 |
81 |

Syllabus

82 |

This is a course on scientific computing using Python. We'll cover aspects of the Python language as they are relevant to the material. The following schedule should be seen as a high-level guide to what we'll do in 8 lectures, but is not set in stone.

83 |
    84 |
  1. Introduction to Python & NumPy
  2. 85 |
  3. Dense Linear Algebra in NumPy
  4. 86 |
  5. Intro to SciPy - Dense & Sparse linear algebra
  6. 87 |
  7. Optimization I - Scipy.optimize
  8. 88 |
  9. Pandas, Sckit learn
  10. 89 |
  11. Tensorflow
  12. 90 |
  13. Optimization II - ortools
  14. 91 |
  15. TBD (Survey)
  16. 92 |
93 |

We'll intersperse the visualization libraries:

94 |
    95 |
  • Matplotlib
  • 96 |
  • Plotly
  • 97 |
98 |
99 | 100 |
101 |
102 | 103 | 104 | 105 | 106 | 107 | 114 | 115 | 116 | 117 | -------------------------------------------------------------------------------- /web/LICENSE: -------------------------------------------------------------------------------- 1 | Couscous 2 | 3 | Copyright (C) Matthieu Napoli 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and 6 | associated documentation files (the "Software"), to deal in the Software without restriction, 7 | including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, 9 | subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in all copies or substantial 12 | portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT 15 | NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 16 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 17 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 18 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | -------------------------------------------------------------------------------- /web/couscous.yml: -------------------------------------------------------------------------------- 1 | template: 2 | directory: . 
3 | # url: https://github.com/CouscousPHP/Template-Dark 4 | 5 | title: CME 193 - Scientific Python 6 | subTitle: Stanford University 7 | baseUrl: ./ 8 | 9 | # The left menu bar 10 | menu: 11 | sections: 12 | main: 13 | name: Course Information 14 | items: 15 | course_description: 16 | text: Course Description 17 | # You can use relative urls 18 | relativeUrl: index.html 19 | 20 | syllabus: 21 | text: Syllabus 22 | # You can use relative urls 23 | relativeUrl: syllabus.html 24 | lectures: 25 | text: Lectures 26 | # Or absolute urls 27 | relativeUrl: lectures.html 28 | homework: 29 | text: Homework 30 | relativeUrl: homework.html 31 | other: 32 | name: Other Links 33 | items: 34 | get_started: 35 | text: Getting Started 36 | relativeUrl: getstarted.html 37 | jupyter: 38 | text: Jupyter 39 | relativeUrl: jupyter.html 40 | -------------------------------------------------------------------------------- /web/css/highlight.tomorrow-night.css: -------------------------------------------------------------------------------- 1 | /* Tomorrow Night Theme */ 2 | /* http://jmblog.github.com/color-themes-for-google-code-highlightjs */ 3 | /* Original theme - https://github.com/chriskempson/tomorrow-theme */ 4 | /* http://jmblog.github.com/color-themes-for-google-code-highlightjs */ 5 | .tomorrow-comment, pre .comment, pre .title { 6 | color: #969896; 7 | } 8 | 9 | .tomorrow-red, pre .variable, pre .attribute, pre .tag, pre .regexp, pre .ruby .constant, pre .xml .tag .title, pre .xml .pi, pre .xml .doctype, pre .html .doctype, pre .css .id, pre .css .class, pre .css .pseudo { 10 | color: #cc6666; 11 | } 12 | 13 | .tomorrow-orange, pre .number, pre .preprocessor, pre .built_in, pre .literal, pre .params, pre .constant { 14 | color: #de935f; 15 | } 16 | 17 | .tomorrow-yellow, pre .class, pre .ruby .class .title, pre .css .rules .attribute { 18 | color: #f0c674; 19 | } 20 | 21 | .tomorrow-green, pre .string, pre .value, pre .inheritance, pre .header, pre .ruby .symbol, pre .xml 
.cdata { 22 | color: #b5bd68; 23 | } 24 | 25 | .tomorrow-aqua, pre .css .hexcolor { 26 | color: #8abeb7; 27 | } 28 | 29 | .tomorrow-blue, pre .function, pre .python .decorator, pre .python .title, pre .ruby .function .title, pre .ruby .title .keyword, pre .perl .sub, pre .javascript .title, pre .coffeescript .title { 30 | color: #81a2be; 31 | } 32 | 33 | .tomorrow-purple, pre .keyword, pre .javascript .function { 34 | color: #b294bb; 35 | } 36 | 37 | pre code { 38 | display: block; 39 | background: #1d1f21; 40 | color: #c5c8c6; 41 | font-family: Menlo, Monaco, Consolas, monospace; 42 | line-height: 1.5; 43 | border: 1px solid #ccc; 44 | padding: 10px; 45 | } 46 | -------------------------------------------------------------------------------- /web/css/main.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-size: 19px; 3 | } 4 | 5 | main { 6 | margin-top: 90px; 7 | } 8 | 9 | section { 10 | margin-bottom: 50px; 11 | } 12 | 13 | h1, h2, h3, h4 { 14 | color: #df691a; 15 | } 16 | h3 { 17 | font-size: 23px; 18 | } 19 | 20 | li { 21 | margin-bottom: 3px; 22 | } 23 | 24 | img { 25 | max-width: 100%; 26 | } 27 | 28 | header.navbar { 29 | opacity: 0.9; 30 | } 31 | .navbar .navbar-brand { 32 | font-size: 28px; 33 | height: auto; 34 | line-height: 50px; 35 | margin-left: 20px; 36 | color: #df691a; 37 | } 38 | .navbar a.navbar-brand:hover { 39 | color: #df691a; 40 | } 41 | .navbar .navbar-brand small { 42 | font-size: 18px; 43 | font-weight: 300; 44 | margin-left: 10px; 45 | color: white; 46 | } 47 | 48 | @media (min-width: 768px) { 49 | #sidebar { 50 | position:fixed; 51 | } 52 | } 53 | @media (max-width: 960px) { 54 | body { 55 | font-size: 17px; 56 | } 57 | pre { 58 | font-size: 12px; 59 | } 60 | } 61 | 62 | .page-header { 63 | margin-top: 0; 64 | } 65 | 66 | #sidebar .github-star { 67 | margin-top: 20px; 68 | margin-left: 50px; 69 | } 70 | 71 | #sidebar .text-muted { 72 | color: #859AAF; 73 | } 74 | 75 | pre { 76 | 
padding: 0; 77 | border-color: #3D5166; 78 | background-color: #1D2B3A; 79 | border-radius: 4px; 80 | margin: 15px; 81 | } 82 | pre code { 83 | border: none; 84 | background-color: #1D2B3A; 85 | } 86 | 87 | code { 88 | font-size: 85%; 89 | padding: 4px 4px 1px; 90 | margin: 0 4px; 91 | border-radius: 3px; 92 | color: #c5c8c6; 93 | border: solid 1px #3D5166; 94 | background-color: #1D2B3A; 95 | white-space: pre-wrap; 96 | white-space: -moz-pre-wrap; 97 | word-wrap: break-word; 98 | } 99 | -------------------------------------------------------------------------------- /web/default.twig: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | {{ title }} 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 32 | 33 |
34 |
35 | 36 | {% if menu is defined %} 37 | 38 | 57 | 58 | {% endif %} 59 | 60 | 63 | 64 |
65 |
66 | 67 | 68 | 69 | 70 | 71 | 78 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /web/fonts/FontAwesome.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icme/cme193/3ed008f6e0951b80faf1d77c9542ae0dd925691d/web/fonts/FontAwesome.otf -------------------------------------------------------------------------------- /web/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icme/cme193/3ed008f6e0951b80faf1d77c9542ae0dd925691d/web/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /web/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icme/cme193/3ed008f6e0951b80faf1d77c9542ae0dd925691d/web/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /web/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icme/cme193/3ed008f6e0951b80faf1d77c9542ae0dd925691d/web/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /web/getstarted.md: -------------------------------------------------------------------------------- 1 | --- 2 | currentMenu: getting_started 3 | --- 4 | #Getting started 5 | 6 | Getting started with Python can be a little confusing, hopefully this page helps to get you going. 7 | 8 | In general, I am not going to tell you to do things one way. This is because there are a variety of operating systems, and some of you may already have development environments set up in a particular way. The best ways to troubleshoot installation and environment issues are: 9 | 1. Consult the internet 10 | 2. 
Consult a friend/neighbor with the same operating system 11 | 12 | Personally, I run Fedora (linux) on my machine. Unless you are as well, you may find that I fall back on steps 1 and 2 above if you ask me for help. 13 | 14 | For anyone who is new to Python or maintaining any sort of development environment on a computer, I recommend Anaconda python. 15 | 16 | ##Installing Python 17 | 18 | In this class, we will be using Python 3.6. If you want to use a different version, you can try, but the material provided in class may not run with your different version. In particular, if you are using Python 2, you are particularly likely to see things not work. We recommend having some Python 3 interpreter available. 19 | 20 | In the terminal, you can check which python version you are currently using with 21 | ```bash 22 | python -V # -V is the same as --version 23 | ``` 24 | You may have a python 2 interpreter as well as a python 3 interpreter. Often these can be invoked as 25 | ```bash 26 | python2 -V # python 2 version 27 | python3 -V # python 3 version 28 | ``` 29 | 30 | Following are a variety of methods you can use to install Python. As far as this course is concerned, you can use whatever you want (if you already have system python or anaconda working for you, don't change just for the course). 31 | 32 | If you are new to Python, I recommend using Anaconda, regardless of operating system. You can always change your mind later. [Conda environments](https://conda.io/docs/user-guide/tasks/manage-environments.html) are enough of a reason to at least give it a try. 33 | 34 | ###Anaconda 35 | 36 | One convenient method to set up your Python environment is using a free, pre-packaged distribution, such as [Anaconda](https://www.anaconda.com/download/). 37 | 38 | This has the advantage that many relevant packages come pre-installed, possibly saving you some headaches later on. It also includes *pip*, the Python package manager. 
39 | 40 | Note that while Anaconda also comes with the Anaconda launcher and a bunch of other tools, you should not be using any of these for this course. 41 | We only use Anaconda for the convenience of installation of Python and the main packages, not the other stuff that comes with it as well. 42 | 43 | If you are a Windows user, Anaconda is recommended, as Windows is notorious for not playing nice with Python. 44 | 45 | ### System Package Manager (Linux) 46 | 47 | If you're running Linux, you can just use your package manager (*apt-get*, *dnf*, etc.) to install Python. You can also often use your package manager to install Python packages instead of *pip*, but you may run into issues doing this. 48 | 49 | ### Homebrew (Mac only) 50 | 51 | If you want a bit more control over your Python distribution, then using Homebrew to install Python is useful. 52 | 53 | It also installs the package manager *pip* for you, which is very useful. 54 | 55 | However, it does not come with additional modules, such as *numpy* or *scipy*, 56 | though they can be installed easily using *pip*. 57 | 58 | A short tutorial can be found [here](http://docs.python-guide.org/en/latest/starting/install/osx/). 59 | 60 | 61 | ###Note to Windows Users 62 | 63 | If you are a Windows user and are serious about taking this *and* other computer classes, I recommend using the [Windows Subsystem for Linux](https://docs.microsoft.com/en-us/windows/wsl/install-win10). In general, much scientific computing is done on clusters running some form of Linux. The sooner you start using Linux, the better. 64 | 65 | If you really want to take the plunge, you can also partition your hard drive and add a linux distribution as a second operating system. 66 | 67 | If the install guides for Anaconda fail, and some Googling leads to nothing of utility, I highly suggest running with a cloud-based option. 68 | 69 | ##Editor 70 | 71 | To write code, you need an editor. 
72 | While there are many options, old and new, [Atom](https://atom.io/) is (generally) what I use. 73 | 74 | A script is a simple file with text, such as 75 | 76 | ```python 77 | a = 'Hello,' 78 | b = 'world!' 79 | print(a + ' ' + b) 80 | ``` 81 | 82 | which you can save to your *filesystem* using the editor. 83 | 84 | ##Running code 85 | 86 | Now that you know how to write Python scripts, it's time to learn how to run them using Python. 87 | 88 | ###Linux 89 | Open a terminal. 90 | 91 | To open the interpreter, enter `python` 92 | 93 | To run a script enter `python