├── .gitignore ├── asset ├── exam_.txt ├── a_.txt ├── ._a_.txt ├── population_2.txt ├── trans.txt ├── ._exam_.txt ├── ._trans.txt ├── dtree.pdf ├── ._lecture_data.txt ├── ._population_1.txt ├── ._population_2.txt ├── population_1.txt ├── ._SMSSpamCollection.txt ├── ._sydney_housing_market.txt ├── ._Daily_Weather_Observations.csv ├── Christopher_Manning_wordcloud.png ├── ._Christopher_Manning_wordcloud.png ├── ._Median Price of Established House.txt ├── ._Median Price of Established House Transfers.txt ├── Median Price of Established House.txt ├── Median Price of Established House Transfers.txt ├── lecture_data.txt ├── sydney_housing_market.txt └── Daily_Weather_Observations.csv ├── README.md ├── 0.self-evaluation ├── README.md └── L0.python3-and-jupyter.ipynb ├── 9.apriori └── L9 - Apriori Algorithm.ipynb ├── 4.optimal-histogram └── L4 - Optimal Histogram.ipynb └── 1.tools ├── L1 - numpy-fundamentals.ipynb └── L1 - Pandas-2.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.sw[po] 3 | .ipynb_checkpoints 4 | -------------------------------------------------------------------------------- /asset/exam_.txt: -------------------------------------------------------------------------------- 1 | A B C M 2 | 1 1 1 1 3 | 2 1 2 3 4 | 1 2 1 9 -------------------------------------------------------------------------------- /asset/a_.txt: -------------------------------------------------------------------------------- 1 | A B M 2 | 1 1 20 3 | 2 1 50 4 | 1 2 30 5 | 1 3 40 6 | -------------------------------------------------------------------------------- /asset/._a_.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leasunhy/sysu-dm-summer/HEAD/asset/._a_.txt -------------------------------------------------------------------------------- /asset/population_2.txt: -------------------------------------------------------------------------------- 1 | 
location population 2 | New York 8406000 3 | Toronto 2615000 -------------------------------------------------------------------------------- /asset/trans.txt: -------------------------------------------------------------------------------- 1 | A C D 2 | B C 3 | A B E 4 | B E 5 | A B C D E 6 | D E 7 | -------------------------------------------------------------------------------- /asset/._exam_.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leasunhy/sysu-dm-summer/HEAD/asset/._exam_.txt -------------------------------------------------------------------------------- /asset/._trans.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leasunhy/sysu-dm-summer/HEAD/asset/._trans.txt -------------------------------------------------------------------------------- /asset/dtree.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leasunhy/sysu-dm-summer/HEAD/asset/dtree.pdf -------------------------------------------------------------------------------- /asset/._lecture_data.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leasunhy/sysu-dm-summer/HEAD/asset/._lecture_data.txt -------------------------------------------------------------------------------- /asset/._population_1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leasunhy/sysu-dm-summer/HEAD/asset/._population_1.txt -------------------------------------------------------------------------------- /asset/._population_2.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leasunhy/sysu-dm-summer/HEAD/asset/._population_2.txt -------------------------------------------------------------------------------- 
/asset/population_1.txt: -------------------------------------------------------------------------------- 1 | location population 2 | New York 8406000 3 | Vancouver 630500 4 | Sydney 4293000 -------------------------------------------------------------------------------- /asset/._SMSSpamCollection.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leasunhy/sysu-dm-summer/HEAD/asset/._SMSSpamCollection.txt -------------------------------------------------------------------------------- /asset/._sydney_housing_market.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leasunhy/sysu-dm-summer/HEAD/asset/._sydney_housing_market.txt -------------------------------------------------------------------------------- /asset/._Daily_Weather_Observations.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leasunhy/sysu-dm-summer/HEAD/asset/._Daily_Weather_Observations.csv -------------------------------------------------------------------------------- /asset/Christopher_Manning_wordcloud.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leasunhy/sysu-dm-summer/HEAD/asset/Christopher_Manning_wordcloud.png -------------------------------------------------------------------------------- /asset/._Christopher_Manning_wordcloud.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leasunhy/sysu-dm-summer/HEAD/asset/._Christopher_Manning_wordcloud.png -------------------------------------------------------------------------------- /asset/._Median Price of Established House.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leasunhy/sysu-dm-summer/HEAD/asset/._Median Price of 
Established House.txt -------------------------------------------------------------------------------- /asset/._Median Price of Established House Transfers.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leasunhy/sysu-dm-summer/HEAD/asset/._Median Price of Established House Transfers.txt -------------------------------------------------------------------------------- /asset/Median Price of Established House.txt: -------------------------------------------------------------------------------- 1 | City Q1 Q2 Q3 Q4 2 | Sydney 700.0 750.0 750.0 835.0 3 | Rest of NSW 370.0 375.0 373.0 385.0 4 | Melbourne 525.0 555.5 540.0 580.0 5 | Rest of Vic. 300.0 295.0 287.0 302.0 6 | Brisbane 460.0 474.9 471.0 485.0 7 | Rest of Qld. 400.0 405.0 400.0 411.0 8 | Adelaide 415.0 410.0 418.8 427.5 9 | Rest of SA 265.0 261.0 255.0 275.0 10 | Perth 550.0 540.0 550.0 555.0 11 | Rest of WA 387.5 377.0 380.0 385.5 12 | Hobart 367.0 345.0 350.0 365.0 13 | Rest of Tas. 250.0 240.0 265.0 260.0 14 | Darwin 590.0 568.5 565.0 582.5 15 | Rest of NT 415.0 425.0 422.5 420.0 16 | Canberra 555.0 550.0 560.0 570.0 -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Materials for SYSU Data Mining Summer Course 2 | ============================================ 3 | 4 | This repo is for holding the materials for the DM summer course in SYSU. 5 | 6 | Please navigate into the subfolders to see specific instructions for each of the materials. 
7 | 8 | 9 | Contents 10 | -------- 11 | 12 | Lessons: 13 | * L0: [Self Evaluation](0.self-evaluation) 14 | * L1: [NumPy and Pandas](1.tools) 15 | * L2: [LSH](2.lsh) 16 | * L3: [Data Preprocessing](3.preprocessing) 17 | * L4: [Optimal Histogram](4.optimal-histogram) 18 | * L5: [Decision Trees](5.decision-tree) 19 | * L6: [GaussianNB, KNN and Cross-validation](6.gaussiannb-knn-cv) 20 | * L7: [Naive Bayesian Classifiers](7.naive-bayesian-classifiers) 21 | * L8: [Clustering](8.clustering) 22 | * L9: [Apriori Algorithm](9.apriori) 23 | 24 | -------------------------------------------------------------------------------- /0.self-evaluation/README.md: -------------------------------------------------------------------------------- 1 | Assignment 0. Self Evaluation 2 | ============================= 3 | 4 | Because time is limited and the content is intensive, the course is somewhat demanding, especially in terms of your familiarity with Python and your programming skills. 5 | 6 | In light of this, we created this notebook to help you do self-evaluation. 7 | Please download the notebook and try to finish the challenges in it. 8 | 9 | If you can finish the challenges without too much effort, you are good to go. 10 | If you can't, however, you should reconsider whether you should enroll in this course then. 11 | We won't insist that you quit in such cases, but... Well, you have been warned :smile: 12 | 13 | To start, first click on [this](L0.python3-and-jupyter.ipynb) to preview the notebook on GitHub and follow the instructions to set up your environment. 14 | After that, download the notebook to your local machine and try to work on it. 15 | 16 | If you have any questions, please consult the TAs. 
17 | -------------------------------------------------------------------------------- /asset/Median Price of Established House Transfers.txt: -------------------------------------------------------------------------------- 1 | City Quarter Price 2 | Sydney Q1 700.0 3 | Rest of NSW Q1 370.0 4 | Melbourne Q1 525.0 5 | Rest of Vic. Q1 6 | Brisbane Q1 460.0 7 | Rest of Qld. Q1 400.0 8 | Adelaide Q1 415.0 9 | Rest of SA Q1 265.0 10 | Perth Q1 550.0 11 | Rest of WA Q1 387.5 12 | Hobart Q1 367.0 13 | Rest of Tas. Q1 250.0 14 | Darwin Q1 590.0 15 | Rest of NT Q1 16 | Canberra Q1 555.0 17 | Sydney Q2 750.0 18 | Rest of NSW Q2 375.0 19 | Melbourne Q2 555.5 20 | Rest of Vic. Q2 295.0 21 | Brisbane Q2 474.9 22 | Rest of Qld. Q2 405.0 23 | Adelaide Q2 24 | Rest of SA Q2 261.0 25 | Perth Q2 540.0 26 | Rest of WA Q2 377.0 27 | Hobart Q2 345.0 28 | Rest of Tas. Q2 240.0 29 | Darwin Q2 30 | Rest of NT Q2 425.0 31 | Canberra Q2 550.0 32 | Sydney Q3 750.0 33 | Rest of NSW Q3 373.0 34 | Melbourne Q3 540.0 35 | Rest of Vic. Q3 287.0 36 | Brisbane Q3 471.0 37 | Rest of Qld. Q3 400.0 38 | Adelaide Q3 418.8 39 | Rest of SA Q3 255.0 40 | Perth Q3 550.0 41 | Rest of WA Q3 380.0 42 | Hobart Q3 43 | Rest of Tas. Q3 265.0 44 | Darwin Q3 565.0 45 | Rest of NT Q3 422.5 46 | Canberra Q3 560.0 47 | Sydney Q4 835.0 48 | Rest of NSW Q4 385.0 49 | Melbourne Q4 580.0 50 | Rest of Vic. Q4 302.0 51 | Brisbane Q4 52 | Rest of Qld. Q4 411.0 53 | Adelaide Q4 427.5 54 | Rest of SA Q4 275.0 55 | Perth Q4 555.0 56 | Rest of WA Q4 385.5 57 | Hobart Q4 365.0 58 | Rest of Tas. 
Q4 260.0 59 | Darwin Q4 582.5 60 | Rest of NT Q4 420.0 61 | Canberra Q4 570.0 -------------------------------------------------------------------------------- /asset/lecture_data.txt: -------------------------------------------------------------------------------- 1 | location time item dollars_sold 2 | Vancouver Q1 home entertainment 605 3 | Vancouver Q2 home entertainment 680 4 | Vancouver Q3 home entertainment 812 5 | Vancouver Q4 home entertainment 927 6 | New York Q1 home entertainment 1087 7 | New York Q2 home entertainment 1130 8 | New York Q3 home entertainment 1034 9 | New York Q4 home entertainment 1142 10 | Toronto Q1 home entertainment 818 11 | Toronto Q2 home entertainment 894 12 | Toronto Q3 home entertainment 940 13 | Toronto Q4 home entertainment 978 14 | Vancouver Q1 computer 825 15 | Vancouver Q2 computer 952 16 | Vancouver Q3 computer 1023 17 | Vancouver Q4 computer 1038 18 | New York Q1 computer 968 19 | New York Q2 computer 1024 20 | New York Q3 computer 1048 21 | New York Q4 computer 1091 22 | Toronto Q1 computer 746 23 | Toronto Q2 computer 769 24 | Toronto Q3 computer 795 25 | Toronto Q4 computer 864 26 | Vancouver Q1 phone 14 27 | Vancouver Q2 phone 31 28 | Vancouver Q3 phone 30 29 | Vancouver Q4 phone 38 30 | New York Q1 phone 38 31 | New York Q2 phone 41 32 | New York Q3 phone 45 33 | New York Q4 phone 54 34 | Toronto Q1 phone 43 35 | Toronto Q2 phone 52 36 | Toronto Q3 phone 58 37 | Toronto Q4 phone 59 38 | Vancouver Q1 security 400 39 | Vancouver Q2 security 512 40 | Vancouver Q3 security 501 41 | Vancouver Q4 security 580 42 | New York Q1 security 872 43 | New York Q2 security 925 44 | New York Q3 security 1002 45 | New York Q4 security 984 46 | Toronto Q1 security 591 47 | Toronto Q2 security 682 48 | Toronto Q3 security 728 49 | Toronto Q4 security 784 -------------------------------------------------------------------------------- /asset/sydney_housing_market.txt: 
-------------------------------------------------------------------------------- 1 | suburb council type distance_to_CBD value sold 2 | Turrella Rockdale house 9.9 $839,676 17 3 | Sydenham Marrickville house 7.4 $845,771 21 4 | St Peters Marrickville house 6.9 $920,169 50 5 | Tempe Marrickville house 8.3 $921,390 59 6 | Waterloo Sydney house 4.7 $937,316 11 7 | Marrickville Marrickville house 7.5 $994,026 191 8 | Erskineville Sydney house 5.2 $1,005,107 84 9 | Mortlake Canada Bay house 9.2 $1,036,064 15 10 | Newtown Sydney house 5.2 $1,044,330 166 11 | Chippendale Sydney house 3.2 $1,060,415 31 12 | Darlington Sydney house 3.8 $1,062,426 32 13 | Hurlstone Park Canterbury house 9.1 $1,066,214 36 14 | Stanmore Marrickville house 5.3 $1,070,242 108 15 | Leichhardt Leichhardt house 5.2 $1,076,623 225 16 | Enmore Marrickville house 5.7 $1,079,403 58 17 | Summer Hill Ashfield house 7.3 $1,080,042 49 18 | Eastlakes Botany Bay house 8.2 $1,090,744 29 19 | Zetland Sydney house 5.6 $1,117,141 10 20 | Petersham Marrickville house 6.1 $1,122,372 82 21 | Ashfield Ashfield house 7.9 $1,123,787 110 22 | Eastlakes Botany Bay unit 8.2 $484,487 56 23 | Marrickville Marrickville unit 7.5 $552,713 225 24 | Newtown Sydney unit 5.2 $559,175 104 25 | Potts Point Sydney unit 2.3 $565,141 217 26 | Lewisham Marrickville unit 6.7 $569,889 20 27 | Gladesville Ryde unit 7.9 $572,923 139 28 | Haberfield Ashfield unit 6.5 $573,347 13 29 | Rushcutters Bay Sydney unit 2.9 $574,618 78 30 | Ashfield Ashfield unit 7.9 $575,096 228 31 | Centennial Park Randwick unit 5.2 $577,814 53 32 | Hurlstone Park Canterbury unit 9.1 $593,081 15 33 | Stanmore Marrickville unit 5.3 $594,654 52 34 | Croydon Burwood unit 8.4 $605,927 43 35 | Enmore Marrickville unit 5.7 $607,195 15 36 | Petersham Marrickville unit 6.1 $610,890 69 37 | Lane Cove North Lane Cove unit 7 $611,207 220 38 | Elizabeth Bay Sydney unit 2.7 $615,967 182 39 | Alexandria Sydney unit 5.7 $617,024 211 40 | Dulwich Hill Marrickville unit 8 $622,141 
143 41 | Summer Hill Ashfield unit 7.3 $631,492 88 -------------------------------------------------------------------------------- /9.apriori/L9 - Apriori Algorithm.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Introduction\n", 8 | "\n", 9 | "We use \n", 10 | "* a single letter string to represent an **item**\n", 11 | "* a `frozenset` to represent an **itemset**\n", 12 | "* class `Transactions` to represent the **transaction database** (a collection of **itemset**s)" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "## The `Transactions` class" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 1, 25 | "metadata": { 26 | "collapsed": true 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "# helper function\n", 31 | "# we choose to use `frozenset` so that it is hashable (e.g., easier to remove duplicates); `set` is not\n", 32 | "def itemset(*list_of_items): # `splat` allows us to send in multiple arguments\n", 33 | " return frozenset(sorted(list_of_items)) # there is no need to sort, but sorted set is better for debugging\n", 34 | "\n", 35 | "# print(set) does not display set items in sorted order, hence we need to do it ourselves\n", 36 | "# we overload the function depending on the type of the argument\n", 37 | "def to_str(obj):\n", 38 | " if isinstance(obj, frozenset):\n", 39 | " return '{{{}}}'.format(', '.join(sorted(obj)))\n", 40 | " elif isinstance(obj, list):\n", 41 | " strs = [to_str(fs) for fs in obj]\n", 42 | " return '[ {} ]'.format(', '.join(strs))" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 2, 48 | "metadata": { 49 | "collapsed": false, 50 | "scrolled": true 51 | }, 52 | "outputs": [ 53 | { 54 | "data": { 55 | "text/plain": [ 56 | "frozenset({'A', 'B', 'D', 'X'})" 57 | ] 58 | }, 59 | "execution_count": 2, 60 | "metadata": 
{}, 61 | "output_type": "execute_result" 62 | } 63 | ], 64 | "source": [ 65 | "t = itemset('B', 'A', 'D', 'X')\n", 66 | "t" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 3, 72 | "metadata": { 73 | "collapsed": false, 74 | "scrolled": true 75 | }, 76 | "outputs": [ 77 | { 78 | "name": "stdout", 79 | "output_type": "stream", 80 | "text": [ 81 | "frozenset({'B', 'A', 'D', 'X'})\n", 82 | "{A, B, D, X}\n" 83 | ] 84 | } 85 | ], 86 | "source": [ 87 | "print(t)\n", 88 | "print(to_str(t))" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 4, 94 | "metadata": { 95 | "collapsed": false 96 | }, 97 | "outputs": [ 98 | { 99 | "data": { 100 | "text/plain": [ 101 | "'[ {A, B, D, X}, {A, E} ]'" 102 | ] 103 | }, 104 | "execution_count": 4, 105 | "metadata": {}, 106 | "output_type": "execute_result" 107 | } 108 | ], 109 | "source": [ 110 | "t1 = itemset('A', 'E')\n", 111 | "list_of_itemsets = [t, t1]\n", 112 | "to_str(list_of_itemsets)" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 5, 118 | "metadata": { 119 | "collapsed": false, 120 | "scrolled": true 121 | }, 122 | "outputs": [ 123 | { 124 | "data": { 125 | "text/plain": [ 126 | "True" 127 | ] 128 | }, 129 | "execution_count": 5, 130 | "metadata": {}, 131 | "output_type": "execute_result" 132 | } 133 | ], 134 | "source": [ 135 | "itemset('A', 'B') == itemset('B', 'A')" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 6, 141 | "metadata": { 142 | "collapsed": false 143 | }, 144 | "outputs": [ 145 | { 146 | "name": "stdout", 147 | "output_type": "stream", 148 | "text": [ 149 | "{A, C, D}\n", 150 | "{B, C}\n", 151 | "{A, B, E}\n", 152 | "{B, E}\n", 153 | "{A, B, C, D, E}\n", 154 | "{D, E}\n", 155 | "supp({A, B}) = 2\n", 156 | "unique items are: ['A', 'B', 'C', 'D', 'E']\n", 157 | "minsup = 1, frequent items = [ {A}, {B}, {C}, {D}, {E} ]\n", 158 | "minsup = 2, frequent items = [ {A}, {B}, {C}, {D}, {E} ]\n", 159 | "minsup = 3, 
frequent items = [ {A}, {B}, {C}, {D}, {E} ]\n", 160 | "minsup = 4, frequent items = [ {B}, {E} ]\n", 161 | "minsup = 5, frequent items = [ ]\n", 162 | "minsup = 6, frequent items = [ ]\n" 163 | ] 164 | } 165 | ], 166 | "source": [ 167 | "import numpy as np\n", 168 | "\n", 169 | "class transactions:\n", 170 | " def __init__(self, filename):\n", 171 | " trans_sets = []\n", 172 | " with open(filename, 'r') as f:\n", 173 | " for line in f:\n", 174 | " fields = line.split() # format: a transaction per line, consisting of items \n", 175 | " trans_sets.append(itemset(*fields))\n", 176 | " self.trans_sets = trans_sets\n", 177 | " \n", 178 | " def get_tdb(self):\n", 179 | " return self.trans_sets\n", 180 | "\n", 181 | " def support(self, itemset): \n", 182 | " # naive counting. `itemset` must be a set\n", 183 | " match_vector = [1 if itemset.issubset(trans) else 0 for trans in self.trans_sets]\n", 184 | " return np.sum(match_vector)\n", 185 | " \n", 186 | " def get_items(self):\n", 187 | " items = set()\n", 188 | " for itemset in self.trans_sets:\n", 189 | " items = items.union(itemset)\n", 190 | " return sorted(items)\n", 191 | " \n", 192 | " def get_frequent_items(self, minsup):\n", 193 | " return [itemset(item) for item in self.get_items() if self.support(itemset(item)) >= minsup]\n", 194 | " \n", 195 | " def len(self):\n", 196 | " return len(self.trans_sets)\n", 197 | " \n", 198 | " def dump(self):\n", 199 | " for trans in self.trans_sets:\n", 200 | " print('{}'.format(to_str(trans)))\n", 201 | " \n", 202 | " \n", 203 | "db = transactions('asset/trans.txt')\n", 204 | "db.dump()\n", 205 | "\n", 206 | "my_itemset = itemset('B', 'A')\n", 207 | "print('supp({}) = {}'.format(to_str(my_itemset), db.support(my_itemset)))\n", 208 | " \n", 209 | "print('unique items are: {}'.format(db.get_items()))\n", 210 | "for i in range(1, db.len()+1):\n", 211 | " print('minsup = {}, frequent items = {}'.format(i, to_str(db.get_frequent_items(i))))" 212 | ] 213 | }, 214 | { 215 | 
"cell_type": "markdown", 216 | "metadata": {}, 217 | "source": [ 218 | "## The Apriori Algorithm" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": 7, 224 | "metadata": { 225 | "collapsed": true 226 | }, 227 | "outputs": [], 228 | "source": [ 229 | "def merge_itemsets(itemset1, itemset2):\n", 230 | " merged = set(itemset1)\n", 231 | " merged = merged.union(itemset2)\n", 232 | " return frozenset(merged)" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": 8, 238 | "metadata": { 239 | "collapsed": false 240 | }, 241 | "outputs": [ 242 | { 243 | "name": "stdout", 244 | "output_type": "stream", 245 | "text": [ 246 | "{A, B, C}\n", 247 | "{A, B, C, D}\n" 248 | ] 249 | } 250 | ], 251 | "source": [ 252 | "itmset1 = itemset('A', 'B')\n", 253 | "itmset2 = itemset('A', 'C')\n", 254 | "itmset3 = itemset('A', 'D')\n", 255 | "itmset4 = itemset('B', 'C')\n", 256 | "\n", 257 | "print(to_str(merge_itemsets(itmset1, itmset2)))\n", 258 | "print(to_str(merge_itemsets(itmset3, itmset4)))" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": 9, 264 | "metadata": { 265 | "collapsed": true 266 | }, 267 | "outputs": [], 268 | "source": [ 269 | "# debug == 0: print nothing\n", 270 | "# debug == 1: print input and output itemsets\n", 271 | "# debug == 2: print all details\n", 272 | "def gen_candidate_itemsets(freq_itemsets, debug = 1):\n", 273 | " if len(freq_itemsets) == 0: return []\n", 274 | " \n", 275 | " # freq_itemsets is a set of itemset (which is nothing but Set) \n", 276 | " current_len = len(freq_itemsets[0])\n", 277 | " if debug >= 1: print('.. Input (L_k) :\\t{}'.format(to_str(freq_itemsets)))\n", 278 | " \n", 279 | " # using set() to remove duplicates\n", 280 | " merged_itemsets = set([merge_itemsets(set1, set2) \n", 281 | " for set1 in freq_itemsets for set2 in freq_itemsets \n", 282 | " if str(set1) < str(set2)])\n", 283 | " if debug >= 2: print('.. .. 
Merged: {}'.format(to_str(merged_itemsets)))\n", 284 | "\n", 285 | " candidates = []\n", 286 | " for x in merged_itemsets:\n", 287 | " if debug >=2: print('.. .. Checking {}'.format(x))\n", 288 | " if len(x) == current_len + 1:\n", 289 | " pruned = False\n", 290 | " for elem in x:\n", 291 | " sub_itemset = set(x) # need to make a copy\n", 292 | " sub_itemset.remove(elem) # remove one item\n", 293 | " if sub_itemset not in freq_itemsets:\n", 294 | " pruned = True\n", 295 | " break\n", 296 | " if not pruned:\n", 297 | " candidates.append(x)\n", 298 | " if debug >= 1: print('.. Output (C_(k+1):\\t{}'.format(to_str(candidates)))\n", 299 | " \n", 300 | " return candidates" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": 10, 306 | "metadata": { 307 | "collapsed": false 308 | }, 309 | "outputs": [ 310 | { 311 | "name": "stdout", 312 | "output_type": "stream", 313 | "text": [ 314 | "[ {A, B}, {A, C}, {A, D}, {B, C} ]\n", 315 | ".. Input (L_k) :\t[ {A, B}, {A, C}, {A, D}, {B, C} ]\n", 316 | ".. 
Output (C_(k+1):\t[ {A, B, C} ]\n" 317 | ] 318 | }, 319 | { 320 | "data": { 321 | "text/plain": [ 322 | "[frozenset({'A', 'B', 'C'})]" 323 | ] 324 | }, 325 | "execution_count": 10, 326 | "metadata": {}, 327 | "output_type": "execute_result" 328 | } 329 | ], 330 | "source": [ 331 | "my_itemsets = [itmset1, itmset2, itmset3, itmset4]\n", 332 | "print(to_str(my_itemsets))\n", 333 | "gen_candidate_itemsets(my_itemsets, debug = 1)" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": 11, 339 | "metadata": { 340 | "collapsed": false 341 | }, 342 | "outputs": [], 343 | "source": [ 344 | "def output(freq_itemsets):\n", 345 | " for x in freq_itemsets:\n", 346 | " print('{} => {}'.format(to_str(x), db.support(x)))\n", 347 | "\n", 348 | "def apriori(db, minsup):\n", 349 | " last_frequent_itemsets = db.get_frequent_items(minsup)\n", 350 | " output(last_frequent_itemsets)\n", 351 | " count = 1\n", 352 | " \n", 353 | " # generate candidate itemsets \n", 354 | " candidate_itemsets = gen_candidate_itemsets(last_frequent_itemsets)\n", 355 | " while len(candidate_itemsets) > 0:\n", 356 | " count += 1\n", 357 | " print('\\n** Iteration {}'.format(count))\n", 358 | " last_frequent_itemsets = [itemsets for itemsets in candidate_itemsets if db.support(itemsets) >= minsup]\n", 359 | " output(last_frequent_itemsets)\n", 360 | " candidate_itemsets = gen_candidate_itemsets(last_frequent_itemsets)\n" 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": 12, 366 | "metadata": { 367 | "collapsed": false, 368 | "scrolled": true 369 | }, 370 | "outputs": [ 371 | { 372 | "name": "stdout", 373 | "output_type": "stream", 374 | "text": [ 375 | "{A} => 3\n", 376 | "{B} => 4\n", 377 | "{C} => 3\n", 378 | "{D} => 3\n", 379 | "{E} => 4\n", 380 | ".. Input (L_k) :\t[ {A}, {B}, {C}, {D}, {E} ]\n", 381 | ".. 
Output (C_(k+1):\t[ {B, C}, {B, D}, {A, C}, {A, D}, {C, D}, {C, E}, {B, E}, {A, E}, {D, E}, {A, B} ]\n", 382 | "\n", 383 | "** Iteration 2\n", 384 | "{B, C} => 2\n", 385 | "{A, C} => 2\n", 386 | "{A, D} => 2\n", 387 | "{C, D} => 2\n", 388 | "{B, E} => 3\n", 389 | "{A, E} => 2\n", 390 | "{D, E} => 2\n", 391 | "{A, B} => 2\n", 392 | ".. Input (L_k) :\t[ {B, C}, {A, C}, {A, D}, {C, D}, {B, E}, {A, E}, {D, E}, {A, B} ]\n", 393 | ".. Output (C_(k+1):\t[ {A, D, E}, {A, B, C}, {A, C, D}, {A, B, E} ]\n", 394 | "\n", 395 | "** Iteration 3\n", 396 | "{A, C, D} => 2\n", 397 | "{A, B, E} => 2\n", 398 | ".. Input (L_k) :\t[ {A, C, D}, {A, B, E} ]\n", 399 | ".. Output (C_(k+1):\t[ ]\n" 400 | ] 401 | } 402 | ], 403 | "source": [ 404 | "# `debug` defaults to 1; change the default to 2 to see more info (or to 0 to silence it)\n", 405 | "apriori(db, 2)" 406 | ] 407 | }, 408 | { 409 | "cell_type": "markdown", 410 | "metadata": {}, 411 | "source": [ 412 | "From the debugging info, you can see that only `ABC`, `ABE`, `ACD` and `ADE` are generated as $C_3$, and of these only `ABE` and `ACD` are frequent (i.e., they form $L_3$). E.g., both `AB` and `AD` are in $L_2$, but since `BD` is not in $L_2$, `ABD` is not added to $C_3$. " 413 | ] 414 | }, 415 | { 416 | "cell_type": "code", 417 | "execution_count": 13, 418 | "metadata": { 419 | "collapsed": false 420 | }, 421 | "outputs": [ 422 | { 423 | "name": "stdout", 424 | "output_type": "stream", 425 | "text": [ 426 | "{A} => 3\n", 427 | "{B} => 4\n", 428 | "{C} => 3\n", 429 | "{D} => 3\n", 430 | "{E} => 4\n", 431 | ".. Input (L_k) :\t[ {A}, {B}, {C}, {D}, {E} ]\n", 432 | ".. Output (C_(k+1):\t[ {B, C}, {B, D}, {A, C}, {A, D}, {C, D}, {C, E}, {B, E}, {A, E}, {D, E}, {A, B} ]\n", 433 | "\n", 434 | "** Iteration 2\n", 435 | "{B, E} => 3\n", 436 | ".. Input (L_k) :\t[ {B, E} ]\n", 437 | ".. 
Output (C_(k+1):\t[ ]\n" 438 | ] 439 | } 440 | ], 441 | "source": [ 442 | "apriori(db, 3)" 443 | ] 444 | }, 445 | { 446 | "cell_type": "code", 447 | "execution_count": 14, 448 | "metadata": { 449 | "collapsed": false 450 | }, 451 | "outputs": [ 452 | { 453 | "name": "stdout", 454 | "output_type": "stream", 455 | "text": [ 456 | "{B} => 4\n", 457 | "{E} => 4\n", 458 | ".. Input (L_k) :\t[ {B}, {E} ]\n", 459 | ".. Output (C_(k+1):\t[ {B, E} ]\n", 460 | "\n", 461 | "** Iteration 2\n" 462 | ] 463 | } 464 | ], 465 | "source": [ 466 | "apriori(db, 4)" 467 | ] 468 | }, 469 | { 470 | "cell_type": "code", 471 | "execution_count": 15, 472 | "metadata": { 473 | "collapsed": true 474 | }, 475 | "outputs": [], 476 | "source": [ 477 | "apriori(db, 5)" 478 | ] 479 | }, 480 | { 481 | "cell_type": "code", 482 | "execution_count": null, 483 | "metadata": { 484 | "collapsed": true 485 | }, 486 | "outputs": [], 487 | "source": [] 488 | } 489 | ], 490 | "metadata": { 491 | "kernelspec": { 492 | "display_name": "Python 3", 493 | "language": "python", 494 | "name": "python3" 495 | }, 496 | "language_info": { 497 | "codemirror_mode": { 498 | "name": "ipython", 499 | "version": 3 500 | }, 501 | "file_extension": ".py", 502 | "mimetype": "text/x-python", 503 | "name": "python", 504 | "nbconvert_exporter": "python", 505 | "pygments_lexer": "ipython3", 506 | "version": "3.5.2" 507 | } 508 | }, 509 | "nbformat": 4, 510 | "nbformat_minor": 1 511 | } 512 | -------------------------------------------------------------------------------- /4.optimal-histogram/L4 - Optimal Histogram.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Introduction\n", 8 | "\n", 9 | "In this notebook, we experiment with the optimal histogram algorithm. We will implement a simple version based on recursion and you will do the hard job of implementing a dynamic programming-based version. 
\n", 10 | "\n", 11 | "\n", 12 | "References: \n", 13 | "* H. V. Jagadish, Nick Koudas, S. Muthukrishnan, Viswanath Poosala, Kenneth C. Sevcik, Torsten Suel: Optimal Histograms with Quality Guarantees. VLDB 1998: 275-286. (url: http://engineering.nyu.edu/~suel/papers/vopt.pdf)\n", 14 | "* Dynamic Programming (wikipedia): https://en.wikipedia.org/wiki/Dynamic_programming\n" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": { 21 | "collapsed": true 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "LARGE_NUM = 1000000000.0\n", 26 | "EMPTY = -1\n", 27 | "\n", 28 | "DEBUG = 2\n", 29 | "#DEBUG = 1\n", 30 | "\n", 31 | "import numpy as np\n", 32 | "\n", 33 | "def sse(arr):\n", 34 | " if len(arr) == 0: # deal with arr == []\n", 35 | " return 0.0\n", 36 | "\n", 37 | " avg = np.average(arr)\n", 38 | " val = sum( [(x-avg)*(x-avg) for x in arr] )\n", 39 | " return val\n", 40 | "\n", 41 | "def calc_depth(b):\n", 42 | " return 5 - b\n", 43 | "\n", 44 | "def v_opt_rec(xx, b):\n", 45 | " mincost = LARGE_NUM\n", 46 | " n = len(xx)\n", 47 | "\n", 48 | " # check boundary condition:\n", 49 | " if n < b:\n", 50 | " return LARGE_NUM + 1\n", 51 | " elif b == 1:\n", 52 | " return sse(xx)\n", 53 | " else: # the general case\n", 54 | " if DEBUG > 1:\n", 55 | " #print('.. BEGIN: input = {!s:<30}, b = {}'.format(xx, b))\n", 56 | " print('..{}BEGIN: input = {!s:<30}, b = {}'.format(' '*calc_depth(b), xx, b))\n", 57 | "\n", 58 | " \n", 59 | " for t in range(n):\n", 60 | " prefix = xx[0 : t+1]\n", 61 | " suffix = xx[t+1 : ]\n", 62 | " cost = sse(prefix) + v_opt_rec(suffix, b - 1)\n", 63 | " mincost = min(mincost, cost)\n", 64 | "\n", 65 | " if DEBUG > 0:\n", 66 | " #print('.. 
END: input = {!s:<32}, b = {}, mincost = {}'.format(xx, b, mincost))\n", 67 | " print('..{}END: input = {!s:<32}, b = {}, mincost = {}'.format(' '*calc_depth(b), xx, b, mincost))\n", 68 | "\n", 69 | " return mincost\n", 70 | "\n" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "Now, try to understand how the algorithm works -- feel free to modify the code to output more if you need. Specifically, \n", 78 | "\n", 79 | "1. Observe and understand how the recursion works (set `DEBUG = 2`)\n", 80 | "2. Observe and understand how many sub-problems are being solved again and again (set `DEBUG = 1`), especially when the input array is longer. " 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 2, 86 | "metadata": {}, 87 | "outputs": [ 88 | { 89 | "name": "stdout", 90 | "output_type": "stream", 91 | "text": [ 92 | ".. BEGIN: input = [7, 9, 13, 5] , b = 3\n", 93 | ".. BEGIN: input = [9, 13, 5] , b = 2\n", 94 | ".. END: input = [9, 13, 5] , b = 2, mincost = 8.0\n", 95 | ".. BEGIN: input = [13, 5] , b = 2\n", 96 | ".. END: input = [13, 5] , b = 2, mincost = 0.0\n", 97 | ".. END: input = [7, 9, 13, 5] , b = 3, mincost = 2.0\n", 98 | "optimal cost = 2.0\n" 99 | ] 100 | } 101 | ], 102 | "source": [ 103 | "x = [7, 9, 13, 5]\n", 104 | "b = 3\n", 105 | "\n", 106 | "c = v_opt_rec(x, b)\n", 107 | "print('optimal cost = {}'.format(c))" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 3, 113 | "metadata": {}, 114 | "outputs": [ 115 | { 116 | "name": "stdout", 117 | "output_type": "stream", 118 | "text": [ 119 | ".. BEGIN: input = [1, 3, 9, 13, 17] , b = 4\n", 120 | ".. BEGIN: input = [3, 9, 13, 17] , b = 3\n", 121 | ".. BEGIN: input = [9, 13, 17] , b = 2\n", 122 | ".. END: input = [9, 13, 17] , b = 2, mincost = 8.0\n", 123 | ".. BEGIN: input = [13, 17] , b = 2\n", 124 | ".. END: input = [13, 17] , b = 2, mincost = 0.0\n", 125 | ".. 
END: input = [3, 9, 13, 17] , b = 3, mincost = 8.0\n", 126 | ".. BEGIN: input = [9, 13, 17] , b = 3\n", 127 | ".. BEGIN: input = [13, 17] , b = 2\n", 128 | ".. END: input = [13, 17] , b = 2, mincost = 0.0\n", 129 | ".. END: input = [9, 13, 17] , b = 3, mincost = 0.0\n", 130 | ".. END: input = [1, 3, 9, 13, 17] , b = 4, mincost = 2.0\n", 131 | "c = 2.0\n" 132 | ] 133 | } 134 | ], 135 | "source": [ 136 | "x = [1, 3, 9, 13, 17]\n", 137 | "b = 4\n", 138 | "\n", 139 | "c = v_opt_rec(x, b)\n", 140 | "print('c = {}'.format(c))" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 7, 146 | "metadata": {}, 147 | "outputs": [ 148 | { 149 | "name": "stdout", 150 | "output_type": "stream", 151 | "text": [ 152 | ".. BEGIN: input = [3, 1, 18, 9, 13, 34, 17] , b = 4\n", 153 | ".. BEGIN: input = [1, 18, 9, 13, 34, 17] , b = 3\n", 154 | ".. BEGIN: input = [18, 9, 13, 34, 17] , b = 2\n", 155 | ".. END: input = [18, 9, 13, 34, 17] , b = 2, mincost = 185.16666666666669\n", 156 | ".. BEGIN: input = [9, 13, 34, 17] , b = 2\n", 157 | ".. END: input = [9, 13, 34, 17] , b = 2, mincost = 152.5\n", 158 | ".. BEGIN: input = [13, 34, 17] , b = 2\n", 159 | ".. END: input = [13, 34, 17] , b = 2, mincost = 144.5\n", 160 | ".. BEGIN: input = [34, 17] , b = 2\n", 161 | ".. END: input = [34, 17] , b = 2, mincost = 0.0\n", 162 | ".. END: input = [1, 18, 9, 13, 34, 17] , b = 3, mincost = 154.75\n", 163 | ".. BEGIN: input = [18, 9, 13, 34, 17] , b = 3\n", 164 | ".. BEGIN: input = [9, 13, 34, 17] , b = 2\n", 165 | ".. END: input = [9, 13, 34, 17] , b = 2, mincost = 152.5\n", 166 | ".. BEGIN: input = [13, 34, 17] , b = 2\n", 167 | ".. END: input = [13, 34, 17] , b = 2, mincost = 144.5\n", 168 | ".. BEGIN: input = [34, 17] , b = 2\n", 169 | ".. END: input = [34, 17] , b = 2, mincost = 0.0\n", 170 | ".. END: input = [18, 9, 13, 34, 17] , b = 3, mincost = 40.66666666666667\n", 171 | ".. BEGIN: input = [9, 13, 34, 17] , b = 3\n", 172 | ".. 
BEGIN: input = [13, 34, 17] , b = 2\n", 173 | ".. END: input = [13, 34, 17] , b = 2, mincost = 144.5\n", 174 | ".. BEGIN: input = [34, 17] , b = 2\n", 175 | ".. END: input = [34, 17] , b = 2, mincost = 0.0\n", 176 | ".. END: input = [9, 13, 34, 17] , b = 3, mincost = 8.0\n", 177 | ".. BEGIN: input = [13, 34, 17] , b = 3\n", 178 | ".. BEGIN: input = [34, 17] , b = 2\n", 179 | ".. END: input = [34, 17] , b = 2, mincost = 0.0\n", 180 | ".. END: input = [13, 34, 17] , b = 3, mincost = 0.0\n", 181 | ".. END: input = [3, 1, 18, 9, 13, 34, 17] , b = 4, mincost = 42.66666666666667\n", 182 | "c = 42.66666666666667\n" 183 | ] 184 | } 185 | ], 186 | "source": [ 187 | "x = [3, 1, 18, 9, 13, 34, 17]\n", 188 | "b = 4\n", 189 | "\n", 190 | "c = v_opt_rec(x, b)\n", 191 | "print('c = {}'.format(c))" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 5, 197 | "metadata": {}, 198 | "outputs": [ 199 | { 200 | "name": "stdout", 201 | "output_type": "stream", 202 | "text": [ 203 | ".. BEGIN: input = [1, 2, 3, 4, 5, 6] , b = 4\n", 204 | ".. BEGIN: input = [2, 3, 4, 5, 6] , b = 3\n", 205 | ".. BEGIN: input = [3, 4, 5, 6] , b = 2\n", 206 | ".. END: input = [3, 4, 5, 6] , b = 2, mincost = 1.0\n", 207 | ".. BEGIN: input = [4, 5, 6] , b = 2\n", 208 | ".. END: input = [4, 5, 6] , b = 2, mincost = 0.5\n", 209 | ".. BEGIN: input = [5, 6] , b = 2\n", 210 | ".. END: input = [5, 6] , b = 2, mincost = 0.0\n", 211 | ".. END: input = [2, 3, 4, 5, 6] , b = 3, mincost = 1.0\n", 212 | ".. BEGIN: input = [3, 4, 5, 6] , b = 3\n", 213 | ".. BEGIN: input = [4, 5, 6] , b = 2\n", 214 | ".. END: input = [4, 5, 6] , b = 2, mincost = 0.5\n", 215 | ".. BEGIN: input = [5, 6] , b = 2\n", 216 | ".. END: input = [5, 6] , b = 2, mincost = 0.0\n", 217 | ".. END: input = [3, 4, 5, 6] , b = 3, mincost = 0.5\n", 218 | ".. BEGIN: input = [4, 5, 6] , b = 3\n", 219 | ".. BEGIN: input = [5, 6] , b = 2\n", 220 | ".. END: input = [5, 6] , b = 2, mincost = 0.0\n", 221 | ".. 
END: input = [4, 5, 6] , b = 3, mincost = 0.0\n", 222 | ".. END: input = [1, 2, 3, 4, 5, 6] , b = 4, mincost = 1.0\n", 223 | "c = 1.0\n" 224 | ] 225 | } 226 | ], 227 | "source": [ 228 | "x = [1, 2, 3, 4, 5, 6]\n", 229 | "b = 4\n", 230 | "\n", 231 | "c = v_opt_rec(x, b)\n", 232 | "print('c = {}'.format(c))" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": 6, 238 | "metadata": {}, 239 | "outputs": [ 240 | { 241 | "name": "stdout", 242 | "output_type": "stream", 243 | "text": [ 244 | ".. BEGIN: input = [1, 7, 8, 15, 21, 30, 78] , b = 4\n", 245 | ".. BEGIN: input = [7, 8, 15, 21, 30, 78] , b = 3\n", 246 | ".. BEGIN: input = [8, 15, 21, 30, 78] , b = 2\n", 247 | ".. END: input = [8, 15, 21, 30, 78] , b = 2, mincost = 261.0\n", 248 | ".. BEGIN: input = [15, 21, 30, 78] , b = 2\n", 249 | ".. END: input = [15, 21, 30, 78] , b = 2, mincost = 114.0\n", 250 | ".. BEGIN: input = [21, 30, 78] , b = 2\n", 251 | ".. END: input = [21, 30, 78] , b = 2, mincost = 40.5\n", 252 | ".. BEGIN: input = [30, 78] , b = 2\n", 253 | ".. END: input = [30, 78] , b = 2, mincost = 0.0\n", 254 | ".. END: input = [7, 8, 15, 21, 30, 78] , b = 3, mincost = 78.5\n", 255 | ".. BEGIN: input = [8, 15, 21, 30, 78] , b = 3\n", 256 | ".. BEGIN: input = [15, 21, 30, 78] , b = 2\n", 257 | ".. END: input = [15, 21, 30, 78] , b = 2, mincost = 114.0\n", 258 | ".. BEGIN: input = [21, 30, 78] , b = 2\n", 259 | ".. END: input = [21, 30, 78] , b = 2, mincost = 40.5\n", 260 | ".. BEGIN: input = [30, 78] , b = 2\n", 261 | ".. END: input = [30, 78] , b = 2, mincost = 0.0\n", 262 | ".. END: input = [8, 15, 21, 30, 78] , b = 3, mincost = 65.0\n", 263 | ".. BEGIN: input = [15, 21, 30, 78] , b = 3\n", 264 | ".. BEGIN: input = [21, 30, 78] , b = 2\n", 265 | ".. END: input = [21, 30, 78] , b = 2, mincost = 40.5\n", 266 | ".. BEGIN: input = [30, 78] , b = 2\n", 267 | ".. END: input = [30, 78] , b = 2, mincost = 0.0\n", 268 | ".. END: input = [15, 21, 30, 78] , b = 3, mincost = 18.0\n", 269 | ".. 
BEGIN: input = [21, 30, 78] , b = 3\n", 270 | ".. BEGIN: input = [30, 78] , b = 2\n", 271 | ".. END: input = [30, 78] , b = 2, mincost = 0.0\n", 272 | ".. END: input = [21, 30, 78] , b = 3, mincost = 0.0\n", 273 | ".. END: input = [1, 7, 8, 15, 21, 30, 78] , b = 4, mincost = 46.666666666666664\n", 274 | "c = 46.666666666666664\n" 275 | ] 276 | } 277 | ], 278 | "source": [ 279 | "x = [1, 7, 8, 15, 21, 30, 78]\n", 280 | "b = 4\n", 281 | "\n", 282 | "c = v_opt_rec(x, b)\n", 283 | "print('c = {}'.format(c))" 284 | ] 285 | }, 286 | { 287 | "cell_type": "markdown", 288 | "metadata": {}, 289 | "source": [ 290 | "## Exercise\n", 291 | "\n", 292 | "Now you need to implement the same algorithm using dynamic programming. The idea is to fill in a table, named `opt`, of $b \\times n$, where `opt[i][j]` records the optimal cost for building a histogram of $[x_j, x_{j+1}, \\ldots, x_n]$ using $i$ bins. \n", 293 | "\n", 294 | "The first step is to work out the general recursive formula, in the form of: \n", 295 | "$$\n", 296 | "opt[i][j] = \\min_{t} f(t)\n", 297 | "$$\n", 298 | "You need to work out what is the domain of $t$ and what exactly is $f()$, which should depend on $opt[u][v]$ that has **already** been computed. If you cannot work it out directly, you can observe the sub-problems being solved in the recursive algorithm and see if you can schedule the computation of table cells such that every cell required on the right hand side (i.e., $f(t)$) is always scheduled **before** the current cell $opt[i][j]$. 
" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": null, 304 | "metadata": { 305 | "collapsed": true 306 | }, 307 | "outputs": [], 308 | "source": [] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": null, 313 | "metadata": { 314 | "collapsed": true 315 | }, 316 | "outputs": [], 317 | "source": [] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": null, 322 | "metadata": { 323 | "collapsed": true 324 | }, 325 | "outputs": [], 326 | "source": [] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": null, 331 | "metadata": { 332 | "collapsed": true 333 | }, 334 | "outputs": [], 335 | "source": [] 336 | } 337 | ], 338 | "metadata": { 339 | "kernelspec": { 340 | "display_name": "Python 3", 341 | "language": "python", 342 | "name": "python3" 343 | }, 344 | "language_info": { 345 | "codemirror_mode": { 346 | "name": "ipython", 347 | "version": 3 348 | }, 349 | "file_extension": ".py", 350 | "mimetype": "text/x-python", 351 | "name": "python", 352 | "nbconvert_exporter": "python", 353 | "pygments_lexer": "ipython3", 354 | "version": "3.6.1" 355 | } 356 | }, 357 | "nbformat": 4, 358 | "nbformat_minor": 1 359 | } 360 | -------------------------------------------------------------------------------- /1.tools/L1 - numpy-fundamentals.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Fundamentals of NumPy" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "This notebook aims to quickly walk you through the most fundamental bits of NumPy, including:\n", 15 | "1. how to create/initiate 1D arrays and 2D matrices,\n", 16 | "2. how to get/set the shape of numpy arrays,\n", 17 | "3. and how to calculate the dot product of two NumPy arrays." 
18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "*This notebook was created by cherry-picking from [the official documentation of NumPy](https://docs.scipy.org/doc/numpy-dev/user/quickstart.html). Please refer to that page for more information.*" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 1, 30 | "metadata": { 31 | "collapsed": true 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "import numpy as np" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "## 1. `ndarray` in NumPy" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "NumPy’s main object is the homogeneous multidimensional array (`ndarray`). It is a table of elements (usually numbers), all of the same type, indexed by a tuple of non-negative integers." 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "A list of elements can be expressed as an `ndarray` of rank 1, i.e. a 1D array; a matrix can be expressed as an `ndarray` of rank 2." 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "Here are some important attributes of `ndarray`:\n", 64 | "\n", 65 | "* **`ndarray.ndim`**: the rank of the `ndarray`. For instance, a matrix has a rank of 2.\n", 66 | "* **`ndarray.shape`**: the dimensions of the `ndarray` as a tuple of integers. For an array having 10 elements, the shape is `(10,)` (**note the trailing comma**, not the same as `(10, 1)`); for a matrix having 20 rows and 30 columns, the shape is `(20, 30)`.\n", 67 | "* **`ndarray.size`**: the total number of elements in the `ndarray`, which is equal to the product of the dimensions.\n", 68 | "* **`ndarray.dtype`**: an object describing the type of the elements in the array."
69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 2, 74 | "metadata": { 75 | "scrolled": false 76 | }, 77 | "outputs": [ 78 | { 79 | "name": "stdout", 80 | "output_type": "stream", 81 | "text": [ 82 | "arr\t\t [1 2 3 4 5 6 7 8 9]\n", 83 | "arr.ndim\t 1\n", 84 | "arr.shape\t (9,)\n", 85 | "arr.size\t 9\n", 86 | "arr.dtype\t int32\n" 87 | ] 88 | } 89 | ], 90 | "source": [ 91 | "arr = np.array(range(1, 10))\n", 92 | "print('arr\\t\\t', arr)\n", 93 | "print('arr.ndim\\t', arr.ndim)\n", 94 | "print('arr.shape\\t', arr.shape)\n", 95 | "print('arr.size\\t', arr.size)\n", 96 | "print('arr.dtype\\t', arr.dtype)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": { 103 | "collapsed": true 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "arr." 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "## 2. Array Creation" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "There are several ways to create `ndarray`s." 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "### 2.1 Using the `array` function" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "You can create a 1D `ndarray` from an existing list/array easily using the `array` function." 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 9, 141 | "metadata": {}, 142 | "outputs": [ 143 | { 144 | "data": { 145 | "text/plain": [ 146 | "array([1, 2, 3, 4])" 147 | ] 148 | }, 149 | "execution_count": 9, 150 | "metadata": {}, 151 | "output_type": "execute_result" 152 | } 153 | ], 154 | "source": [ 155 | "np.array([1, 2, 3, 4])" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": {}, 161 | "source": [ 162 | "Note that there is only one argument. 
So never do this:" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 10, 168 | "metadata": {}, 169 | "outputs": [ 170 | { 171 | "ename": "ValueError", 172 | "evalue": "only 2 non-keyword arguments accepted", 173 | "output_type": "error", 174 | "traceback": [ 175 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 176 | "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", 177 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m2\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m3\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m4\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", 178 | "\u001b[1;31mValueError\u001b[0m: only 2 non-keyword arguments accepted" 179 | ] 180 | } 181 | ], 182 | "source": [ 183 | "np.array(1, 2, 3, 4)" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "To create a matrix, call `array` on a sequence of sequence." 
191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 14, 196 | "metadata": {}, 197 | "outputs": [ 198 | { 199 | "data": { 200 | "text/plain": [ 201 | "array([[1, 2],\n", 202 | " [3, 4],\n", 203 | " [5, 6]])" 204 | ] 205 | }, 206 | "execution_count": 14, 207 | "metadata": {}, 208 | "output_type": "execute_result" 209 | } 210 | ], 211 | "source": [ 212 | "x = np.array([[1, 2], [3, 4], [5, 6]])\n", 213 | "x" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": 16, 219 | "metadata": {}, 220 | "outputs": [ 221 | { 222 | "data": { 223 | "text/plain": [ 224 | "(3, 2)" 225 | ] 226 | }, 227 | "execution_count": 16, 228 | "metadata": {}, 229 | "output_type": "execute_result" 230 | } 231 | ], 232 | "source": [ 233 | "x.shape" 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": {}, 239 | "source": [ 240 | "### 2.2 Using `zeros`, `ones`, `empty`" 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": {}, 246 | "source": [ 247 | "When the contents of the array to be created are unknown, but its dimensions are known, use one of `zeros`, `ones`, `empty`." 
248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": 6, 253 | "metadata": {}, 254 | "outputs": [ 255 | { 256 | "data": { 257 | "text/plain": [ 258 | "array([[ 0., 0., 0.],\n", 259 | " [ 0., 0., 0.]])" 260 | ] 261 | }, 262 | "execution_count": 6, 263 | "metadata": {}, 264 | "output_type": "execute_result" 265 | } 266 | ], 267 | "source": [ 268 | "np.zeros((2, 3)) # the elements are explicitly initialized to zeros" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": 7, 274 | "metadata": {}, 275 | "outputs": [ 276 | { 277 | "data": { 278 | "text/plain": [ 279 | "array([[ 1., 1., 1.],\n", 280 | " [ 1., 1., 1.]])" 281 | ] 282 | }, 283 | "execution_count": 7, 284 | "metadata": {}, 285 | "output_type": "execute_result" 286 | } 287 | ], 288 | "source": [ 289 | "np.ones((2, 3)) # the elements are explicitly initialized to ones" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": 8, 295 | "metadata": {}, 296 | "outputs": [ 297 | { 298 | "data": { 299 | "text/plain": [ 300 | "array([[ 0., 0., 0.],\n", 301 | " [ 0., 0., 0.]])" 302 | ] 303 | }, 304 | "execution_count": 8, 305 | "metadata": {}, 306 | "output_type": "execute_result" 307 | } 308 | ], 309 | "source": [ 310 | "np.empty((2, 3)) # the elements are not explicitly initialized; expect random values" 311 | ] 312 | }, 313 | { 314 | "cell_type": "markdown", 315 | "metadata": {}, 316 | "source": [ 317 | "The default `dtype` is `numpy.float64` for these functions." 318 | ] 319 | }, 320 | { 321 | "cell_type": "markdown", 322 | "metadata": {}, 323 | "source": [ 324 | "### 2.3 Using `arange`, `linspace`" 325 | ] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | "metadata": {}, 330 | "source": [ 331 | "Akin to `range` in Python, `arange` in NumPy returns a sequence of numbers in a `ndarray`. Use the `dtype` parameter to change the type, or use `astype()` function to cast into another type. 
" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": null, 337 | "metadata": {}, 338 | "outputs": [], 339 | "source": [ 340 | "np.arange(1, 3, 0.2)" 341 | ] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "metadata": {}, 346 | "source": [ 347 | "Due to the finite precision of floating point numbers, however, it's better to use `linspace` when we are trying to create a sequence of floating point numbers, specifying how many elements we want, instead of the `step`." 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": null, 353 | "metadata": {}, 354 | "outputs": [], 355 | "source": [ 356 | "np.linspace(1, 3, 7)" 357 | ] 358 | }, 359 | { 360 | "cell_type": "markdown", 361 | "metadata": {}, 362 | "source": [ 363 | "## 3. Playing with the Shapes of `ndarray`s" 364 | ] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "execution_count": 24, 369 | "metadata": {}, 370 | "outputs": [ 371 | { 372 | "data": { 373 | "text/plain": [ 374 | "array([1, 2, 3, 4, 5, 6, 7, 8, 9])" 375 | ] 376 | }, 377 | "execution_count": 24, 378 | "metadata": {}, 379 | "output_type": "execute_result" 380 | } 381 | ], 382 | "source": [ 383 | "arr = np.arange(1, 10)\n", 384 | "arr" 385 | ] 386 | }, 387 | { 388 | "cell_type": "markdown", 389 | "metadata": {}, 390 | "source": [ 391 | "### 3.1 How to Get the Shape of an `ndarray`" 392 | ] 393 | }, 394 | { 395 | "cell_type": "code", 396 | "execution_count": 18, 397 | "metadata": {}, 398 | "outputs": [ 399 | { 400 | "data": { 401 | "text/plain": [ 402 | "(9,)" 403 | ] 404 | }, 405 | "execution_count": 18, 406 | "metadata": {}, 407 | "output_type": "execute_result" 408 | } 409 | ], 410 | "source": [ 411 | "arr.shape" 412 | ] 413 | }, 414 | { 415 | "cell_type": "markdown", 416 | "metadata": {}, 417 | "source": [ 418 | "### 3.2 How to Reshape the `ndarray`:" 419 | ] 420 | }, 421 | { 422 | "cell_type": "code", 423 | "execution_count": 19, 424 | "metadata": {}, 425 | "outputs": [ 426 | { 427 | "data": { 
428 | "text/plain": [ 429 | "array([[1, 2, 3],\n", 430 | " [4, 5, 6],\n", 431 | " [7, 8, 9]])" 432 | ] 433 | }, 434 | "execution_count": 19, 435 | "metadata": {}, 436 | "output_type": "execute_result" 437 | } 438 | ], 439 | "source": [ 440 | "arr.reshape((3, 3))" 441 | ] 442 | }, 443 | { 444 | "cell_type": "markdown", 445 | "metadata": {}, 446 | "source": [ 447 | "The `reshape` function returns a new `ndarray` with the shape changed without modifying the original one." 448 | ] 449 | }, 450 | { 451 | "cell_type": "code", 452 | "execution_count": 20, 453 | "metadata": {}, 454 | "outputs": [ 455 | { 456 | "data": { 457 | "text/plain": [ 458 | "array([1, 2, 3, 4, 5, 6, 7, 8, 9])" 459 | ] 460 | }, 461 | "execution_count": 20, 462 | "metadata": {}, 463 | "output_type": "execute_result" 464 | } 465 | ], 466 | "source": [ 467 | "arr" 468 | ] 469 | }, 470 | { 471 | "cell_type": "code", 472 | "execution_count": 25, 473 | "metadata": {}, 474 | "outputs": [ 475 | { 476 | "data": { 477 | "text/plain": [ 478 | "array([[1],\n", 479 | " [2],\n", 480 | " [3],\n", 481 | " [4],\n", 482 | " [5],\n", 483 | " [6],\n", 484 | " [7],\n", 485 | " [8],\n", 486 | " [9]])" 487 | ] 488 | }, 489 | "execution_count": 25, 490 | "metadata": {}, 491 | "output_type": "execute_result" 492 | } 493 | ], 494 | "source": [ 495 | "arr.reshape((9,1))" 496 | ] 497 | }, 498 | { 499 | "cell_type": "markdown", 500 | "metadata": {}, 501 | "source": [ 502 | "To directly modify the shape of an `ndarray`:" 503 | ] 504 | }, 505 | { 506 | "cell_type": "code", 507 | "execution_count": null, 508 | "metadata": {}, 509 | "outputs": [], 510 | "source": [ 511 | "arr.shape = (3, 3)\n", 512 | "arr" 513 | ] 514 | }, 515 | { 516 | "cell_type": "markdown", 517 | "metadata": {}, 518 | "source": [ 519 | "## 4. 
Basic Operations" 520 | ] 521 | }, 522 | { 523 | "cell_type": "markdown", 524 | "metadata": {}, 525 | "source": [ 526 | "### 4.1 Arithmetic Operators" 527 | ] 528 | }, 529 | { 530 | "cell_type": "markdown", 531 | "metadata": {}, 532 | "source": [ 533 | "Arithmetic operators on `ndarray`s apply elementwise (so the operation is *vectorized*). A new `ndarray` will be created to hold the result." 534 | ] 535 | }, 536 | { 537 | "cell_type": "code", 538 | "execution_count": 26, 539 | "metadata": { 540 | "collapsed": true 541 | }, 542 | "outputs": [], 543 | "source": [ 544 | "arr = np.array([1, 2])" 545 | ] 546 | }, 547 | { 548 | "cell_type": "code", 549 | "execution_count": 27, 550 | "metadata": {}, 551 | "outputs": [ 552 | { 553 | "data": { 554 | "text/plain": [ 555 | "array([2, 3])" 556 | ] 557 | }, 558 | "execution_count": 27, 559 | "metadata": {}, 560 | "output_type": "execute_result" 561 | } 562 | ], 563 | "source": [ 564 | "arr + 1" 565 | ] 566 | }, 567 | { 568 | "cell_type": "code", 569 | "execution_count": 28, 570 | "metadata": {}, 571 | "outputs": [ 572 | { 573 | "data": { 574 | "text/plain": [ 575 | "array([2, 4])" 576 | ] 577 | }, 578 | "execution_count": 28, 579 | "metadata": {}, 580 | "output_type": "execute_result" 581 | } 582 | ], 583 | "source": [ 584 | "arr * 2" 585 | ] 586 | }, 587 | { 588 | "cell_type": "code", 589 | "execution_count": 29, 590 | "metadata": {}, 591 | "outputs": [ 592 | { 593 | "data": { 594 | "text/plain": [ 595 | "array([1, 4])" 596 | ] 597 | }, 598 | "execution_count": 29, 599 | "metadata": {}, 600 | "output_type": "execute_result" 601 | } 602 | ], 603 | "source": [ 604 | "arr ** 2" 605 | ] 606 | }, 607 | { 608 | "cell_type": "markdown", 609 | "metadata": {}, 610 | "source": [ 611 | "### 4.2 Dot Products" 612 | ] 613 | }, 614 | { 615 | "cell_type": "markdown", 616 | "metadata": {}, 617 | "source": [ 618 | "Given two `ndarray`s with proper shapes:" 619 | ] 620 | }, 621 | { 622 | "cell_type": "code", 623 | "execution_count": 30, 624 | 
"metadata": {}, 625 | "outputs": [ 626 | { 627 | "data": { 628 | "text/plain": [ 629 | "array([1, 2])" 630 | ] 631 | }, 632 | "execution_count": 30, 633 | "metadata": {}, 634 | "output_type": "execute_result" 635 | } 636 | ], 637 | "source": [ 638 | "a = np.array([1, 2])\n", 639 | "a" 640 | ] 641 | }, 642 | { 643 | "cell_type": "code", 644 | "execution_count": 31, 645 | "metadata": {}, 646 | "outputs": [ 647 | { 648 | "data": { 649 | "text/plain": [ 650 | "array([[1],\n", 651 | " [2]])" 652 | ] 653 | }, 654 | "execution_count": 31, 655 | "metadata": {}, 656 | "output_type": "execute_result" 657 | } 658 | ], 659 | "source": [ 660 | "b = np.array([[1], [2]])\n", 661 | "b" 662 | ] 663 | }, 664 | { 665 | "cell_type": "markdown", 666 | "metadata": {}, 667 | "source": [ 668 | "To calculate the dot product, the sentence in the following cell is intuitive but **WRONG**:" 669 | ] 670 | }, 671 | { 672 | "cell_type": "code", 673 | "execution_count": 32, 674 | "metadata": {}, 675 | "outputs": [ 676 | { 677 | "data": { 678 | "text/plain": [ 679 | "array([[1, 2],\n", 680 | " [2, 4]])" 681 | ] 682 | }, 683 | "execution_count": 32, 684 | "metadata": {}, 685 | "output_type": "execute_result" 686 | } 687 | ], 688 | "source": [ 689 | "a * b # calculating elementwise product!" 690 | ] 691 | }, 692 | { 693 | "cell_type": "markdown", 694 | "metadata": {}, 695 | "source": [ 696 | "To correctly calculate the dot products of two `ndarray`s, use `numpy.dot` or the `dot` function on the `ndarray` object." 
697 | ] 698 | }, 699 | { 700 | "cell_type": "code", 701 | "execution_count": 33, 702 | "metadata": {}, 703 | "outputs": [ 704 | { 705 | "data": { 706 | "text/plain": [ 707 | "array([5])" 708 | ] 709 | }, 710 | "execution_count": 33, 711 | "metadata": {}, 712 | "output_type": "execute_result" 713 | } 714 | ], 715 | "source": [ 716 | "a.dot(b)" 717 | ] 718 | }, 719 | { 720 | "cell_type": "code", 721 | "execution_count": 34, 722 | "metadata": {}, 723 | "outputs": [ 724 | { 725 | "data": { 726 | "text/plain": [ 727 | "array([5])" 728 | ] 729 | }, 730 | "execution_count": 34, 731 | "metadata": {}, 732 | "output_type": "execute_result" 733 | } 734 | ], 735 | "source": [ 736 | "np.dot(a, b)" 737 | ] 738 | }, 739 | { 740 | "cell_type": "markdown", 741 | "metadata": {}, 742 | "source": [ 743 | "Both ways create a new `ndarray` to hold the results without modifying the original ones." 744 | ] 745 | }, 746 | { 747 | "cell_type": "code", 748 | "execution_count": null, 749 | "metadata": { 750 | "collapsed": true 751 | }, 752 | "outputs": [], 753 | "source": [] 754 | } 755 | ], 756 | "metadata": { 757 | "kernelspec": { 758 | "display_name": "Python 3", 759 | "language": "python", 760 | "name": "python3" 761 | }, 762 | "language_info": { 763 | "codemirror_mode": { 764 | "name": "ipython", 765 | "version": 3 766 | }, 767 | "file_extension": ".py", 768 | "mimetype": "text/x-python", 769 | "name": "python", 770 | "nbconvert_exporter": "python", 771 | "pygments_lexer": "ipython3", 772 | "version": "3.6.1" 773 | } 774 | }, 775 | "nbformat": 4, 776 | "nbformat_minor": 2 777 | } 778 | -------------------------------------------------------------------------------- /0.self-evaluation/L0.python3-and-jupyter.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Setting up your `python` environment" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | 
"metadata": {}, 13 | "source": [ 14 | "We will be using `python 3` and `ipython notebook`/`jupyter` extensively in this course. You need to set up the correct `python` environment first. Below are some instructions in a Mac OS X 10.11 environment. You should be able to adapt it to other environments -- try Google if any problem. " 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "## Installing `anaconda` and `jupyter`" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "1. Download and install the `anaconda` installation package (for `python 3.6`) at https://www.continuum.io/downloads\n", 29 | "2. Create a `py36` virtual environment by `conda create -n py36 python=3.6 anaconda`. See more at http://conda.pydata.org/docs/using/envs.html\n", 30 | "3. Activiate `py36` (or put it on your `~/.bashrc`): `source activate py36`\n", 31 | "4. To install a new package in an environment, switch to it and use `conda install -n PACKAGENAME` or `pip install PACKAGENAME`\n", 32 | "5. Install `jupyter` by `conda install jupyter`\n" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "### Test your installation" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "```\n", 47 | "% python -V\n", 48 | "Python 3.6.1 :: Anaconda 4.4.0 (x86_64)\n", 49 | "% ipython -V\n", 50 | "5.3.0\n", 51 | "% jupyter notebook\n", 52 | "```\n", 53 | "\n", 54 | "\n", 55 | "The last command shall open up a new page in your browser. Also check if you click the \"new\" button, there is a \"python 3\" choice under the 'notebooks'. 
" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "## Using `jupyter`" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "Start with simple tutorial: https://jupyter-notebook-beginner-guide.readthedocs.io/en/latest/index.html\n", 70 | "\n", 71 | "Press `h` (you may need to press `ESC` first) to learn a few important keyboard shortcuts, e.g., \n", 72 | "\n", 73 | "* `SHIFT+RETURN`\n", 74 | "* `A`, `B`, `X`\n", 75 | "* `ESC`\n", 76 | "* `ESC m`: to change the current cell to a `markdown` cell, \n", 77 | "* selecting multiple lines + `TAB` (indent them) / `Cmd + /` (block comment). \n", 78 | "* Note that mouse selection = copying to clipboard (sometimes annoying). \n", 79 | "\n", 80 | "Read the syntax of `markdown` at http://jupyter-notebook.readthedocs.io/en/latest/examples/Notebook/Working%20With%20Markdown%20Cells.html and try it out by yourself. \n", 81 | "\n", 82 | "It also can display maths symbols/equations, e.g., $e^{ix} = cos(x) + i \\cdot sin(x)$.\n", 83 | "\n", 84 | "$$ P \\implies Q \\qquad \\equiv \\qquad P \\lor \\neg Q $$\n", 85 | "\n", 86 | "Try out cells with simple python code (or try the following cell in this notebook). \n", 87 | "\n", 88 | "**Tips**: \n", 89 | "\n", 90 | "* Recommended browser: firefox (Chrome has issues rendering maths fonts/equations)\n", 91 | "* Your code may run into an infinite loop and you may HAVE TO kill the browser. So use a decent session manager for your browser. 
" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 1, 97 | "metadata": {}, 98 | "outputs": [ 99 | { 100 | "data": { 101 | "text/plain": [ 102 | "[2, 9, 2, 2, 4, 6, 9, 3, 10, 7]" 103 | ] 104 | }, 105 | "execution_count": 1, 106 | "metadata": {}, 107 | "output_type": "execute_result" 108 | } 109 | ], 110 | "source": [ 111 | "import random\n", 112 | "\n", 113 | "n = 10\n", 114 | "data = [random.randint(1, 10) for _ in range(n)]\n", 115 | "data # this print out the variable's content" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "## Exercise 1: Integer square root of an integer \n", 123 | "\n", 124 | "You need to write a function, `nsqrt()`, that takes as input an integer `x`, and return the largest integer that does not exceed $\\sqrt{x}$. You need to abide by the following constraints:\n", 125 | "1. The time complexity of your algorithm should be $O(\\log x)$. \n", 126 | "2. You cannot use `sqrt()` function. \n", 127 | "\n", 128 | "For example, `nsqrt(11) = 3`, and `nsqrt(1369) = 37`. \n" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 3, 134 | "metadata": { 135 | "collapsed": true 136 | }, 137 | "outputs": [], 138 | "source": [ 139 | "def nsqrt(x): # do not change the heading of the function\n", 140 | " \n", 141 | " pass # **replace** this line with your code" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": { 147 | "collapsed": true 148 | }, 149 | "source": [ 150 | "You can test your implementation using the following code." 
151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 11, 156 | "metadata": {}, 157 | "outputs": [ 158 | { 159 | "name": "stdout", 160 | "output_type": "stream", 161 | "text": [ 162 | "3 37\n" 163 | ] 164 | } 165 | ], 166 | "source": [ 167 | "print(nsqrt(11), nsqrt(1369))" 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "metadata": { 173 | "collapsed": true 174 | }, 175 | "source": [ 176 | "## Exercise 2: Root finding \n", 177 | "\n", 178 | "Use [Newton's method](https://en.wikipedia.org/wiki/Newton's_method) to find a root of an equation numerically. Newton's method starts from $x_0$ and iteratively computes $$x_{i+1} = x_i - \\frac{f(x_i)}{f'(x_i)}.$$\n", 179 | "\n", 180 | "Let us consider finding an $x$ such that $f(x) = x \\ln(x) - 16 = 0$. First, we plot the function and it seems that the root is close to 8.0." 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 5, 186 | "metadata": {}, 187 | "outputs": [ 188 | { 189 | "data": { 190 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXwAAAEACAYAAACwB81wAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAGkBJREFUeJzt3XmUVNXVxuHfRtQYCEYNYtSIEgGR0Y6icUhaARFwAEfk\nS9RINFmgcUxEESFmcFiamCgaNaioOMQhChigIVAqKDLPgkgDYgygDAKK2HTv749TSAcbmu4ablXd\n91mrFlXVXXW3Jby9+9xzzzF3R0RECl+dqAsQEZHsUOCLiMSEAl9EJCYU+CIiMaHAFxGJCQW+iEhM\npCXwzWyIma0yszmVntvPzErMbJGZjTGzfdNxLBERqZ10dfiPA513eK4fMM7dmwPjgZvTdCwREakF\nS9eFV2bWGBjh7m2SjxcCP3b3VWZ2EJBw96PScjAREamxTI7hH+juqwDcfSXQMIPHEhGRauikrYhI\nTNTN4HuvMrNGlYZ0Vlf1TWamxXxERGrB3a0m35/ODt+St22GA5cl718KvLqzF7q7bu4MHDgw8hpy\n5abPQp+FPotd32ojXdMynwHeApqZ2Qdm9jPgTqCTmS0COiYfi4hIRNIypOPuvXbypY7peH8REUmd\nTtrmkOLi4qhLyBn6LLbTZ7GdPovUpG0efq0LMPOoaxARyTdmhkd40lZERHKYAl9EJCYU+CIiMaHA\nFxGJCQW+iEhMKPBFRGJCgS8iEhMKfBGRmFDgi4jEhAJfRCQmFPgiIjGhwBcRiQkFvohITCjwRURi\nQoEvIhITmdzEXERE0swdHnigdq9V4IuI5IlPPoGf/QxWrard6zWkIyKSB8aPh3btoEULmDixdu+h\nDl9EJIeVlcHAgfDEE+F2+um1fy8FvohIjlq6FC6+GPbbD2bNggMPTO39NKQjIpKDnn0Wjj8eLroI\nXnst9bAHdfgiIjll0ya4+mqYNAlGj4aiovS9tzp8EZEcMWNGCHiz7ffTSYEvIhKxigr485+hc2f4\n7W/hscegfv30H0dDOiIiEVq9Gi67DNauhXfegSZNMncsdfgiIhEZOxaOOSbMr3/zzcyGPWShwzez\nZcCnQAVQ5u7tM31MEZFc9uWXcOut8Mwz8NRTcNpp2TluNoZ0KoBid1+XhWOJiOS0998Pc+sPOijM\nrf/Od7J37GwM6ViWjiMiktOefhp++EO45BIYPjy7YQ/Z6fAdGGNmDjzi7o9m4ZgiIjlj40bo2xem\nToVx46Bt22jqyEbnfaK7Hwt0Bfqa2clZOKaISE6YOjWcmN17b5g2Lbqwhyx0+O6+Mvnnx2b2T6A9\n8D9rvQ0aNOir+8XFxRQXF2e6LBGRjCovh3vugXvvDevXX3hhau+XSCRIJBIpvYe5e2pV7OrNzb4J\n1HH3TWZWDygBfuvuJZW+xzNZg4hItq1YEcbpy8vDLJzGjdN/DDPD3a0mr8n0kE4jYKKZzQQmAyMq\nh72ISKF58UX4wQ+gUyeYMCEzYV9bGe3wd6sAdfgiUgA2boRrrgkXUA0bBu0zfMVRLnb4IiIFb8qU\n7YuezZyZ+bCvLa2lIyJSS+XlcOed8Ne/wuDBcP75UVe0awp8EZFaWL4cfvpTqFsXpk+HQw+NuqLq\naUhHRKSGnnsOjjsOzjwzLICWD2EP6vBFRHbbhg1w1VVhGeNRo8JsnHyiDl9EZDe8/XZYxvib3wy7\nUeVb2IM6fBGRXdq6Ff7wB3joIfjb36B796grqj0FvojITixdCj/5CdSrF7r6gw+OuqLUaEhHRKQK\nTz8d5tOfdx6MHp3/YQ/q8EVE/sf69WEp45kzwwycdu2irih91OGLiCRNmBCWL95vv7CUcSGFPajD\nFxFhyxbo3x+efRaGDIEzzoi6osxQ4ItIrM2ZE07MNm0Ks2dnf9vBbNKQjojEUkVF2KCkQwe44Yaw\nrHEhhz2owxeRGFq+HC67LMyxnzIFjjgi6oqyQx2+iMSGe5hue
dxxYZw+kYhP2IM6fBGJibVr4Ze/\nhPnzYcyYsLF43KjDF5GCN3ZsmG55yCFhKeM4hj2owxeRArZ5M/TrBy+/DI8/Dh07Rl1RtNThi0hB\n2rai5apVYbpl3MMeFPgiUmDKy+GOO8JJ2VtvDZuV7L9/1FXlBg3piEjBKC2FSy6BvfYKSyMcdljU\nFeUWdfgikvfc4bHH4Pjj4dxzYdw4hX1V1OGLSF5buRKuuAJWrIDx46F166gryl3q8EUkb73wQljR\nsm3bcMWswn7X1OGLSN5ZuzZsJj59Orz6ahjKkeqpwxeRvDJqFLRpAw0bhk1KFPa7Tx2+iOSFjRvD\nqpYlJfDUU3DqqVFXlH/U4YtIznvjjTBOX14e1q9X2NdOxgPfzM4ws4Vm9p6Z3ZTp44lI4fjii9DV\n9+wJf/lL2I2qQYOoq8pfGQ18M6sDPAB0BloCF5vZUZk8pogUhmnToKgoTLecMwfOOivqivJfpjv8\n9sBid1/u7mXAc8A5GT6miOSxsjIYNAi6doUBA+D55wt/J6psyfRJ20OAFZUef0j4ISAi8jULFoSl\nERo2hFmz4OCDo66osGQ68K2K5/xr31Rc6dsOB2K0A42I7CA5dHPIo9GWkXOWAstSe4tMB/6HQOUV\nLQ4FPtrxmzzxtZ8BIhITpaVhf1mAJ56AJk2irCZ/mFXVT+9apsfwpwJHmlljM9sL6AkMz/AxRSQP\nVFTAAw9A+/ZwzjkwYYLCPtMy2uG7e7mZXQWUEH64DHH3dzN5TBHJfaWl0Lt3mHY5aRI0bx51RfGQ\n8Xn47j7a3Zu7e1N3vzPTxxOR3FVRAYMHh66+WzeYOFFhn01aWkFEsmLp0tDVf/55CPqjdEVO1mlp\nBRHJqIoKeOih0NV36RKGcBT20VCHLyIZs2xZ6Oo/+yysh9OiRdQVxZs6fBFJO3f429/guOOgc+cw\nhKOwj546fBFJq+XLQ1e/cSO8/jocfXTUFck26vBFJC3c4eGH4dhjoWPHMFavsM8t6vBFJGUffAA/\n/zmsWweJBLRsGXVFUhV1+CJSa+7w6KPwgx+ETUneflthn8vU4YtIrSxbBldeGTYUnzABWrWKuiKp\njjp8EamRigq4//4wA6dDB5g8WWGfL9Thi8huW7QojNW7a1mEfKQOX0SqtXUr3HUXnHwyXHRRuIhK\nYZ9/1OGLyC7Nng2XXw777w9Tp8Lhh0ddkdSWOnwRqdKWLXDbbdCpE/TtCyUlCvt8pw5fRL7mnXdC\nV9+0qfaWLSQKfBH5yuefw4AB8MwzcN99cOGFUIud9CRHaUhHRICw7k3btrByJcydG07OKuwLizp8\nkZjbsAFuuglGjoQHH4Szzoq6IskUdfgiMTZqFLRuHaZdzp2rsC906vBFYmj1arjuurD2zWOPhStm\npfCpwxeJEXcYOjR09QcfHLp6hX18qMMXiYklS+CXv4Q1a8JQTlFR1BVJtqnDFylwW7fC3XfD8ceH\n7QanTFHYx5U6fJECNn16WOysYcMQ9E2aRF2RREkdvkgB+uwzuOEG6NYNrr8exoxR2IsCX6TglJSE\nk7KrV4eTsj/9qS6gkkBDOiIF4uOPQzc/aRI89FAYrxepTB2+SJ5zh6eeCl19o0ahq1fYS1Uy1uGb\n2UDgCmB18qlb3H10po4nEkelpWGq5SefwGuvhc3ERXYm0x3+n9y9KHlT2IukyZdfwh13QPv20LFj\nmIGjsJfqZHoMX6eKRNLszTdDV3/EETBtmjYlkd2X6Q6/r5nNMrO/m9m+GT6WSEFbswZ694ZeveD2\n22HECIW91ExKHb6ZjQUaVX4KcKA/8CBwu7u7mf0e+BPQu6r3GTRo0Ff3i4uLKS4uTqUskYLiDk8+\nGZYwvugimD8fGjSIuirJtkQiQSKRSOk9zN3TU82uDmLWGBjh7m2q+JpnowaRfLRwYRi+2bQJHn5Y\n4/SynZnh7jUaNs/YkI6ZH
VTp4bnAvEwdS6TQbN4ctho85RQ477ywx6zCXlKVyZO2d5tZO6ACWAb8\nIoPHEikYY8dCnz7Qrl3YQPyQQ6KuSApFVoZ0dlmAhnREgLCX7PXXh01JBg+Grl2jrkhyWU4N6YjI\n7qmoCEshtG4NjRuHk7IKe8kEraUjEqEZM8LwTd26MGECtGoVdUVSyNThi0Rg3Tq46qrQyV9xBbzx\nhsJeMk+BL5JFFRVhT9mjj4bycliwIFxMVUf/EiULNKQjkiVz5oThmy1bYPhwOO64qCuSuFFfIZJh\nn34K114LnTrBJZfA5MkKe4mGAl8kQ9xh2DBo0SJsOTh/Plx5JeyxR9SVSVxpSEckA+bNg759w5II\nL78MJ5wQdUUi6vBF0mrjRrjxRjjttLDQ2ZQpCnvJHQp8kTRwh+efD8M3a9aEDr9PHw3fSG7RkI5I\niubNg2uuCdsMPv88nHRS1BWJVE0dvkgtrVsHv/pVGL7p0QOmT1fYS25T4IvUUHk5PPIIHHUUlJWF\ni6euuiosjyCSy/RXVKQGJk2Cq6+GevVg9Gg45pioKxLZfQp8kd3wn//Ab34T1ry5+27o2ROsRgvT\nikRPQzoiu/DFF/DHP0LbtnDEEWHLwYsvVthLflKHL1IFdxgxAq67Dtq0CfPpmzSJuiqR1CjwRXaw\ncGFY+2b58rAxyemnR12RSHpoSEckaf16uOGGsHF4585hdUuFvRQSBb7E3tatYQ/Z5s1hw4ZwIdV1\n18Gee0ZdmUh6aUhHYm3UqNDVf/e7UFISTs6KFCoFvsTS/Pkh6EtL4Z574KyzNPNGCp+GdCRWPv44\nLGpWXAxnnBGGb84+W2Ev8aDAl1jYsgXuvTfsJVu37vaZOHvtFXVlItmjIR0paO7wyivw61+HtW/e\nfDP8KRJHCnwpWDNnhtk2a9aE+fSdOkVdkUi0NKQjBeeDD+DSS6FLl7AMwsyZCnsRUOBLAVm/Hm66\nKaxgedhh8N578ItfaNlikW1SCnwzO9/M5plZuZkV7fC1m81ssZm9a2a6XlEyZssWuO8+aNYM1q4N\nV8j+7nfQoEHUlYnkllR7n7lAD+Dhyk+aWQvgQqAFcCgwzsyauruneDyRr1RUwD/+AbfcEmbfjB8P\nrVpFXZVI7kop8N19EYDZ12YxnwM85+5bgWVmthhoD7yTyvFEtkkkwswbdxgyBE49NeqKRHJfpkY3\nDwHervT4P8nnRFIyf34Yp1+wIKxTf+GFUEdnokR2S7WBb2ZjgUaVnwIc6O/uI3b2siqe2+lwzqBB\ng766X1xcTHFxcXVlScx89BHcdhsMHw433wwvvQR77x11VSLZk0gkSCQSKb2HpWNY3cwmADe4+4zk\n436Au/tdycejgYHu/rUhHTPT0L7s1Lp1cNddYdPwK66Afv1gv/2irkokemaGu9doUZB0/jJc+cDD\ngZ5mtpeZHQEcCUxJ47GkwH32WRiyadYsXDg1e3YIfoW9SO2lOi2zu5mtAE4ARprZKAB3XwD8A1gA\n/AvoozZedseXX8IDD8CRR4aQnzQJHn0Uvve9qCsTyX9pGdJJqQAN6QhQXg7DhsHAgWGtmz/8AYqK\nqn+dSFzVZkhH1yBKpNzh1Vfh1lth331h6FD40Y+irkqkMCnwJTLjx4eLpjZvhjvvhG7dtC69SCYp\n8CXrJk+GAQNg6VK4/Xbo2VNz6UWyQf/MJGumToWuXcPFUuefD+++C716KexFskX/1CTjZswIe8b2\n6AFnngmLF4dVLPfcM+rKROJFgS8ZM2sWdO8eQv700+H998N+srpCViQaCnxJu3nzwpBNly5hs/Al\nS+Dqq+Eb34i6MpF4U+BL2rz7bjgB27EjnHBC6OivvRb22SfqykQEFPiSBnPnhq0Ef/zjsNvU++/D\njTdCvXpRVyYilSnwpdamTQsnYjt1CkG/ZElYurh+/agrE5GqKPClxt56K4zPd+8eNh4pLYX
f/Aa+\n9a2oKxORXdGFV7Jb3GHCBPj978MFU/36wSuvaMaNSD5R4MsuucOoUSHo16wJSyH06qU59CL5SIEv\nVSovh3/+E+64IyxZ3L8/XHAB7LFH1JWJSG0p8OV/bN4MTz4J99wDBxwQVrE85xwtfyBSCBT4AoSt\nBB98EO6/H449FoYMgVNO0eqVIoVEfVvMrVgB118P3/9+WONm3DgYOTKsSa+wFyksCvyYmjcPLr0U\n2rYNwT57NjzxBLRqFXVlIpIpCvwYcQ8dfLduYfmD5s3DxVL33qs9Y0XiQGP4MbB5c9gv9r77wuNr\nr4WXXtJiZiJxo8AvYB99FE7EPvIItG8fAr9DB43Ni8SVhnQK0PTp8JOfQMuWYfbNm2+GE7EdOyrs\nReJMgV8gysrghRfCVMoePcLJ2NJSGDw4jNWLiGhIJ899+CE8+mi4NW0aNho591yoq/+zIrIDdfh5\naNtsm/POgzZtwho3JSXw+uthg3CFvYhURdGQR9atg6FD4aGHwiqVffqEufNallhEdocCP8e5h/Xn\nhwyBl1+Grl3D/ZNO0glYEakZBX6OWrUqLGI2ZEgI/d69YdEiaNQo6spEJF+lNIZvZueb2TwzKzez\nokrPNzazz81sRvL2YOqlFr6tW8P0yR49wsyaBQvg73+HhQvDjlIKexFJRaod/lygB/BwFV97392L\nqnhedrBwYejmhw4NSxz07h3uN2gQdWUiUkhSCnx3XwRgVuVoskaYd2HVKnjuOXj66TC1slevMNOm\nZcuoKxORQpXJMfzDzWw6sAEY4O4TM3isvLBpU9gH9umnYfLksLHIH/8Ip52mnaREJPOqDXwzGwtU\nHj02wIH+7j5iJy/7CDjM3dclx/ZfMbOj3X1TyhXnmS1bQuf+/PNhfP7kk8OyxC+9BPXqRV2diMRJ\ntYHv7p1q+qbuXgasS96fYWZLgGbAjKq+f9CgQV/dLy4upri4uKaHzCmbN8OYMfDii/Daa+HiqAsu\ngD/9CQ48MOrqRCQfJRIJEolESu9h7p5yIWY2AbjR3acnH38HWOvuFWbWBHgdaO3u66t4raejhqh9\n/jmMGhVCftQoKCqC888PyxwcdFDU1YlIoTEz3L1G50pTCnwz6w7cD3wHWA/McvcuZnYucDtQBpQD\nt7n7v3byHnkb+CtWhA5+5Eh4442wBPEFF4RplerkRSSTsh746ZBPgV9RAVOmhIAfOTLMrunSBc48\nEzp3hm9/O+oKRSQuFPhp5h6WGP73v8NiZRMmhIufzjwz3E44QQuViUg0FPhpsGIFTJy4PeS3bAkb\nh3ToEG7a+1VEcoECv4bKymD27LA42VtvwaRJIeBPPHF7wLdooUXKRCT35G3gn322M2AAHHts5o6z\ncSPMmQOzZm2/LVgATZqElSdPPDHcvv99BbyI5L7aBH5OjECfckqY3bL33tCpUwjgli2hWbPw3O74\n8ktYuTJs3P3f/8KSJbB4Mbz3Xvhz7Vpo1QratYNjjoHLL4fWraF+/cz+t4mI5Iqc6PDdnYqK0HWX\nlMDUqTB/fgjt+vWhYUPYf/9wgnSPPcLtiy/CUgWffQaffgobNoQTqt/9brg1aRK2/Nt2+973tHyB\niBSOvB3S2VkNFRVhl6ePPw5/bt0K5eXhts8+YWmC+vXDqpIHHAB1tGGjiMREwQW+iIhUrTaBr55Y\nRCQmFPgiIjGhwBcRiQkFvohITCjwRURiQoEvIhITCnwRkZhQ4IuIxIQCX0QkJhT4IiIxocAXEYkJ\nBb6ISEwo8EVEYkKBLyISEwp8EZGYUOCLiMSEAl9EJCYU+CIiMaHAFxGJCQW+iEhMpBT4Zna3mb1r\nZrPM7CUza1Dpazeb2eLk109PvVQREUlFqh1+CdDS3dsBi4GbAczsaOBCoAXQBXjQzGq0u3ocJRKJ\nqEvIGfosttNnsZ0+i9SkFPjuPs7dK5IPJwOHJu+fDTz
n7lvdfRnhh0H7VI4VB/rLvJ0+i+30WWyn\nzyI16RzDvxz4V/L+IcCKSl/7T/I5ERGJSN3qvsHMxgKNKj8FONDf3Uckv6c/UObuz1b6nh15irWK\niEgKzD21HDazS4ErgdPcfUvyuX6Au/tdycejgYHu/k4Vr9cPAhGRWnD3Gp0bTSnwzewM4F7gR+6+\nptLzRwPDgOMJQzljgaae6k8XERGptWqHdKpxP7AXMDY5CWeyu/dx9wVm9g9gAVAG9FHYi4hEK+Uh\nHRERyQ+RXmlrZmeY2UIze8/MboqyliiZ2aFmNt7MFpjZXDP7VdQ1Rc3M6pjZDDMbHnUtUTKzfc3s\nheQFjPPN7Pioa4qKmV1nZvPMbI6ZDTOzvaKuKVvMbIiZrTKzOZWe28/MSsxskZmNMbN9q3ufyALf\nzOoADwCdgZbAxWZ2VFT1RGwrcL27Hw38EOgb489im2sIQ4Jx9xfgX+7eAmgLvBtxPZEws4OBq4Ei\nd29DGI7uGW1VWfU4ISsr6weMc/fmwHiSF77uSpQdfntgsbsvd/cy4DngnAjriYy7r3T3Wcn7mwj/\nqGN73YKZHQp0Bf4edS1RMrNvAae4++MAyQsZN0RcVpT2AOqZWV3gm8BHEdeTNe4+EVi3w9PnAEOT\n94cC3at7nygDf8eLsz4kxiG3jZkdDrQDvjaFNUb+DPwaXbvRBPjEzB5PDm89Ymb7RF1UFNz9I8KM\nwA8IF3Kud/dx0VYVuQPdfRWEphFoWN0Logx8XZy1AzOrD7wIXJPs9GPHzLoBq5K/8RhV/z2Ji7pA\nETDY3YuAzwm/xseOmX2b0NE2Bg4G6ptZr2iryj9RBv6HwGGVHh9KjH5F21Hy19QXgafc/dWo64nQ\nScDZZlYKPAucamZPRlxTVD4EVrj7tOTjFwk/AOKoI1Dq7mvdvRx4GTgx4pqitsrMGgGY2UHA6upe\nEGXgTwWONLPGybPtPYE4z8h4DFjg7n+JupAoufst7n6Yuzch/J0Y7+6XRF1XFJK/rq8ws2bJpzoQ\n3xPZHwAnmNk3kivvdiB+J7B3/I13OHBZ8v6lQLWNYqoXXtWau5eb2VWEJZbrAEPcPW7/AwEws5OA\n/wPmmtlMwtDWLe4+OtrKJAf8ChhmZnsCpcDPIq4nEu4+xcxeBGYSLuacCTwSbVXZY2bPAMXAAWb2\nATAQuBN4wcwuJ/xAvKDa99GFVyIi8aAtDkVEYkKBLyISEwp8EZGYUOCLiMSEAl9EJCYU+CIiMaHA\nFxGJCQW+iEhM/D8dvbYBmW/u2gAAAABJRU5ErkJggg==\n", 191 | "text/plain": [ 192 | "" 193 | ] 194 | }, 195 | "metadata": {}, 196 | "output_type": "display_data" 197 | } 198 | ], 199 | "source": [ 200 | "import matplotlib\n", 201 | "import numpy as np\n", 202 | "import matplotlib.pyplot as plt\n", 203 | "%matplotlib inline \n", 204 | "\n", 205 | "import math\n", 206 | "\n", 207 | "def f(x):\n", 208 | " return x * math.log(x) - 16.0\n", 209 | "\n", 210 | "xvals = np.arange(0.01, 10, 0.01) \n", 211 | "yvals = np.array([f(x) for x in xvals])\n", 212 | "plt.plot(xvals, yvals) \n", 213 | "plt.plot(xvals, 0*xvals)\n", 214 | "plt.show()" 215 | ] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "metadata": { 220 | 
"collapsed": true 221 | }, 222 | "source": [ 223 | "To find $x$ for the equation, we need to compute the derivative of $f(x)$, i.e., $f'(x)$ first. \n", 224 | "\n", 225 | "$$f'(x) = (x \\cdot \\frac{1}{x} + 1 \\cdot \\ln(x)) + 0 = 1 + \\ln(x)$$ \n", 226 | "\n", 227 | "We implement it as `fprime(x)`:" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": 7, 233 | "metadata": { 234 | "collapsed": true 235 | }, 236 | "outputs": [], 237 | "source": [ 238 | "def fprime(x):\n", 239 | " return 1.0 + math.log(x)" 240 | ] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "metadata": { 245 | "collapsed": true 246 | }, 247 | "source": [ 248 | "Now you need to implement Newton's method below." 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": 8, 254 | "metadata": { 255 | "collapsed": true 256 | }, 257 | "outputs": [], 258 | "source": [ 259 | "'''\n", 260 | "x_0: initial guess\n", 261 | "EPSILON: stop when abs(x - x_new) < EPSILON\n", 262 | "MAX_ITER: maximum number of iterations\n", 263 | "\n", 264 | "NOTE: you must use the default values of the above parameters, do not change them\n", 265 | "'''\n", 266 | "def find_root(f, fprime, x_0=1.0, EPSILON = 1E-7, MAX_ITER = 1000): # do not change the heading of the function\n", 267 | " pass # **replace** this line with your code" 268 | ] 269 | }, 270 | { 271 | "cell_type": "markdown", 272 | "metadata": { 273 | "collapsed": true 274 | }, 275 | "source": [ 276 | "You can test your implementation using the following code.\n", 277 | "\n", 278 | "Note that we will test your code using a different $f(x)$ (and its corresponding $f'(x)$). You need to perform similar tests by yourself. 
\n", 279 | "\n" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": 13, 285 | "metadata": { 286 | "scrolled": true 287 | }, 288 | "outputs": [ 289 | { 290 | "name": "stdout", 291 | "output_type": "stream", 292 | "text": [ 293 | "i = 0:\t1.0 => 17.0\n", 294 | "i = 1:\t17.0 => 8.608965126131011\n", 295 | "i = 2:\t8.608965126131011 => 7.805421527761377\n", 296 | "i = 3:\t7.805421527761377 => 7.7927448262150705\n", 297 | "i = 4:\t7.7927448262150705 => 7.792741452820569\n", 298 | "i = 5:\t7.792741452820569 => 7.792741452820329\n", 299 | "7.792741452820329\n", 300 | "0.0\n" 301 | ] 302 | } 303 | ], 304 | "source": [ 305 | "x = find_root(f, fprime)\n", 306 | "print(x)\n", 307 | "print(f(x))" 308 | ] 309 | }, 310 | { 311 | "cell_type": "markdown", 312 | "metadata": { 313 | "collapsed": true 314 | }, 315 | "source": [ 316 | "## Exercise 3: Enumerating Combinations\n", 317 | "\n", 318 | "You need to write a function, `comb(n, k)`, that returns a list consisting of all the combinations of $k$ elements out of a set $\\{1, 2, \\ldots, n\\}$. Your implementation should use recursion instead of `for` loops (you may use *list comprehensions* though). 
" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": 1, 324 | "metadata": { 325 | "collapsed": true 326 | }, 327 | "outputs": [], 328 | "source": [ 329 | "# use this cell\n" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": 56, 335 | "metadata": {}, 336 | "outputs": [ 337 | { 338 | "data": { 339 | "text/plain": [ 340 | "[[1, 5, 6],\n", 341 | " [2, 5, 6],\n", 342 | " [3, 5, 6],\n", 343 | " [4, 5, 6],\n", 344 | " [1, 4, 6],\n", 345 | " [2, 4, 6],\n", 346 | " [3, 4, 6],\n", 347 | " [1, 3, 6],\n", 348 | " [2, 3, 6],\n", 349 | " [1, 2, 6],\n", 350 | " [1, 4, 5],\n", 351 | " [2, 4, 5],\n", 352 | " [3, 4, 5],\n", 353 | " [1, 3, 5],\n", 354 | " [2, 3, 5],\n", 355 | " [1, 2, 5],\n", 356 | " [1, 3, 4],\n", 357 | " [2, 3, 4],\n", 358 | " [1, 2, 4],\n", 359 | " [1, 2, 3]]" 360 | ] 361 | }, 362 | "execution_count": 56, 363 | "metadata": {}, 364 | "output_type": "execute_result" 365 | } 366 | ], 367 | "source": [ 368 | "comb(6, 3)" 369 | ] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "execution_count": null, 374 | "metadata": { 375 | "collapsed": true 376 | }, 377 | "outputs": [], 378 | "source": [] 379 | }, 380 | { 381 | "cell_type": "code", 382 | "execution_count": null, 383 | "metadata": { 384 | "collapsed": true 385 | }, 386 | "outputs": [], 387 | "source": [] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": null, 392 | "metadata": { 393 | "collapsed": true 394 | }, 395 | "outputs": [], 396 | "source": [] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "execution_count": null, 401 | "metadata": { 402 | "collapsed": true 403 | }, 404 | "outputs": [], 405 | "source": [] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": null, 410 | "metadata": { 411 | "collapsed": true 412 | }, 413 | "outputs": [], 414 | "source": [] 415 | } 416 | ], 417 | "metadata": { 418 | "kernelspec": { 419 | "display_name": "Python 3", 420 | "language": "python", 421 | "name": "python3" 422 | }, 423 | 
"language_info": { 424 | "codemirror_mode": { 425 | "name": "ipython", 426 | "version": 3 427 | }, 428 | "file_extension": ".py", 429 | "mimetype": "text/x-python", 430 | "name": "python", 431 | "nbconvert_exporter": "python", 432 | "pygments_lexer": "ipython3", 433 | "version": "3.6.1" 434 | } 435 | }, 436 | "nbformat": 4, 437 | "nbformat_minor": 1 438 | } 439 | -------------------------------------------------------------------------------- /asset/Daily_Weather_Observations.csv: -------------------------------------------------------------------------------- 1 | Year,Month,Date,Day,Temps_min,Temps_max,Rain,Evap,Sun_hours,Max_wind_dir,Max_wind_spd,Max_wind_time,Temp_at_9am,RH_at_9am,CLD_at_9am,Wind_dir_at_9am,Wind_spd_at_9am,MSLP_at_9am,Temp_at_3pm,RH_at_3pm,CLD_at_3pm,Wind_dir_at_3pm,Wind_spd_at_3pm,MSLP_at_3pm 2015,8,1,Sa,9.4,22.3,0,3.4,9.2,WNW,61,13:08,14.3,54,6,NW,15,1018.6,20.7,28,6,NW,28,1015.6 2015,8,2,Su,14.3,24.7,0,5.4,7.8,WNW,43,21:16,16.9,51,5,WNW,9,1015.1,23.8,30,5,NW,20,1010.4 2015,8,3,Mo,8.4,19.4,0,5.4,10.1,WSW,57,16:22,11.7,48,1,WNW,20,1015.2,19,29,1,WSW,17,1013.7 2015,8,4,Tu,8.3,15.1,0,3.8,4.6,WSW,33,21:07,8.9,57,7,W,19,1023,14.4,42,4,SSE,2,1018.9 2015,8,5,We,5,14.2,0,0.6,4.8,W,48,16:06,9.4,46,6,WNW,20,1014.5,13,46,6,WNW,19,1010.3 2015,8,6,Th,6.8,16.4,0,3.8,8.5,WSW,37,11:07,9.5,53,3,WNW,22,1016.5,14.3,39,7,S,11,1016.7 2015,8,7,Fr,6.7,18.5,0,2.8,10.1,WNW,33,9:00,10,60,1,W,26,1024,16.1,40,2,SSE,17,1021 2015,8,8,Sa,7.9,17.5,0,2.6,8.9,SSW,33,10:43,11,63,5,WNW,19,1024.7,16.3,44,1,SSE,24,1022.5 2015,8,9,Su,6.2,17.6,0,1.8,9.5,W,30,6:49,9.1,68,1,W,20,1024.8,16.3,56,1,E,13,1019.6 2015,8,10,Mo,8.6,20.6,0,2.6,10.2,NNE,33,16:49,11.8,66,1,W,13,1017.7,18.8,41,1,E,20,1012.9 2015,8,11,Tu,9.8,21,0,5,10.5,W,30,15:46,13.3,52,1,W,17,1014.6,20.4,27,1,WNW,9,1011.1 2015,8,12,We,8.4,17.5,0,3.8,8.7,NW,74,12:32,11.1,54,1,W,19,1007.9,16.2,37,1,WNW,28,1004 2015,8,13,Th,9.4,19.8,0,4.8,10.5,WSW,48,9:35,12.3,50,1,WSW,28,1014.9,19.3,23,1,SW,17,1014.7 
2015,8,14,Fr,7.9,20.4,0,4,10.5,WSW,30,10:20,11.8,55,1,W,15,1025.1,18,38,1,E,19,1023.4 2015,8,15,Sa,8.2,18.1,0,3,7.3,W,30,0:07,12.4,63,3,WNW,20,1030.5,17,61,5,SSE,19,1028.8 2015,8,16,Su,10.5,19.8,0,3,9.2,NW,28,20:40,14,70,5,W,13,1027.4,17.4,63,4,NE,15,1021.4 2015,8,17,Mo,9.3,20.5,5.6,2.8,10.4,WSW,46,16:05,13.4,53,3,W,13,1017.1,20.4,29,1,NW,13,1013.3 2015,8,18,Tu,10.9,16.2,0,4.4,10.4,SW,39,6:00,12.6,40,1,WSW,20,1020.8,14.7,46,2,SSE,19,1021 2015,8,19,We,9,17,0,3.6,6.6,WNW,24,6:00,12.5,58,3,W,17,1027.2,16.4,54,6,E,13,1025 2015,8,20,Th,8.2,18.9,0,2.6,9.8,NE,31,14:47,12.1,71,1,WNW,11,1028.3,18.7,59,1,NE,20,1025 2015,8,21,Fr,9.4,21,0,3.8,9.4,NNE,31,16:28,13,73,0,WNW,13,1025.8,19.8,63,7,NE,15,1021.2 2015,8,22,Sa,12.9,28.3,0,4.2,9.4,NNW,33,13:26,19.8,45,2,N,19,1021,26.9,32,5,N,11,1016.2 2015,8,23,Su,15.4,18.5,2.4,6.4,0.2,NNW,22,0:14,15.6,85,7,N,7,1021.8,17.9,73,7,NNW,7,1018.4 2015,8,24,Mo,15.1,17.4,4.2,1.8,0.6,E,59,20:38,16.3,90,8,S,17,1018.4,16.4,95,7,SSE,9,1015 2015,8,25,Tu,12.5,19.9,54.4,5,4.4,NW,56,20:50,16.1,66,7,WNW,22,1013.4,18.7,53,7,W,20,1012.8 2015,8,26,We,12.3,20.2,5.2,2.8,6.3,SSW,28,1:38,16.3,80,7,SSE,13,1018.1,18.9,59,7,S,13,1017.8 2015,8,27,Th,11.6,20.5,0,3,4.3,W,52,18:12,15.1,87,7,W,9,1019.7,20.1,44,6,NW,17,1015.1 2015,8,28,Fr,10.6,20.8,0,2.8,9.2,WSW,48,9:25,14.6,48,3,W,22,1019.8,18.5,43,5,S,20,1018.9 2015,8,29,Sa,9,20.3,0,4.2,10.8,WSW,30,23:43,13,60,1,W,13,1022.7,20.1,31,2,WNW,15,1017.9 2015,8,30,Su,8.3,18.7,0,3.6,10.8,SSW,39,16:29,12.1,52,1,WNW,20,1020.3,18.1,44,3,S,22,1017.5 2015,8,31,Mo,9.6,19.7,0,4.8,10.9,SSE,35,15:13,13.9,48,1,WSW,15,1017.3,18.4,39,2,SSE,22,1013.8 2015,9,1,Tu,9,19.7,0,3.2,10.8,W,31,9:42,12.7,50,1,W,17,1015.8,17,39,1,ESE,13,1013.3 2015,9,2,We,8,18.8,0,4,10.4,WNW,31,23:46,11.9,61,5,W,11,1018,18.6,46,1,ENE,19,1012 2015,9,3,Th,11.2,20.8,2.8,5.6,8,SW,52,16:16,14.4,69,3,WNW,20,1006,20.6,28,6,WSW,24,1005.6 2015,9,4,Fr,12.5,19.5,20.4,4,5.7,S,48,11:22,13.7,92,7,SSW,19,1021.5,18.3,54,2,S,20,1020.5 
2015,9,5,Sa,11.5,21.7,0,2.8,9.3,ESE,37,15:27,15.2,68,1,WNW,17,1022.3,18.7,53,5,SE,24,1019.6 2015,9,6,Su,12.7,17.5,0.4,5.4,1,ENE,24,16:12,14.6,84,7,NNE,11,1024.9,16,68,7,ENE,6,1021.9 2015,9,7,Mo,11.1,24.9,0.2,0.6,9.3,W,54,18:14,17.4,62,2,W,9,1018.3,22.7,32,4,W,15,1013.3 2015,9,8,Tu,10,20,0,6.8,11,W,52,14:06,13.3,42,1,WSW,30,1018.8,19.7,32,2,W,24,1015.7 2015,9,9,We,9.8,20.2,0,5,10.9,,,,15.4,48,1,W,20,1023.1,18.9,39,1,SSE,17,1022 2015,9,10,Th,11.3,19.6,0,4.8,6.5,SSW,37,9:34,15.5,59,1,WNW,13,1030.9,18.9,41,7,SSE,15,1029.8 2015,9,11,Fr,14,21.3,0,4,9.1,NNE,39,17:22,17.6,56,1,WNW,11,1032.1,20.5,44,0,NE,24,1027.6 2015,9,12,Sa,10.9,25.4,0,7.4,10.4,ENE,24,13:20,17.2,61,1,W,11,1028.5,21.3,51,5,E,19,1024.7 2015,9,13,Su,12.4,22.3,0,3.8,10.6,NNE,30,21:51,18.3,61,1,WNW,11,1028.5,21.4,60,1,E,20,1025 2015,9,14,Mo,15.3,23.6,0,5,10.7,NE,43,16:41,20.7,58,1,NNE,17,1025.9,21.4,59,1,NE,24,1020.5 2015,9,15,Tu,15.8,29.8,0,6.4,11,,,,23.5,33,1,NNE,9,1015.4,29.2,25,3,NNW,35,1009 2015,9,16,We,12.4,21.1,0,10.2,10.2,ESE,37,14:37,17.2,42,2,W,19,1017.7,19.3,60,3,ESE,26,1015 2015,9,17,Th,12.2,19.9,0,4.2,8.2,S,43,11:24,18,53,3,WNW,9,1018.4,17.7,57,7,SE,28,1018.3 2015,9,18,Fr,13.6,20,2.2,6.4,7.7,S,52,10:05,15.8,65,5,S,20,1026.2,18.9,44,2,S,26,1025.5 2015,9,19,Sa,12.5,19.5,10.8,5.4,7.3,E,33,10:41,14.7,88,7,W,17,1026.5,18.1,54,6,ESE,24,1023.9 2015,9,20,Su,12.5,18.6,0.4,4,3.4,ENE,37,9:36,16.6,62,5,W,11,1024.5,17.4,72,7,E,13,1021.7 2015,9,21,Mo,12.4,22.5,0.2,1.6,10.4,SSW,35,20:21,16.3,74,1,W,15,1019.9,21.3,60,2,NE,15,1015.2 2015,9,22,Tu,14.6,19.5,0.2,6.2,9.2,SSW,59,11:35,18.7,36,3,SW,24,1016.8,17.8,47,2,S,37,1017.5 2015,9,23,We,10,16.2,0.2,8.8,6.5,S,69,11:25,13.3,55,3,SSW,31,1025.5,11.7,74,7,SSW,35,1025.4 2015,9,24,Th,8.3,16.3,3.2,4,6.1,S,65,9:28,13,63,7,SSW,22,1025.6,13.6,62,7,SSW,28,1024.6 2015,9,25,Fr,10.1,16.4,31.8,2.8,4.3,S,54,13:37,13.4,91,7,S,13,1029.3,15.9,63,7,SSE,26,1027.8 2015,9,26,Sa,11.1,20.1,3.4,4.4,7.7,S,43,17:52,14.4,86,6,W,22,1028.5,18.4,48,3,SSE,24,1025.4 
2015,9,27,Su,12.3,19.2,3.6,3.8,6.2,SW,46,11:36,15.3,70,7,SW,20,1023.7,18.3,51,7,S,20,1021.1 2015,9,28,Mo,10.9,20.8,0.2,4.2,11,W,28,0:27,17.3,56,1,W,15,1021.1,19.1,49,1,ESE,17,1017.8 2015,9,29,Tu,11.9,23.4,0,5.8,3.4,NW,50,18:35,19.1,58,0,N,15,1019.5,21.7,55,1,NE,17,1015.8 2015,9,30,We,16.7,21.4,0,5.6,6.4,SSE,33,7:44,21,57,7,SSE,17,1022.1,18.7,62,7,ESE,20,1021.4 2015,10,1,Th,16.8,22.9,0,5.6,9.6,ENE,33,13:01,19.9,68,4,W,6,1026.4,21.7,65,1,E,24,1022.8 2015,10,2,Fr,16.5,23,0,4.8,10.5,SSE,33,4:13,21.3,57,1,NNW,4,1033.4,21.1,57,1,ENE,24,1031.8 2015,10,3,Sa,14.2,27.5,0,7.2,11.1,E,22,12:26,21.5,62,0,W,13,1030.6,27.3,47,1,E,11,1024.7 2015,10,4,Su,19.8,33.8,0,8,8.2,WSW,46,13:41,26.4,28,7,W,4,1023.4,32.6,13,7,W,28,1021.8 2015,10,5,Mo,16.1,37,0,11,11.4,S,20,23:31,21.1,42,1,WNW,11,1024,35.6,16,7,E,11,1021.1 2015,10,6,Tu,20.3,32.3,0,8.6,11.4,NNE,43,16:22,23.8,37,1,WNW,15,1023.2,30.6,26,1,ENE,17,1019.7 2015,10,7,We,19.7,21.5,0,13.2,1.3,SSW,67,4:20,20.3,61,5,SSE,39,1029.6,19,56,7,SE,26,1033.6 2015,10,8,Th,17,21.1,0,6.6,2.2,ENE,35,12:53,18.5,53,7,ESE,13,1038.8,19.9,57,7,E,20,1036 2015,10,9,Fr,16.8,22.7,0,5.4,8.7,ENE,43,15:35,20.1,51,7,N,15,1034.4,22,51,5,NE,24,1029.7 2015,10,10,Sa,15.7,24.3,0,6.8,9.5,E,24,13:04,21.2,62,1,SE,2,1028.9,22.3,60,4,E,17,1025.8 2015,10,11,Su,15,24.3,0,5.6,6.4,W,41,21:03,18.7,77,6,WNW,9,1022.7,21.4,65,7,ENE,11,1018.6 2015,10,12,Mo,16.5,29.5,3.6,5,9.6,SSE,43,20:24,22.7,50,1,WSW,11,1016.7,23.4,55,6,NE,19,1012.4 2015,10,13,Tu,17.5,20.4,1.6,7.8,0.8,SSW,37,6:58,18.3,82,4,S,20,1022.3,19.8,77,8,ESE,20,1022.3 2015,10,14,We,16.9,23.3,3.4,1.4,8.3,NE,37,14:50,20.1,75,7,W,6,1026.8,22.7,60,1,NE,20,1025.2 2015,10,15,Th,16.3,24,0,7,12,NE,44,16:32,21.6,65,1,NNE,13,1026.2,23.1,63,0,ENE,30,1022 2015,10,16,Fr,17.3,27.5,0,6.8,11.9,NNE,43,17:43,23.3,60,0,ESE,6,1021.7,27.3,52,1,E,20,1018.7 2015,10,17,Sa,19.1,26,0,9.8,9.7,SSE,31,6:28,21.5,66,1,SSE,17,1023.1,23,64,4,ESE,15,1021.4 2015,10,18,Su,18.8,23.2,1.4,7.2,3.8,SSE,35,7:41,19.5,69,8,SE,17,1029.2,22.1,56,7,E,15,1027.9 
2015,10,19,Mo,19.4,24.6,0.2,4.2,8.2,NE,39,14:54,21.8,81,7,N,11,1026.6,24.4,54,4,ENE,24,1022.4 2015,10,20,Tu,18.7,26.8,0.4,6.8,7.7,W,35,16:51,23.7,60,2,SE,6,1020.6,26,55,2,ENE,15,1016.3 2015,10,21,We,19.7,27.9,1,7.6,3.4,NNE,31,20:38,24.5,58,4,WNW,2,1013.8,26.3,43,7,W,9,1011.5 2015,10,22,Th,18.6,20.5,8.4,5,0.7,S,61,14:54,19.6,89,7,S,11,1011.4,20.1,74,7,SSW,31,1010.3 2015,10,23,Fr,14.5,21.4,14.8,2.8,3.4,SSW,41,0:22,16.6,70,7,SSW,19,1022.1,19.4,55,7,SSW,17,1021.6 2015,10,24,Sa,12.9,22.3,0.2,4.6,12,ENE,39,16:38,17.8,62,5,WNW,13,1024.4,20.7,54,1,ENE,26,1020.9 2015,10,25,Su,15.2,24.9,0,7.4,10.2,NE,43,15:39,21.1,62,2,E,7,1022.1,22.3,59,3,NE,19,1017.8 2015,10,26,Mo,17.5,29.4,0,7,5.2,WSW,69,13:16,24.6,54,3,ESE,9,1014.4,22.2,53,7,WSW,13,1015.1 2015,10,27,Tu,15.5,19.6,7.6,7.8,2,S,54,1:15,16.5,65,8,S,19,1025.8,19.3,47,7,SSE,30,1025.8 2015,10,28,We,13.4,21.2,0.4,3.8,8.2,SSE,31,0:03,16.4,70,7,W,9,1025.3,20.6,45,5,E,20,1022.5 2015,10,29,Th,15.7,23,0,5.4,11.4,E,31,13:20,18.2,55,5,WNW,7,1023.6,21.9,40,1,E,22,1022.1 2015,10,30,Fr,14.1,23.1,0,8,9.3,ENE,39,16:13,19,59,1,W,7,1023.8,21.8,48,7,NE,24,1020.8 2015,10,31,Sa,17.1,24.2,0,8,5.7,NE,48,13:58,22.2,59,7,NE,19,1021.8,22.5,61,8,NE,28,1018.6 2015,11,1,Su,17.5,26.8,0,7.8,9,NE,44,15:57,21.9,67,1,E,15,1015.2,25.4,59,3,NE,17,1010.6 2015,11,2,Mo,18.2,30.7,26,6.6,9.1,WSW,46,23:34,21.6,75,5,NW,13,1011.2,26.4,53,5,E,15,1009 2015,11,3,Tu,18.6,20,1.4,6.8,0,S,35,5:48,19,91,8,SSE,19,1017,19.2,89,8,SE,17,1016.5 2015,11,4,We,17.1,19.6,3.8,1.4,0,E,56,12:46,19.5,91,8,ESE,24,1017.6,19.4,83,8,E,31,1016.3 2015,11,5,Th,16,23.4,13,0,0.7,ENE,50,8:11,18.2,94,8,NE,30,1014.8,20.5,80,8,NE,26,1010.5 2015,11,6,Fr,18.2,28.3,11.6,3.2,5.2,S,33,13:04,23.3,77,4,W,9,1007.7,20.5,87,7,NE,13,1006.3 2015,11,7,Sa,18.4,24.6,10,5.6,5.3,SSE,46,18:32,22.5,69,3,ESE,9,1012.3,21.8,69,7,ESE,28,1012.3 2015,11,8,Su,16.2,21.7,0.4,7.2,4.6,SSW,46,9:21,18,67,7,SSW,17,1020.7,20.4,65,6,SSE,26,1020 2015,11,9,Mo,13.7,23.3,0,6.4,12.3,E,30,16:09,19.5,59,5,W,11,1021.3,21.8,48,1,E,17,1017.5 
2015,11,10,Tu,15.2,25.6,0,6,11.3,ENE,30,14:28,20.6,61,3,S,2,1016,24.8,45,7,ENE,19,1013.5 2015,11,11,We,18.9,23.9,0,8.6,0,SSW,31,0:35,20,82,7,S,13,1020.3,22.1,66,7,ESE,11,1020.3 2015,11,12,Th,18.3,25.5,2.4,,4.2,NNE,43,9:44,23.5,53,7,NNE,17,1020.4,22.2,69,8,ENE,17,1018 2015,11,13,Fr,18,28.8,0.4,8.4,7.4,SSW,48,15:14,22.7,63,1,W,7,1013.1,23.3,60,7,NE,6,1009.8 2015,11,14,Sa,17.7,20.1,18.2,9,0,SSE,33,0:54,18.7,81,8,S,15,1014.4,18.9,70,8,ESE,19,1013.4 2015,11,15,Su,14.9,20.9,29.6,2.2,0.8,SSW,46,12:29,18.2,64,7,SSE,24,1017.7,18.5,70,7,S,19,1017.4 2015,11,16,Mo,14.3,22.6,5,4.4,13,ESE,31,11:27,19.1,56,5,SSE,17,1020,21.7,43,1,ESE,20,1018 2015,11,17,Tu,14.4,25.1,0,8,12.6,NNE,44,17:24,20.2,55,0,W,6,1019.1,23.7,46,2,NE,22,1016 2015,11,18,We,17.4,36.5,0,11,13,SSW,57,19:33,25.1,45,5,W,11,1013.6,35.6,17,1,W,22,1009.7 2015,11,19,Th,18.8,31.2,0,10.4,12.5,SSW,28,23:12,23.1,62,1,W,9,1015,28.6,54,4,E,17,1011.7 2015,11,20,Fr,21.4,40.9,0,8.8,12.4,W,54,15:13,25.2,63,1,SW,2,1012.5,40.7,13,1,WNW,28,1006.3 2015,11,21,Sa,20.3,22.1,0,14.6,0.3,S,54,23:35,20.8,65,7,SE,13,1015.6,20.7,56,7,SE,24,1017.3 2015,11,22,Su,19,22.4,0,6.2,2.7,NNE,33,16:59,19.8,59,7,E,17,1019.7,20.8,58,8,ENE,17,1016.7 2015,11,23,Mo,17.1,26.5,0,3.8,11.5,SSE,52,15:31,22.4,60,1,WNW,11,1011.9,25.4,52,2,SE,30,1010.8 2015,11,24,Tu,18.7,25.2,0,8,11.4,NE,52,18:38,23.6,57,4,ENE,13,1017.4,23,55,1,NE,28,1014.1 2015,11,25,We,17.3,32.7,0,9,,NNE,50,17:11,22.4,62,1,W,9,1013.1,28.7,44,0,NNE,24,1007.4 2015,11,26,Th,22.4,37.8,0,12.6,9.6,W,74,14:27,32.4,31,1,NNW,28,999.8,36,14,2,W,35,1000 2015,11,27,Fr,16.5,22.6,0.6,14.6,7.1,ENE,37,14:45,21.1,54,7,SE,4,1016.1,21.6,60,4,ESE,22,1013.8 2015,11,28,Sa,19.1,23.8,0,7.4,0.3,ENE,37,12:31,21.5,69,8,ENE,20,1016.7,22.5,55,8,NE,17,1016 2015,11,29,Su,19.9,26,0,6.4,7.1,ENE,43,15:21,21.1,80,7,NNE,17,1013.7,25.1,65,2,NE,22,1009.7 2015,11,30,Mo,18.2,26.3,0.2,7.4,11.1,S,48,0:07,21.1,65,4,WSW,7,1012.4,23.8,65,1,E,22,1008.7 2015,12,1,Tu,19,33.3,0,8,10.4,S,63,22:29,26.3,58,1,ESE,9,1010.3,32,34,5,NE,31,1006.7 
2015,12,2,We,19.2,22.8,0,14.2,5.3,SSE,61,12:50,21,66,7,S,22,1015.1,19.4,59,7,S,35,1017 2015,12,3,Th,17.1,21.7,0,9.2,8.1,ESE,31,13:27,19.4,52,7,ESE,11,1027.1,20.3,49,5,ESE,20,1026 2015,12,4,Fr,14.6,23.6,0,6.4,9.3,E,28,12:57,20.3,53,4,WNW,6,1026.3,22.8,44,3,E,19,1024.1 2015,12,5,Sa,15.5,24.9,0,8,,NE,39,14:42,21,56,1,W,7,1022.6,24.1,52,1,NE,20,1018.9 2015,12,6,Su,16.6,25.8,0,9,12.7,ENE,35,17:23,23,58,3,SE,7,1020.5,24.9,46,5,ENE,24,1017.8 2015,12,7,Mo,18.8,25.5,0,9.2,7.9,E,39,15:47,23.2,61,7,ESE,9,1021.4,24.1,57,7,E,22,1019.9 2015,12,8,Tu,20.9,25,0,7.6,0.3,NE,43,17:29,22.7,68,7,NE,11,1018.6,24.4,64,7,ENE,17,1015.8 2015,12,9,We,21.3,33.3,0.8,4.6,8.6,S,61,19:37,24.6,70,5,NNE,7,1014.2,27.5,58,3,ENE,17,1012.1 2015,12,10,Th,20.2,25.5,15.2,10,6.4,SSW,44,23:10,21.3,80,7,S,7,1015.9,25,63,3,E,19,1011.7 2015,12,11,Fr,21.2,35.3,0.2,6.4,8.8,W,59,12:47,24.4,66,3,W,17,1005.8,34.7,13,7,WNW,30,1001.1 2015,12,12,Sa,18.4,22.6,0,12.2,5.8,ESE,39,13:04,20.8,57,3,E,15,1014.7,21.4,50,4,ESE,28,1015.1 2015,12,13,Su,18.2,24.4,0,7.6,6.5,NE,48,13:09,21.4,58,5,NE,19,1019.3,23.2,56,7,NE,30,1016.1 2015,12,14,Mo,18.3,29.6,0,8,13.1,ENE,28,11:55,23.4,63,3,WNW,9,1015.4,25.5,59,3,E,19,1013.4 2015,12,15,Tu,20.6,26.9,0,10,5.7,ENE,43,15:04,23.1,72,6,ENE,11,1015.3,25,65,8,ENE,22,1012.5 2015,12,16,We,21.1,23.4,0.6,5.4,1.8,SSE,80,12:35,22.9,87,7,NE,22,1014.8,18.2,82,8,N,13,1016.7 2015,12,17,Th,16.6,26.1,9.8,,12,E,37,15:25,21.6,70,4,WNW,9,1018.5,24.6,60,1,E,24,1016.9 2015,12,18,Fr,19,27.5,0,13.6,12.3,NNE,46,18:19,24.5,58,0,E,15,1016.8,27.1,53,1,ENE,30,1014 2015,12,19,Sa,20.3,29,0,9.4,13.2,NE,52,18:44,26.3,59,1,ESE,9,1014.6,27.5,53,1,NE,20,1012.8 2015,12,20,Su,19.9,30.3,0,13,,NE,56,17:29,24.6,61,1,E,11,1013.4,28.5,52,1,NNE,31,1008.7 2015,12,21,Mo,23.6,29.1,0,14,3.6,SSE,50,17:15,27.1,56,6,ENE,7,1014.8,21.5,79,8,S,22,1017.2 2015,12,22,Tu,17.7,21.8,29.8,4.4,0,ESE,44,17:50,17.8,89,8,SSW,19,1023.2,17.7,90,8,NE,2,1021.5 2015,12,23,We,17.3,23,34,3.6,4.5,ESE,31,23:13,21.8,65,7,SE,13,1019.4,22,59,7,ESE,22,1017.7 
2015,12,24,Th,16.9,24.6,1,5,9.5,E,37,18:06,21.6,60,4,ESE,20,1019.9,23.9,52,2,ESE,20,1018.7 2015,12,25,Fr,17.5,25.2,0,7,9.6,ENE,35,15:09,21.8,55,5,ENE,4,1018.7,24.5,48,4,E,20,1015.4 2015,12,26,Sa,18.8,26.1,0.6,7.4,6.1,NE,39,10:39,22.7,74,5,N,17,1009.7,23.4,77,7,NE,17,1006 2015,12,27,Su,17.1,23,1.8,8.8,5.7,S,59,0:48,17.1,74,8,SSE,22,1016.7,22.3,47,1,S,28,1018.4 2015,12,28,Mo,15.3,22.6,2.6,5.8,4.8,SSW,44,10:25,16.7,78,7,SW,20,1023.7,21.6,50,7,SSE,24,1023 2015,12,29,Tu,15.4,24.9,0.2,7.4,12.5,SSE,35,8:19,21.2,51,6,SE,20,1022.4,24.1,38,1,ESE,22,1020.7 2015,12,30,We,15,25.6,0,7.4,13.1,ESE,26,11:52,20.2,60,0,NW,6,1019.9,23.7,50,1,E,19,1018.7 2015,12,31,Th,17.4,26.7,0,8,12.7,,,,21.3,62,1,WNW,9,1019.4,24.2,52,5,E,24,1016.8 2016,1,1,Fr,17.9,25.6,0,11,12.3,E,35,15:42,22.9,57,6,SSW,2,1016.4,24.2,52,4,E,28,1013.7 2016,1,2,Sa,18,25.4,0,6.8,3.3,ESE,28,13:35,22.3,60,7,Calm,0,1013.4,23.5,51,7,ESE,19,1012.3 2016,1,3,Su,20.2,24.6,0,8,0,SE,44,12:36,22.2,63,8,SSE,20,1015.2,19.9,84,8,SE,22,1014.7 2016,1,4,Mo,18.6,23.6,15,6.4,0.7,ESE,46,12:38,18.8,86,8,ESE,6,1016.8,19.1,89,8,ESE,7,1016.3 2016,1,5,Tu,17.8,20.3,34.2,0,0,ESE,54,8:42,19.1,87,8,SE,26,1016.5,19.5,87,8,SE,26,1014.6 2016,1,6,We,17.4,18.6,51.4,8,0,SE,48,8:48,18.2,92,8,SE,19,1012.3,17.9,91,8,SSE,20,1011.6 2016,1,7,Th,16.5,24.4,41.4,2,7.6,,,,18.1,91,7,SSW,22,1014,21,66,7,SSW,24,1015.8 2016,1,8,Fr,15.5,25.4,0.4,6.6,12.1,,,,20.6,58,,NW,4,1020.1,24.1,53,1,E,19,1019 2016,1,9,Sa,18.5,25.5,0,7,8.6,ENE,31,16:45,21.2,71,5,N,2,1021.9,24.5,52,6,E,22,1018.8 2016,1,10,Su,18.5,27.1,0,8,12.2,ENE,41,15:48,22.9,67,3,W,9,1020.4,26.2,53,1,ENE,26,1017.5 2016,1,11,Mo,20.5,31.6,0,9,12.1,E,28,12:14,24.5,69,1,ESE,7,1013.3,31.1,52,1,E,15,1009.1 2016,1,12,Tu,23.5,34,0,10.6,4.4,S,52,15:28,26.5,56,6,W,2,1010.4,25.9,70,7,SSE,24,1012.1 2016,1,13,We,21.3,31,0,6,11,NE,37,16:11,23.2,69,3,NNE,4,1019.7,26.7,59,1,E,28,1016.2 2016,1,14,Th,22.7,39.2,0,8.4,,S,74,18:01,30.8,50,1,SE,6,1011.7,32.4,43,7,ENE,15,1008.1 
2016,1,15,Fr,14.3,21.7,34,12.2,2.7,S,54,23:04,14.5,92,8,W,19,1024.4,19.4,71,7,S,22,1024 2016,1,16,Sa,14.3,23.8,5.4,6,7.2,SE,43,9:24,19.7,66,7,SSE,20,1026.9,22.8,45,5,SSE,20,1026.1 2016,1,17,Su,16,26.1,0,5.6,11.8,SE,33,11:40,20.4,71,6,SSE,13,1025.1,25.1,49,4,ESE,22,1022.6 2016,1,18,Mo,16.7,27,0,7,13.5,NNE,35,21:49,22.1,64,2,WNW,6,1020.6,25.1,55,2,E,20,1017.7 2016,1,19,Tu,19.2,30.2,0,8,13,NNE,41,16:40,24.2,60,3,N,6,1017.4,29.2,54,3,ENE,13,1013.3 2016,1,20,We,21.9,37.5,0,11.8,10,NNE,39,17:57,27.9,39,1,ESE,4,1012.6,32.1,39,6,E,9,1009.8 2016,1,21,Th,23.6,33.5,0,11.6,6.9,SW,61,17:32,26.2,66,5,SW,4,1010.5,32.3,48,4,ENE,20,1007.4 2016,1,22,Fr,22.8,29.2,30.6,10.4,1.4,NE,30,19:40,24.9,85,7,SSW,7,1011,25.7,76,7,E,13,1006.4 2016,1,23,Sa,22.3,26,13,3.4,9.5,,,,25.6,60,3,S,17,1006.7,25.5,59,6,S,33,1008.6 2016,1,24,Su,20.8,25,0,6.6,0.6,SSE,31,23:14,21.9,73,8,SSW,11,1013.4,24.1,62,7,ESE,15,1012.1 2016,1,25,Mo,21.6,25.2,0,4.6,0,,,,23.2,73,8,E,13,1014.5,24.7,64,8,E,13,1013.3 2016,1,26,Tu,21.6,27.8,0,2.4,8.7,,,,22.9,73,8,E,11,1017.7,26.4,54,4,ENE,19,1017.3 2016,1,27,We,20.8,24.9,2.2,9.8,0.5,NE,48,21:38,24,66,6,NE,17,1016.2,24.3,63,8,NE,15,1014 2016,1,28,Th,21.4,27.6,0.4,5.6,6.7,NNE,39,23:47,23.5,74,7,N,17,1008.2,27.2,64,7,ENE,15,1004.7 2016,1,29,Fr,22.4,28.1,0,5,8.2,NW,65,12:27,26.3,72,3,NNE,11,1003.3,23.2,86,7,ESE,6,1001.9 2016,1,30,Sa,19.5,28.7,9,3,3,ESE,70,19:14,22.4,84,8,W,17,1004.9,28.1,62,7,ESE,17,1002.7 2016,1,31,Su,19.7,31.1,12.8,5.8,10,WNW,48,10:50,22.6,83,1,W,15,1004.4,31,26,5,WNW,26,1003.3 2016,2,1,Mo,21.8,26.5,0.2,8,7.4,SSE,43,17:42,26,42,5,NNW,2,1004.2,25.5,52,4,SE,15,1001.1 2016,2,2,Tu,17,27.9,2,5,12.9,SE,33,12:48,20.9,62,2,WNW,17,1006.3,26.6,45,3,ESE,24,1004 2016,2,3,We,19.3,28.3,0,7.6,10.7,SSE,43,22:07,23.4,67,5,W,4,1003.6,27.7,59,4,E,26,1002.2 2016,2,4,Th,19.1,24.8,10.8,7.6,0.8,S,59,14:15,19.7,87,8,SSW,24,1012,23.5,55,7,S,33,1012.8 2016,2,5,Fr,19.6,26.1,3.6,9.6,6.8,SSE,56,10:34,22,59,6,S,31,1017.4,23.9,56,5,SSE,39,1017.6 
2016,2,6,Sa,19.9,27,1.8,8.8,8.1,SSE,37,23:36,22.8,75,7,SE,20,1018,26.4,49,3,ESE,20,1016.9 2016,2,7,Su,19.1,27.4,0,7.2,10.1,ESE,30,13:05,22.5,76,5,WNW,9,1018,25.7,54,3,ESE,17,1016.6 2016,2,8,Mo,19,28,0,4.6,12.6,E,24,11:24,23,68,2,WNW,11,1019.1,27,54,1,ESE,17,1017.9 2016,2,9,Tu,21,27.7,0,9.8,10.3,SSE,39,7:53,25,70,6,SSE,26,1020.7,27.3,47,1,SE,19,1019.6 2016,2,10,We,19.6,28.5,0.2,7.6,12.7,NE,43,16:10,23.2,68,2,W,11,1016.7,27.6,46,1,ENE,22,1012.9 2016,2,11,Th,20.7,28.4,0,10.4,9.6,E,26,14:24,24.6,65,3,WSW,6,1015.8,26.7,58,7,E,20,1014.2 2016,2,12,Fr,21.5,28.8,0,6.4,12,ENE,33,17:19,25.4,63,2,W,7,1017,27.5,58,1,E,22,1015.1 2016,2,13,Sa,20.7,28.9,0,8.8,12.1,NNE,46,20:10,25.3,60,1,N,9,1014.6,27.7,54,1,NE,24,1011.5 2016,2,14,Su,21.1,32.5,0,9.6,10.4,SSE,48,17:46,26.2,58,1,NE,2,1008.4,31.5,53,5,SSE,4,1005.3 2016,2,15,Mo,22.3,29.3,0,8,6.9,SW,31,1:40,24.1,75,7,W,15,1011.8,27.4,62,3,ESE,20,1007.7 2016,2,16,Tu,21.5,27.9,0,7,10.8,SSE,48,13:40,25.3,36,1,SSW,17,1009.1,25.6,51,2,SE,31,1008.8 2016,2,17,We,19.5,26,0,8.8,10.4,SSE,39,7:27,20.5,57,3,SSW,22,1011.4,25.5,34,1,ESE,22,1008.4 2016,2,18,Th,18.3,26.4,0,8,11.7,E,30,17:17,22.9,59,3,SSW,11,1009.8,26.1,47,1,ESE,19,1008.7 2016,2,19,Fr,20.5,29.4,0,8,11.1,ENE,35,12:48,23.7,70,4,NW,9,1012.4,28.6,60,1,ENE,20,1009.8 2016,2,20,Sa,23.1,29.3,0,9.4,5.9,S,52,3:03,24.6,75,7,SSW,24,1017.7,28.8,52,5,S,26,1018.2 2016,2,21,Su,21.4,27,6,8.2,4.8,E,37,0:01,21.9,89,6,S,9,1025.4,26.6,67,4,ESE,13,1024.6 2016,2,22,Mo,20.8,28.8,1.2,3.8,11.6,ENE,39,17:06,24.3,71,1,W,11,1025.4,28,59,6,ENE,24,1022 2016,2,23,Tu,21.8,29.7,0,9.4,12.4,NNE,43,17:11,25.4,62,1,N,9,1021.3,28.2,51,1,ENE,22,1017.7 2016,2,24,We,20.9,29.5,0,11,12.3,NNE,46,18:15,26.1,60,0,NNE,11,1016.1,28.4,58,1,ENE,26,1012.9 2016,2,25,Th,21.9,32.8,0,8.4,12.2,NNE,44,18:45,26.1,62,0,NE,7,1012.4,32.2,45,0,E,17,1009.3 2016,2,26,Fr,23.2,27.8,0,12.2,5.2,S,63,4:53,25.4,67,1,SSW,28,1014.9,26.7,60,7,SE,22,1015.5 2016,2,27,Sa,22.5,27.5,0,6,10,ESE,33,9:53,26,60,7,ESE,17,1019.3,25.2,55,5,ESE,22,1019 
2016,2,28,Su,20.7,28.1,0,8,10.1,E,28,19:07,23.7,62,1,W,11,1018.8,26.3,53,1,E,17,1017.2 2016,2,29,Mo,20.5,27.7,0,11.2,5.7,ESE,31,13:09,23,73,7,W,9,1021.2,27.5,60,5,SE,20,1020.4 2016,3,1,Tu,21.3,28.2,0,4.2,7.7,ENE,33,15:39,23.6,72,4,E,4,1022,27.6,47,1,ENE,26,1019.9 2016,3,2,We,20.3,28.8,0,7.8,10.7,NNE,50,18:08,25.1,59,7,NNE,13,1021.1,28.1,52,7,ENE,30,1018.9 2016,3,3,Th,22.2,28.7,0,8.2,10.2,NE,46,15:01,25.2,71,3,NNE,4,1021.3,28.4,58,4,NE,26,1019.4 2016,3,4,Fr,22.8,28.3,0,9.6,10.1,NE,39,13:15,24.9,70,7,N,20,1022,26.9,53,2,NE,24,1020.1 2016,3,5,Sa,20.6,27.6,0,9.2,10.6,NE,41,14:44,24.6,64,1,NNE,11,1021.7,27.2,55,1,NE,24,1019.4 2016,3,6,Su,20.8,27.9,0,9.8,10.5,NE,46,12:55,24.9,64,2,N,4,1022.9,26.3,59,1,NE,20,1021.2 2016,3,7,Mo,21,28.3,0,8,11.8,ENE,44,15:06,24.5,71,2,E,7,1021.6,26.8,57,1,NE,26,1018.8 2016,3,8,Tu,22.4,28.1,0,8.8,10.8,ENE,33,19:04,26.2,61,1,NE,7,1023.4,27,57,3,ENE,20,1022.6 2016,3,9,We,22.9,29.3,0,9.2,10.1,NNE,43,19:21,24.8,71,4,N,13,1021.6,29.1,55,4,NE,24,1017.9 2016,3,10,Th,22,30.6,0,8,6.5,SE,31,12:04,24,84,7,SSE,2,1019.7,28.5,59,7,SSE,24,1018.9 2016,3,11,Fr,22.8,27.8,0,5.8,2.3,ENE,37,13:17,23.8,79,8,ENE,6,1020.1,26.8,66,7,ENE,19,1017.9 2016,3,12,Sa,21.8,28.7,0,4.6,10.7,NE,43,16:52,24.6,73,1,WNW,9,1019.6,27.6,60,2,E,24,1018 2016,3,13,Su,22.6,27.7,0,9,7.7,ENE,30,9:09,25.8,63,4,NE,11,1020.5,26.2,63,7,ENE,20,1017.9 2016,3,14,Mo,21.8,30.7,0,7.2,4.9,SSW,41,20:31,22.9,84,7,WNW,9,1016.3,29.9,54,6,SE,20,1014.3 2016,3,15,Tu,19.9,25.6,16.8,7,4.5,SSW,44,14:14,20.2,89,7,SW,11,1019.5,24.3,66,6,S,26,1018.6 2016,3,16,We,19.7,26.2,23,6,7.6,ESE,39,23:25,20.7,88,8,S,9,1019.7,25.4,58,4,SSW,20,1018 2016,3,17,Th,19,26.1,13.6,5.2,7.5,,,,19.6,92,7,NW,4,1017,25.5,59,4,E,15,1012.9 2016,3,18,Fr,19.6,28.2,3.4,5.6,4.4,SSW,52,16:00,23.4,71,7,NNE,11,1003.6,26.4,67,7,NNE,13,997.6 2016,3,19,Sa,16.2,24.4,4.8,5.4,11,W,44,1:57,18.4,43,1,WSW,20,1010.9,23.8,36,1,SE,22,1011.2 2016,3,20,Su,17.4,22,0.2,7.8,3.4,S,52,11:33,18.6,63,7,SSW,15,1017.5,18.1,81,8,S,20,1016.8 
2016,3,21,Mo,15.4,21.2,61.2,5.8,2.7,S,43,14:14,15.6,91,8,WSW,19,1018.8,20.7,65,7,SSW,24,1016.5 2016,3,22,Tu,14.3,23.8,25.4,2.8,6.9,S,50,14:25,17.1,74,3,W,20,1017.5,21.5,63,7,S,19,1015.5 2016,3,23,We,14.9,25.3,1.6,4.2,9.5,W,33,3:48,18.3,66,1,W,20,1018.2,23.9,51,3,ESE,15,1016.9 2016,3,24,Th,16.5,27.4,0,5.2,9.3,ENE,31,15:03,19.4,73,6,W,11,1020.7,25.8,51,3,E,24,1019 2016,3,25,Fr,19.3,25.2,0,6.8,0.3,S,24,21:39,20.4,78,7,WNW,11,1018.9,22.9,62,7,WNW,9,1016.3 2016,3,26,Sa,17.1,26,0,3,10.2,ENE,28,16:06,20,71,2,WNW,17,1018.6,24.5,59,2,ENE,17,1016 2016,3,27,Su,19.3,24.2,0.2,4,5.7,ESE,41,10:19,22.2,83,5,S,9,1018.2,23,63,6,E,20,1017 2016,3,28,Mo,19.4,26,34.8,5.8,4.1,SSW,26,6:45,20.6,88,6,S,15,1018.7,26,64,6,ESE,20,1015.7 2016,3,29,Tu,20.6,23.3,0,4,0,SSW,37,11:02,21.2,84,7,SSW,13,1017.5,21.1,84,8,SSW,19,1016 2016,3,30,We,19,28.7,7,1.4,7.5,WNW,30,10:40,20.5,81,1,W,17,1015,26.8,31,6,NW,7,1012 2016,3,31,Th,16.5,25.6,1.2,6.8,10.6,SSE,39,12:54,19.9,51,1,W,20,1017.1,24.7,42,2,SSE,20,1016.2 2016,4,1,Fr,16.6,25.6,0,5.4,10.2,NE,37,14:51,20.2,69,3,W,15,1019.1,24.9,58,1,ENE,28,1015.1 2016,4,2,Sa,18.3,31.2,0,6.8,10.3,SSE,35,23:58,19.9,63,7,W,19,1016.4,29.7,19,3,SW,9,1014.6 2016,4,3,Su,19.5,25.2,0,6.8,6.7,SSE,35,0:06,21.2,74,6,SSW,17,1024.6,23.3,64,2,ESE,19,1022.1 2016,4,4,Mo,18.7,23.5,94.4,3.4,0.8,ENE,33,19:22,19.5,91,8,SE,6,1023.2,23.3,71,7,ESE,17,1019.6 2016,4,5,Tu,18.6,26.2,6.4,3,10.4,,,,23.5,64,1,NNE,9,1020.1,25.6,58,3,NNE,24,1015.2 2016,4,6,We,19.9,34.2,0,6.8,10.6,NE,43,15:51,25,54,1,ESE,7,1015.9,29.3,46,1,NNE,26,1011.6 2016,4,7,Th,19.9,20.5,0.2,8.4,0,SSE,43,9:19,20.1,59,8,SSE,26,1023.2,19.9,54,7,SSE,19,1022.3 2016,4,8,Fr,17.7,20.6,0,4,0,ENE,31,16:58,18.8,71,8,W,13,1023.5,20.5,70,8,E,2,1021.2 2016,4,9,Sa,16,25,0.4,1.2,9.5,ESE,26,13:16,19.3,65,2,WNW,17,1022,24,57,7,SE,15,1018.8 2016,4,10,Su,16.1,27.3,0,3.8,9.6,,,,19.3,70,5,WNW,19,1019.8,23.7,51,3,E,19,1015.5 2016,4,11,Mo,16.1,26.5,0,6.4,9.4,WSW,31,10:35,19.6,51,3,WNW,15,1017.5,24.1,46,6,SE,19,1016.4 
2016,4,12,Tu,17.4,22.9,8.2,5.6,5.7,S,35,7:19,18.3,85,7,SE,7,1023.1,20.5,68,5,SSE,22,1021.8 2016,4,13,We,15.5,24.2,0.6,4.2,8.3,ESE,28,13:08,19.7,72,6,WNW,11,1027.4,22.5,57,2,ESE,19,1026.3 2016,4,14,Th,15.7,22.9,0.6,3.4,2.4,ENE,33,11:36,18.6,88,7,S,6,1030.2,22.3,62,7,NE,15,1027.5 2016,4,15,Fr,16.5,24.6,1,3.4,9.9,NE,33,15:46,21.3,66,6,W,2,1026.4,24.2,51,5,NE,19,1021.8 2016,4,16,Sa,16.6,25.5,0,5,9.3,SSE,28,23:01,19.8,69,6,W,7,1021.2,24.7,49,7,ENE,11,1017.8 2016,4,17,Su,19.1,23.2,0,4.6,3.2,SSW,43,16:05,20.1,73,7,SSW,15,1022.1,20.5,77,7,SSE,17,1020.4 2016,4,18,Mo,15.1,24.2,2,3,7.6,SSE,37,10:58,18.9,75,6,W,17,1021.8,21.2,68,7,SSE,17,1019.2 2016,4,19,Tu,16.9,23.9,15,5,6.9,W,24,0:13,19.5,83,3,W,13,1021.7,22.2,68,7,ESE,17,1020.1 2016,4,20,We,16.7,25.5,15.8,3.4,10.3,E,22,15:35,19.3,82,3,W,15,1024.6,23.2,66,2,E,13,1022 2016,4,21,Th,17.3,24.5,0,3.4,10.5,ENE,30,13:12,20.2,79,2,W,9,1023.1,24.1,69,5,NE,13,1019.1 2016,4,22,Fr,17.1,25.9,0,4,7.1,SSW,44,10:33,20.2,70,1,WNW,13,1020.1,21.8,69,7,S,24,1019.1 2016,4,23,Sa,16.2,20.2,6.6,3.4,1.3,SSW,52,13:40,16.8,83,7,SSW,22,1024.2,16.5,85,6,SSW,17,1024.5 2016,4,24,Su,15.3,23.1,3.8,2.6,9.9,SSE,43,2:00,20.1,61,5,ESE,22,1029.8,21.5,49,3,ESE,20,1029.3 2016,4,25,Mo,13.1,22.8,0,4.8,10.4,W,20,0:29,16.3,76,1,WNW,13,1032,21.7,53,1,ESE,15,1029.4 2016,4,26,Tu,14.2,23.7,0,2.8,9.6,W,22,6:32,17.2,75,3,WNW,13,1030.9,22.2,56,3,E,13,1027.6 2016,4,27,We,15.3,23.7,0,2.8,10.7,NE,35,14:19,17.8,78,1,WNW,15,1028.1,23,54,2,NE,19,1023.7 2016,4,28,Th,15.7,25.1,0,5.2,7.7,E,24,14:51,18.4,84,7,W,11,1024.9,23.7,61,1,E,20,1021.5 2016,4,29,Fr,16,24.2,0,4.8,4.6,NE,26,15:36,18.8,75,4,W,13,1022.8,23.3,65,8,NE,11,1018.5 2016,4,30,Sa,18.8,26.5,0,3.4,4.5,N,33,11:55,20.5,74,8,NE,4,1018.7,26,54,6,WNW,9,1015.6 2016,5,1,Su,18.1,26.1,2.8,3.4,4.7,NNW,44,13:07,19.4,86,7,N,19,1013.6,25.4,55,5,N,20,1009.6 2016,5,2,Mo,13.9,21.3,0,4.8,6.1,,,,16.3,56,0,NW,7,1017.3,21.2,60,7,NE,9,1012.2 2016,5,3,Tu,12.8,26.4,0,3.4,10.4,NW,33,20:06,16,61,1,W,11,1012.6,25.8,27,0,WNW,20,1006.4 
2016,5,4,We,16,26.2,0,8,10.2,NNW,26,2:33,19.1,49,1,W,17,1015.2,23.1,41,2,E,15,1014.2 2016,5,5,Th,13.5,22.4,0,4,10.1,W,28,23:47,16.2,55,1,WNW,13,1020.7,21.3,54,1,E,20,1016.9 2016,5,6,Fr,14,26.7,0,3.4,9.9,,,,16.4,69,4,W,19,1019.6,23.2,42,1,ESE,13,1017.2 2016,5,7,Sa,12.4,22.9,0,5.6,8.7,W,24,5:28,15.4,75,0,W,13,1021.3,21.6,60,6,ENE,13,1018.4 2016,5,8,Su,15.3,20.8,0,1.4,0,WNW,22,21:37,18.2,86,7,W,11,1018.5,20.5,74,7,E,9,1013.8 2016,5,9,Mo,18,22.2,1.6,1,0,NW,43,23:07,20.1,76,7,NNE,11,1008.8,21.8,72,7,NNE,17,1003.7 2016,5,10,Tu,17.3,24.6,0,4.4,9.7,NW,72,14:14,19.8,51,1,NW,20,1006.5,23.8,35,1,WNW,35,1004.3 2016,5,11,We,12.6,23.1,0,8,7.5,W,43,18:50,15.3,51,1,WSW,9,1015.5,21.3,21,6,WSW,17,1012.2 2016,5,12,Th,13.7,24.5,0,6,9.1,W,46,12:16,17.5,52,2,NNW,15,1014.7,23.3,35,6,WNW,22,1013.9 2016,5,13,Fr,13.9,25.9,0,4,10,WSW,30,2:47,17.3,60,1,WNW,13,1021.8,25.6,32,1,WNW,11,1019.6 2016,5,14,Sa,12.4,26.6,0,7.2,10.2,W,28,7:06,14.7,62,0,WNW,19,1023.7,22.3,50,0,ENE,15,1019.9 2016,5,15,Su,13.2,24.4,0,2.2,5.2,W,30,4:10,17.8,42,6,W,20,1017.4,22.4,41,5,N,9,1013.7 2016,5,16,Mo,13.6,22.9,0,3.2,9.1,ENE,28,12:52,16.4,66,1,W,17,1021.4,22.4,59,1,NE,15,1017.7 2016,5,17,Tu,16.3,28.2,0,4.8,8.9,WSW,31,15:51,18.3,55,2,WNW,7,1017,27.7,26,1,WNW,15,1014.3 2016,5,18,We,12.7,21.9,0,6.6,3,WSW,39,22:55,14.6,49,7,W,17,1018.8,21.4,34,7,N,11,1015.2 2016,5,19,Th,11.9,23.2,0,4,9.9,W,37,17:50,14.5,54,0,WNW,9,1017.9,23,29,1,WNW,17,1013.4 2016,5,20,Fr,12.8,24.9,0,4,9.2,,,,17.3,53,0,WSW,19,1020.3,22.2,51,1,SE,17,1019.3 2016,5,21,Sa,13.6,21.5,0,3.4,4.4,W,24,7:09,15.5,72,6,W,13,1027.3,20.8,56,7,E,9,1024 2016,5,22,Su,14.5,22,0,2,9.1,NNE,24,16:17,16,82,1,W,13,1024.4,21.8,66,4,E,9,1020.2 2016,5,23,Mo,15.2,26.8,0,4,9.2,WSW,61,15:56,21.5,53,3,NW,9,1013.9,23.8,32,1,SSW,22,1012.4 2016,5,24,Tu,12.1,22.3,0,7.6,9.9,W,44,15:48,14.1,50,1,W,17,1018.1,20.8,21,2,WSW,19,1013.6 2016,5,25,We,10.4,21.2,0,3.2,8.7,W,33,4:08,12.9,54,4,W,22,1017.8,20.8,29,7,E,6,1013.5 
2016,5,26,Th,12.9,20.2,0.2,4.6,4.8,NW,48,14:24,15,52,7,NNW,22,1006.9,19.9,48,2,NW,24,1000.4 2016,5,27,Fr,12.8,18.6,0.2,4,9.9,W,61,14:46,14,42,1,W,17,1003.5,17.9,27,1,W,37,1001.7 2016,5,28,Sa,9.8,14.8,0,5.6,2.1,W,39,19:43,12.9,63,6,NNW,9,1007.2,14.3,70,6,W,13,1006 2016,5,29,Su,9.2,19.3,2.2,1.6,9.8,W,37,0:10,11,59,1,W,17,1017.2,18.7,29,1,W,15,1016.5 2016,5,30,Mo,7.3,19.9,0.2,2.6,9.5,W,39,6:28,9.5,62,1,W,24,1025.5,19.1,30,1,WSW,9,1024.1 2016,5,31,Tu,8.7,17.5,0,2.2,1.7,SE,35,18:16,11,76,7,W,17,1029.6,15.8,87,7,SE,6,1027.2 2016,6,1,We,10.9,17.9,29.4,0.6,1.4,W,24,1:32,13.1,89,7,W,15,1029.1,16,74,7,ENE,7,1026.6 2016,6,2,Th,11.1,19.1,1,2.4,8.7,,,,12.3,85,2,W,19,1027,18.6,60,7,SSE,15,1024.3 2016,6,3,Fr,11.7,18.3,0.2,2.4,1.3,WSW,28,20:25,13.3,78,7,WNW,11,1025.4,18.3,73,7,S,11,1021.6 2016,6,4,Sa,13.3,21.1,64.4,,0,ENE,89,23:43,17,91,8,NNE,13,1015.4,18.8,82,8,NE,30,1009.5 2016,6,5,Su,16.3,19.7,93.8,,0,ENE,96,14:16,18.8,89,8,NE,31,1002.5,19.7,85,8,ENE,57,995.9 2016,6,6,Mo,13.7,18.4,68.4,12.6,2.2,W,59,15:47,14.9,59,7,W,28,999.8,17.9,43,7,WSW,28,998.4 2016,6,7,Tu,11.3,19.9,0,5.2,3.6,WNW,44,17:59,13.8,59,4,NNW,20,1004.1,17.9,40,7,WNW,20,1002.7 2016,6,8,We,10.9,20.7,0,1.8,9.3,WNW,31,14:27,13.8,65,1,WNW,15,1011.4,20.2,40,1,WNW,15,1009.2 2016,6,9,Th,13.8,22,0.2,4.8,6,NW,59,10:06,17.4,62,7,NNW,31,1006.8,21.6,32,1,WNW,17,1005.9 2016,6,10,Fr,13.1,21.4,0.2,6.2,9.1,W,35,10:35,15.3,64,1,WNW,15,1017.4,20.8,41,1,WNW,7,1016.8 2016,6,11,Sa,10.1,19.8,0,3.2,8.7,WSW,54,2:15,13.4,53,1,W,24,1022.5,19.3,34,5,W,19,1020.7 2016,6,12,Su,7.9,18.1,0,5.4,8.3,SW,33,0:32,9.4,62,4,W,22,1036.2,17.6,34,1,S,7,1034.9 2016,6,13,Mo,8.5,20.3,0,1,6.5,W,30,8:06,10.3,65,5,W,20,1039,17.7,51,1,E,9,1035.5 2016,6,14,Tu,9.9,21.2,0,2.2,9.6,,,,11.6,80,1,W,15,1035.8,20.9,42,4,WNW,6,1031.6 2016,6,15,We,9.1,20.5,0,3.4,8.2,WNW,26,3:31,10.6,72,5,WNW,17,1033.4,19.2,45,3,E,9,1029.8 2016,6,16,Th,10.2,22.2,0,2.2,9.6,NNW,30,11:00,12.9,81,1,W,13,1025.5,21.2,42,1,N,11,1019.3 
2016,6,17,Fr,12.7,19.3,0,3.6,1.4,NNW,30,8:41,17.4,53,6,NNW,20,1017.7,18.7,60,7,N,17,1014.6 2016,6,18,Sa,14.7,21.1,5,3.6,3.4,SSW,31,21:08,15.4,90,7,E,6,1017.4,20.7,64,6,ESE,4,1015.4 2016,6,19,Su,14.5,19.5,0.4,0.8,0,NNE,52,21:07,15.2,81,7,W,13,1017.2,16.7,89,8,ESE,20,1010.4 2016,6,20,Mo,15,20.4,38.8,2.2,3.5,NNW,52,15:58,16.2,76,6,NNW,9,999.5,17.9,52,6,NW,22,994.2 2016,6,21,Tu,14.9,18.4,0,6,5.5,NW,59,5:01,15.8,56,7,NW,22,1003.3,17.8,38,4,WNW,28,1002 2016,6,22,We,12.8,19.7,0,4.4,9,W,50,14:44,15,60,2,NW,17,1004.8,19.3,34,1,W,28,1005 2016,6,23,Th,9.4,18.5,0,5,8.2,NNW,35,15:07,12.8,58,7,NNW,7,1012.6,17.4,43,4,NNW,24,1007.4 2016,6,24,Fr,12.7,16.3,0,3.8,6.5,W,69,13:13,15.8,52,5,WNW,13,1003.8,14.4,33,3,W,43,1003.2 2016,6,25,Sa,7.3,15.1,0.6,5.4,9.6,W,54,1:10,8.4,49,1,W,28,1018.1,14.3,31,1,S,17,1018.1 2016,6,26,Su,5.6,15.3,0,3.6,5.2,W,39,3:05,7.4,61,6,W,20,1021.1,13.7,45,7,W,7,1017 2016,6,27,Mo,7.4,11.7,0,2.2,0,W,46,22:57,9.3,74,8,W,26,1015.3,11,74,8,SW,9,1013.6 2016,6,28,Tu,7.7,17.3,2.4,1.4,8.9,WSW,46,4:55,9.6,57,1,W,31,1022.9,16.9,36,1,SSW,13,1023.1 2016,6,29,We,6.7,18.8,0.2,4,9.6,W,31,4:07,8.8,66,1,W,22,1028.4,18.6,33,1,N,4,1025.4 2016,6,30,Th,5.4,16.5,0,1.8,,NW,44,21:58,7.7,78,1,W,11,1021.2,14.5,33,7,NNW,24,1015.1 2016,7,1,Fr,7.7,16.7,0,4,,NW,52,1:49,11,58,7,WNW,13,1015.7,16.4,33,1,WSW,20,1016.2 2016,7,2,Sa,8.2,18.2,0,2.8,9.5,W,37,0:09,10.1,62,1,W,24,1025.5,17.8,36,1,W,17,1023.2 2016,7,3,Su,6.5,18.1,0,3.6,9.5,W,24,9:27,9.2,72,1,W,17,1026.6,16.5,42,0,ENE,13,1022.6 2016,7,4,Mo,6.4,15.6,0,3.4,7.5,W,22,0:41,7.6,77,5,WNW,13,1020.8,14.4,58,7,SE,6,1016.2 2016,7,5,Tu,7.5,18.8,4.8,0.6,5.8,NW,46,23:42,9.6,90,8,W,11,1011.9,17.7,55,2,NW,17,1006.1 2016,7,6,We,9.5,17.7,0.2,4.4,4.8,W,61,11:39,12.9,54,6,WNW,22,1004,15.9,47,6,WSW,31,1003 2016,7,7,Th,12.9,17.2,29.4,6.4,1.6,S,50,10:55,14.9,85,7,S,19,1015.4,14.6,86,7,WSW,22,1016.5 2016,7,8,Fr,12.4,15.6,22.4,,0,W,28,0:42,12.7,92,8,W,17,1019.8,15.5,80,8,SSW,7,1018.4 
2016,7,9,Sa,8.7,18,2.2,0.6,5.2,SW,30,19:13,10.5,88,3,WNW,20,1023.5,16.4,64,5,S,13,1022.4 2016,7,10,Su,10.5,17.9,1,1.8,7.3,W,30,4:51,11.9,90,3,WNW,13,1025.1,16.7,69,3,E,15,1021.3 2016,7,11,Mo,11.8,18.6,0,3.2,0.3,NW,69,13:42,15.5,63,7,N,20,1015.2,18.3,54,7,N,39,1009.7 2016,7,12,Tu,14.8,20.2,0.2,1.2,7.8,NW,54,16:40,16.6,41,6,WNW,15,1010.3,18.7,26,7,NW,33,1007.1 2016,7,13,We,9.6,14,0,8,9.8,W,72,13:08,10.5,40,3,WNW,37,1017.3,13.3,26,7,WSW,37,1019.8 2016,7,14,Th,6.7,17,0,5.2,9.6,WSW,50,21:53,8.6,54,7,W,30,1029.4,16.4,27,2,W,20,1025.9 2016,7,15,Fr,6.4,17.7,0,3,9.5,W,31,10:16,8.8,60,1,W,22,1030.3,16.9,35,2,E,4,1027.3 2016,7,16,Sa,6.9,18.9,0,1.8,4.7,W,28,8:00,9.3,71,2,W,19,1033.3,17.1,65,7,SSE,15,1030.2 2016,7,17,Su,9.2,18.8,0.8,1.8,3.2,WNW,24,9:37,13.3,91,7,W,13,1031.2,17.8,73,7,ESE,2,1027.1 2016,7,18,Mo,11,24.2,0,1.2,8.9,WNW,30,13:54,11.5,92,1,W,9,1024.6,23.9,45,0,WNW,15,1019.8 2016,7,19,Tu,11.4,24.9,0.2,3.2,6.2,NW,20,0:48,18,71,7,Calm,0,1021.7,23,58,5,NE,6,1019.1 2016,7,20,We,16.5,16.9,17,2.4,0,SW,35,20:20,16.7,91,8,S,9,1019.7,16.2,90,8,WSW,13,1018.4 2016,7,21,Th,13.9,19.3,20.4,1.8,2.6,W,31,3:48,14.6,92,7,W,15,1019.9,17.6,79,5,NW,2,1016.1 2016,7,22,Fr,13.1,25.7,0,0.2,7.4,NNW,52,13:08,19.3,66,6,NNW,30,1009.1,25.1,44,4,NW,30,1001 2016,7,23,Sa,14.5,17.4,6,7.6,10,WNW,69,7:46,14.5,35,1,WNW,44,1004.3,17,31,1,W,43,1003.8 2016,7,24,Su,7.8,14.3,0,4,5.4,WSW,46,2:35,8.7,50,7,W,22,1015.8,13.6,39,2,NW,15,1012.1 2016,7,25,Mo,8.4,17.9,0,4,9.9,W,44,16:36,12.2,50,1,NNW,19,1012.7,17.1,31,1,WNW,24,1011.3 2016,7,26,Tu,10.2,17.9,0,4.8,9.9,WNW,39,19:57,14,52,1,N,6,1014.7,17.7,47,1,E,9,1012.8 2016,7,27,We,10.8,18.6,0,5.4,10.1,W,63,12:36,14.7,53,2,NW,19,1012.2,18.5,32,3,WSW,26,1013.3 2016,7,28,Th,8.8,19.4,0,4,10.1,WSW,48,20:33,11.1,57,1,W,19,1024.3,19,34,0,WNW,11,1020.5 2016,7,29,Fr,7.3,18.5,0,4,10.1,WSW,50,19:29,9.8,61,1,W,24,1023,17.6,34,2,WNW,11,1018 2016,7,30,Sa,8.7,19.8,0,4.2,10,W,33,0:38,10.4,63,1,W,26,1023.3,17.9,50,3,ESE,9,1020.2 
2016,7,31,Su,10.4,20.9,0,2.8,3.2,W,24,7:22,11.9,75,7,W,15,1018,20.5,37,2,WNW,7,1015.3 2016,8,1,Mo,10.9,18.4,0,2.2,0.7,W,22,8:27,13.3,66,6,WNW,6,1017.2,17,54,8,W,6,1013 2016,8,2,Tu,11.5,15.8,0.4,1,0.9,SE,54,23:45,12.8,76,7,W,15,1013.7,13.2,77,8,S,24,1013.9 2016,8,3,We,10.7,14.6,24.8,2.6,0.2,S,63,22:26,13.6,77,8,SE,31,1018,12.6,89,8,SSE,24,1017.7 2016,8,4,Th,10.8,18.1,61,9.4,,SE,61,3:11,11.6,90,6,W,15,1024.3,15.5,60,7,SSW,19,1025.1 2016,8,5,Fr,9.7,16.2,4,3.2,2.7,SW,35,11:50,11.2,90,6,W,20,1030.4,14.7,67,6,SSW,17,1028.3 2016,8,6,Sa,9.2,15.5,1.8,2.6,2.2,SSE,31,11:29,10.8,78,7,W,17,1029,14.7,71,7,SE,7,1026.8 2016,8,7,Su,10.3,16.4,0.6,0.6,2.6,W,33,19:09,12.4,80,7,W,15,1027.4,15.9,64,6,ESE,7,1024.7 2016,8,8,Mo,8.7,19.7,0,2.6,10.8,W,28,8:18,11,75,1,WNW,22,1026.4,17.9,52,1,E,11,1022.9 2016,8,9,Tu,8.1,19.1,0,2,6.9,NNE,28,20:33,11,73,5,W,17,1023.1,18.8,52,7,NE,11,1017.9 2016,8,10,We,10.9,25.2,0,4.8,9.2,NW,44,13:14,17.6,52,3,NNW,17,1015.6,24.6,33,3,NNW,24,1011.4 2016,8,11,Th,11.2,20.3,0,6.2,10.6,W,46,12:01,14,48,1,W,17,1018.4,19.6,30,1,WSW,22,1017 2016,8,12,Fr,7.1,19.4,0,6.4,8.6,NW,41,21:01,9.7,56,0,WNW,20,1024.4,17.3,37,7,E,13,1019.8 2016,8,13,Sa,8.3,20.3,0,3.2,10.6,W,41,10:00,12.4,49,1,W,24,1022.5,19.8,31,2,WSW,19,1020.2 2016,8,14,Su,9.5,20,0,5.4,10.5,W,28,7:55,14.2,52,1,W,20,1027.7,17.6,46,1,SE,13,1027.2 2016,8,15,Mo,8.9,19.6,0,3.2,10.1,NE,30,15:51,12,72,1,WNW,17,1032.7,19.2,54,1,NE,17,1029.7 2016,8,16,Tu,9.8,20.3,0,4,10.5,ENE,24,12:38,12.8,75,0,W,13,1031,19.6,59,0,ENE,19,1025.8 2016,8,17,We,10.6,24.2,0,3.8,10.2,ENE,24,16:17,14.2,63,1,W,15,1026.3,22.7,35,6,ENE,11,1022.1 2016,8,18,Th,11.6,22.9,0,4.6,10.5,N,31,22:44,14.2,62,1,W,15,1024.5,20.7,40,1,ENE,19,1020.7 2016,8,19,Fr,11.2,24.3,0,,10,NNE,48,13:25,14.2,66,0,W,11,1020.1,22.4,37,6,NNW,24,1013.9 2016,8,20,Sa,11,17.9,0.4,6.6,10.7,WSW,59,0:07,13.2,51,1,W,19,1015.3,15.7,35,3,WSW,22,1013.8 2016,8,21,Su,8.2,20.4,0,5.2,10.7,W,39,10:28,12.5,48,1,W,24,1019.8,19.9,32,1,WNW,9,1016.4 
2016,8,22,Mo,9.5,13.8,0,3,0.2,W,35,14:49,11.4,66,7,WNW,15,1016.8,12.2,67,7,W,24,1015 2016,8,23,Tu,8.5,18.4,1.8,0.6,6.8,SSE,37,13:38,11.9,69,1,WNW,19,1014.4,15.4,57,7,SSW,20,1013.5 2016,8,24,We,11.8,14.7,2.4,3.2,0,S,43,20:06,13.4,79,8,SSE,11,1013.3,13.7,90,8,ESE,9,1007.5 2016,8,25,Th,9,17.3,52.8,5.2,6.7,W,48,12:05,11.4,67,7,SW,22,1010.2,15.7,48,3,S,15,1010.6 2016,8,26,Fr,7.3,17.3,0,4.6,7.8,W,43,7:35,10.4,57,7,W,20,1018.7,16.4,50,7,S,20,1017.6 2016,8,27,Sa,7.4,19.9,1.4,2.8,10.7,W,33,5:25,11.3,57,1,W,24,1023.6,17.7,45,1,ENE,15,1020.3 2016,8,28,Su,8.4,20.5,0,3.8,10.7,W,33,4:29,12.2,50,1,W,22,1022.9,18,36,2,ESE,17,1020.9 2016,8,29,Mo,8.4,19.6,0,4,6.1,W,22,1:21,12.9,57,7,W,17,1027.4,18.1,38,7,W,9,1025 2016,8,30,Tu,10.7,19.9,0,3.2,4.4,NNE,31,18:04,13.8,71,7,W,17,1026.9,19.6,62,6,ENE,15,1023.1 2016,8,31,We,13.6,23.6,0,,2.3,NNW,54,13:39,18.8,61,7,N,20,1018.7,21.2,53,7,NNW,39,1013.9 -------------------------------------------------------------------------------- /1.tools/L1 - Pandas-2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Pivot Table on Pandas\n", 8 | "\n", 9 | "In this notebook, we focus on the `pivot_table` feature of `pandas`." 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "## Import Modules" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": { 23 | "collapsed": true 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "import pandas as pd\n", 28 | "import numpy as np\n", 29 | "import plotly.plotly as py\n", 30 | "import plotly.graph_objs as go" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "## Import data" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 2, 43 | "metadata": {}, 44 | "outputs": [ 45 | { 46 | "data": { 47 | "text/html": [ 48 | "
\n", 49 | "\n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | "
suburbcounciltypedistance_to_CBDvaluesold
0TurrellaRockdalehouse9.9$839,67617
1SydenhamMarrickvillehouse7.4$845,77121
2St PetersMarrickvillehouse6.9$920,16950
3TempeMarrickvillehouse8.3$921,39059
4WaterlooSydneyhouse4.7$937,31611
\n", 109 | "
" 110 | ], 111 | "text/plain": [ 112 | " suburb council type distance_to_CBD value sold\n", 113 | "0 Turrella Rockdale house 9.9 $839,676 17\n", 114 | "1 Sydenham Marrickville house 7.4 $845,771 21\n", 115 | "2 St Peters Marrickville house 6.9 $920,169 50\n", 116 | "3 Tempe Marrickville house 8.3 $921,390 59\n", 117 | "4 Waterloo Sydney house 4.7 $937,316 11" 118 | ] 119 | }, 120 | "execution_count": 2, 121 | "metadata": {}, 122 | "output_type": "execute_result" 123 | } 124 | ], 125 | "source": [ 126 | "df = pd.read_csv('./asset/sydney_housing_market.txt', sep='\\t')\n", 127 | "df.head()" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "# Pivot Table" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "In order to build up a pivot table, we must specify an index. " 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 19, 147 | "metadata": {}, 148 | "outputs": [ 149 | { 150 | "data": { 151 | "text/html": [ 152 | "
\n", 153 | "\n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | "
distance_to_CBDsoldvalue
type
house6.58569.201024662.90
unit6.280108.55585739.05
\n", 183 | "
" 184 | ], 185 | "text/plain": [ 186 | " distance_to_CBD sold value\n", 187 | "type \n", 188 | "house 6.585 69.20 1024662.90\n", 189 | "unit 6.280 108.55 585739.05" 190 | ] 191 | }, 192 | "execution_count": 19, 193 | "metadata": {}, 194 | "output_type": "execute_result" 195 | } 196 | ], 197 | "source": [ 198 | "pd.pivot_table(df, index=['type'])" 199 | ] 200 | }, 201 | { 202 | "cell_type": "markdown", 203 | "metadata": {}, 204 | "source": [ 205 | "Note that the default aggregation function is `np.mean`. We can specify the aggregation function in the `aggfunc` parameter, as shown below. " 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 18, 211 | "metadata": {}, 212 | "outputs": [ 213 | { 214 | "data": { 215 | "text/html": [ 216 | "
\n", 217 | "\n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | "
distance_to_CBDsold
type
house6.5851384
unit6.2802171
\n", 243 | "
" 244 | ], 245 | "text/plain": [ 246 | " distance_to_CBD sold\n", 247 | "type \n", 248 | "house 6.585 1384\n", 249 | "unit 6.280 2171" 250 | ] 251 | }, 252 | "execution_count": 18, 253 | "metadata": {}, 254 | "output_type": "execute_result" 255 | } 256 | ], 257 | "source": [ 258 | "pd.pivot_table(df, index=['type'], aggfunc={'distance_to_CBD':np.mean, 'sold':np.sum})" 259 | ] 260 | }, 261 | { 262 | "cell_type": "markdown", 263 | "metadata": {}, 264 | "source": [ 265 | "For simplicity, we will stick with the default aggregation function. \n", 266 | "\n", 267 | "We also want to see *value*, but we need to change it into floats first" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": 4, 273 | "metadata": {}, 274 | "outputs": [ 275 | { 276 | "data": { 277 | "text/html": [ 278 | "
\n", 279 | "\n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | "
distance_to_CBDsoldvalue
type
house6.58569.201024662.90
unit6.280108.55585739.05
\n", 309 | "
" 310 | ], 311 | "text/plain": [ 312 | " distance_to_CBD sold value\n", 313 | "type \n", 314 | "house 6.585 69.20 1024662.90\n", 315 | "unit 6.280 108.55 585739.05" 316 | ] 317 | }, 318 | "execution_count": 4, 319 | "metadata": {}, 320 | "output_type": "execute_result" 321 | } 322 | ], 323 | "source": [ 324 | "df['value']=df['value'].replace('[\\$,]','',regex=True).astype(float)\n", 325 | "pd.pivot_table(df, index=['type'])" 326 | ] 327 | }, 328 | { 329 | "cell_type": "markdown", 330 | "metadata": {}, 331 | "source": [ 332 | "We could also choose more than one column as index" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": 5, 338 | "metadata": {}, 339 | "outputs": [ 340 | { 341 | "data": { 342 | "text/html": [ 343 | "
\n", 344 | "\n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | "
distance_to_CBDsoldvalue
typecouncil
houseAshfield7.60000079.5000001.101914e+06
Botany Bay8.20000029.0000001.090744e+06
Canada Bay9.20000015.0000001.036064e+06
Canterbury9.10000036.0000001.066214e+06
Leichhardt5.200000225.0000001.076623e+06
Marrickville6.74285781.2857149.933390e+05
Rockdale9.90000017.0000008.396760e+05
Sydney4.61666755.6666671.037789e+06
unitAshfield7.233333109.6666675.933117e+05
Botany Bay8.20000056.0000004.844870e+05
Burwood8.40000043.0000006.059270e+05
Canterbury9.10000015.0000005.930810e+05
Lane Cove7.000000220.0000006.112070e+05
Marrickville6.55000087.3333335.929137e+05
Randwick5.20000053.0000005.778140e+05
Ryde7.900000139.0000005.729230e+05
Sydney3.760000158.4000005.863850e+05
\n", 468 | "
" 469 | ], 470 | "text/plain": [ 471 | " distance_to_CBD sold value\n", 472 | "type council \n", 473 | "house Ashfield 7.600000 79.500000 1.101914e+06\n", 474 | " Botany Bay 8.200000 29.000000 1.090744e+06\n", 475 | " Canada Bay 9.200000 15.000000 1.036064e+06\n", 476 | " Canterbury 9.100000 36.000000 1.066214e+06\n", 477 | " Leichhardt 5.200000 225.000000 1.076623e+06\n", 478 | " Marrickville 6.742857 81.285714 9.933390e+05\n", 479 | " Rockdale 9.900000 17.000000 8.396760e+05\n", 480 | " Sydney 4.616667 55.666667 1.037789e+06\n", 481 | "unit Ashfield 7.233333 109.666667 5.933117e+05\n", 482 | " Botany Bay 8.200000 56.000000 4.844870e+05\n", 483 | " Burwood 8.400000 43.000000 6.059270e+05\n", 484 | " Canterbury 9.100000 15.000000 5.930810e+05\n", 485 | " Lane Cove 7.000000 220.000000 6.112070e+05\n", 486 | " Marrickville 6.550000 87.333333 5.929137e+05\n", 487 | " Randwick 5.200000 53.000000 5.778140e+05\n", 488 | " Ryde 7.900000 139.000000 5.729230e+05\n", 489 | " Sydney 3.760000 158.400000 5.863850e+05" 490 | ] 491 | }, 492 | "execution_count": 5, 493 | "metadata": {}, 494 | "output_type": "execute_result" 495 | } 496 | ], 497 | "source": [ 498 | "pd.pivot_table(df, index=['type','council'])" 499 | ] 500 | }, 501 | { 502 | "cell_type": "markdown", 503 | "metadata": {}, 504 | "source": [ 505 | "The ```columns``` parameter provides an additional way to segment the data" 506 | ] 507 | }, 508 | { 509 | "cell_type": "code", 510 | "execution_count": 6, 511 | "metadata": {}, 512 | "outputs": [ 513 | { 514 | "data": { 515 | "text/html": [ 516 | "
\n", 517 | "\n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | "
distance_to_CBDvaluesold
typehouseunithouseunithouseunit
council
Ashfield7.6000007.2333331.101914e+06593311.66666779.500000109.666667
Botany Bay8.2000008.2000001.090744e+06484487.00000029.00000056.000000
BurwoodNaN8.400000NaN605927.000000NaN43.000000
Canada Bay9.200000NaN1.036064e+06NaN15.000000NaN
Canterbury9.1000009.1000001.066214e+06593081.00000036.00000015.000000
Lane CoveNaN7.000000NaN611207.000000NaN220.000000
Leichhardt5.200000NaN1.076623e+06NaN225.000000NaN
Marrickville6.7428576.5500009.933390e+05592913.66666781.28571487.333333
RandwickNaN5.200000NaN577814.000000NaN53.000000
Rockdale9.900000NaN8.396760e+05NaN17.000000NaN
RydeNaN7.900000NaN572923.000000NaN139.000000
Sydney4.6166673.7600001.037789e+06586385.00000055.666667158.400000
\n", 655 | "
" 656 | ], 657 | "text/plain": [ 658 | " distance_to_CBD value \\\n", 659 | "type house unit house unit \n", 660 | "council \n", 661 | "Ashfield 7.600000 7.233333 1.101914e+06 593311.666667 \n", 662 | "Botany Bay 8.200000 8.200000 1.090744e+06 484487.000000 \n", 663 | "Burwood NaN 8.400000 NaN 605927.000000 \n", 664 | "Canada Bay 9.200000 NaN 1.036064e+06 NaN \n", 665 | "Canterbury 9.100000 9.100000 1.066214e+06 593081.000000 \n", 666 | "Lane Cove NaN 7.000000 NaN 611207.000000 \n", 667 | "Leichhardt 5.200000 NaN 1.076623e+06 NaN \n", 668 | "Marrickville 6.742857 6.550000 9.933390e+05 592913.666667 \n", 669 | "Randwick NaN 5.200000 NaN 577814.000000 \n", 670 | "Rockdale 9.900000 NaN 8.396760e+05 NaN \n", 671 | "Ryde NaN 7.900000 NaN 572923.000000 \n", 672 | "Sydney 4.616667 3.760000 1.037789e+06 586385.000000 \n", 673 | "\n", 674 | " sold \n", 675 | "type house unit \n", 676 | "council \n", 677 | "Ashfield 79.500000 109.666667 \n", 678 | "Botany Bay 29.000000 56.000000 \n", 679 | "Burwood NaN 43.000000 \n", 680 | "Canada Bay 15.000000 NaN \n", 681 | "Canterbury 36.000000 15.000000 \n", 682 | "Lane Cove NaN 220.000000 \n", 683 | "Leichhardt 225.000000 NaN \n", 684 | "Marrickville 81.285714 87.333333 \n", 685 | "Randwick NaN 53.000000 \n", 686 | "Rockdale 17.000000 NaN \n", 687 | "Ryde NaN 139.000000 \n", 688 | "Sydney 55.666667 158.400000 " 689 | ] 690 | }, 691 | "execution_count": 6, 692 | "metadata": {}, 693 | "output_type": "execute_result" 694 | } 695 | ], 696 | "source": [ 697 | "pd.pivot_table(df, index=['council'], columns=['type'])" 698 | ] 699 | }, 700 | { 701 | "cell_type": "markdown", 702 | "metadata": {}, 703 | "source": [ 704 | "Note that ```NaN``` implies that there is no data here" 705 | ] 706 | }, 707 | { 708 | "cell_type": "markdown", 709 | "metadata": {}, 710 | "source": [ 711 | "The default ```aggfunc``` is ```np.mean```, but we could use other functions such as ```np.sum```" 712 | ] 713 | }, 714 | { 715 | "cell_type": "code", 716 | "execution_count":
7, 717 | "metadata": {}, 718 | "outputs": [ 719 | { 720 | "data": { 721 | "text/html": [ 722 | "
\n", 723 | "\n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | "
distance_to_CBDsoldvalue
type
house131.7138420493258.0
unit125.6217111714781.0
\n", 753 | "
" 754 | ], 755 | "text/plain": [ 756 | " distance_to_CBD sold value\n", 757 | "type \n", 758 | "house 131.7 1384 20493258.0\n", 759 | "unit 125.6 2171 11714781.0" 760 | ] 761 | }, 762 | "execution_count": 7, 763 | "metadata": {}, 764 | "output_type": "execute_result" 765 | } 766 | ], 767 | "source": [ 768 | "pd.pivot_table(df, index=['type'],aggfunc=np.sum)" 769 | ] 770 | }, 771 | { 772 | "cell_type": "markdown", 773 | "metadata": {}, 774 | "source": [ 775 | "Use ```margins=True``` to show the total numbers" 776 | ] 777 | }, 778 | { 779 | "cell_type": "code", 780 | "execution_count": 8, 781 | "metadata": {}, 782 | "outputs": [ 783 | { 784 | "data": { 785 | "text/html": [ 786 | "
\n", 787 | "\n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | "
distance_to_CBDsoldvalue
typecouncil
houseAshfield15.2159.02203829.0
Botany Bay8.229.01090744.0
Canada Bay9.215.01036064.0
Canterbury9.136.01066214.0
Leichhardt5.2225.01076623.0
Marrickville47.2569.06953373.0
Rockdale9.917.0839676.0
Sydney27.7334.06226735.0
unitAshfield21.7329.01779935.0
Botany Bay8.256.0484487.0
Burwood8.443.0605927.0
Canterbury9.115.0593081.0
Lane Cove7.0220.0611207.0
Marrickville39.3524.03557482.0
Randwick5.253.0577814.0
Ryde7.9139.0572923.0
Sydney18.8792.02931925.0
All257.33555.032208039.0
\n", 918 | "
" 919 | ], 920 | "text/plain": [ 921 | " distance_to_CBD sold value\n", 922 | "type council \n", 923 | "house Ashfield 15.2 159.0 2203829.0\n", 924 | " Botany Bay 8.2 29.0 1090744.0\n", 925 | " Canada Bay 9.2 15.0 1036064.0\n", 926 | " Canterbury 9.1 36.0 1066214.0\n", 927 | " Leichhardt 5.2 225.0 1076623.0\n", 928 | " Marrickville 47.2 569.0 6953373.0\n", 929 | " Rockdale 9.9 17.0 839676.0\n", 930 | " Sydney 27.7 334.0 6226735.0\n", 931 | "unit Ashfield 21.7 329.0 1779935.0\n", 932 | " Botany Bay 8.2 56.0 484487.0\n", 933 | " Burwood 8.4 43.0 605927.0\n", 934 | " Canterbury 9.1 15.0 593081.0\n", 935 | " Lane Cove 7.0 220.0 611207.0\n", 936 | " Marrickville 39.3 524.0 3557482.0\n", 937 | " Randwick 5.2 53.0 577814.0\n", 938 | " Ryde 7.9 139.0 572923.0\n", 939 | " Sydney 18.8 792.0 2931925.0\n", 940 | "All 257.3 3555.0 32208039.0" 941 | ] 942 | }, 943 | "execution_count": 8, 944 | "metadata": {}, 945 | "output_type": "execute_result" 946 | } 947 | ], 948 | "source": [ 949 | "pd.pivot_table(df, index=['type','council'], aggfunc=np.sum, margins=True)" 950 | ] 951 | }, 952 | { 953 | "cell_type": "markdown", 954 | "metadata": {}, 955 | "source": [ 956 | "We should use avg for *value* but sum for *sold*, and we do not want to see *distance_to_CBD* for now" 957 | ] 958 | }, 959 | { 960 | "cell_type": "code", 961 | "execution_count": 9, 962 | "metadata": {}, 963 | "outputs": [ 964 | { 965 | "data": { 966 | "text/html": [ 967 | "
\n", 968 | "\n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | " \n", 1092 | " \n", 1093 | " \n", 1094 | " \n", 1095 | " \n", 1096 | " \n", 1097 | " \n", 1098 | " \n", 1099 | " \n", 1100 | " \n", 1101 | " \n", 1102 | " \n", 1103 | " \n", 1104 | " \n", 1105 | " \n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1109 | " \n", 1110 | " \n", 1111 | " \n", 1112 | " 
\n", 1113 | " \n", 1114 | " \n", 1115 | " \n", 1116 | " \n", 1117 | " \n", 1118 | " \n", 1119 | " \n", 1120 | " \n", 1121 | " \n", 1122 | " \n", 1123 | " \n", 1124 | " \n", 1125 | " \n", 1126 | " \n", 1127 | " \n", 1128 | " \n", 1129 | " \n", 1130 | " \n", 1131 | " \n", 1132 | " \n", 1133 | " \n", 1134 | " \n", 1135 | " \n", 1136 | " \n", 1137 | " \n", 1138 | " \n", 1139 | " \n", 1140 | " \n", 1141 | " \n", 1142 | " \n", 1143 | " \n", 1144 | " \n", 1145 | " \n", 1146 | " \n", 1147 | " \n", 1148 | " \n", 1149 | " \n", 1150 | " \n", 1151 | " \n", 1152 | " \n", 1153 | " \n", 1154 | " \n", 1155 | " \n", 1156 | " \n", 1157 | " \n", 1158 | " \n", 1159 | " \n", 1160 | " \n", 1161 | " \n", 1162 | " \n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | " \n", 1186 | " \n", 1187 | " \n", 1188 | " \n", 1189 | " \n", 1190 | " \n", 1191 | " \n", 1192 | " \n", 1193 | " \n", 1194 | " \n", 1195 | " \n", 1196 | " \n", 1197 | " \n", 1198 | " \n", 1199 | " \n", 1200 | " \n", 1201 | " \n", 1202 | " \n", 1203 | " \n", 1204 | " \n", 1205 | " \n", 1206 | " \n", 1207 | " \n", 1208 | " \n", 1209 | " \n", 1210 | " \n", 1211 | " \n", 1212 | " \n", 1213 | " \n", 1214 | " \n", 1215 | " \n", 1216 | " \n", 1217 | " \n", 1218 | " \n", 1219 | " \n", 1220 | " \n", 1221 | " \n", 1222 | " \n", 1223 | " \n", 1224 | " \n", 1225 | " \n", 1226 | " \n", 1227 | " \n", 1228 | " \n", 1229 | " \n", 1230 | " \n", 1231 | " \n", 1232 | " \n", 1233 | " \n", 1234 | " \n", 1235 | " \n", 1236 | " \n", 1237 | " \n", 1238 | " \n", 1239 | " \n", 1240 | " \n", 1241 | " \n", 1242 | " \n", 1243 | " \n", 1244 | " \n", 1245 | " \n", 1246 | " \n", 1247 | " \n", 1248 | " \n", 1249 | " \n", 1250 | " \n", 1251 | " \n", 1252 | " \n", 1253 | " \n", 1254 | " \n", 1255 | 
" \n", 1256 | " \n", 1257 | " \n", 1258 | " \n", 1259 | " \n", 1260 | " \n", 1261 | " \n", 1262 | " \n", 1263 | " \n", 1264 | " \n", 1265 | " \n", 1266 | " \n", 1267 | " \n", 1268 | " \n", 1269 | " \n", 1270 | " \n", 1271 | " \n", 1272 | " \n", 1273 | " \n", 1274 | " \n", 1275 | " \n", 1276 | " \n", 1277 | " \n", 1278 | " \n", 1279 | " \n", 1280 | " \n", 1281 | " \n", 1282 | " \n", 1283 | " \n", 1284 | " \n", 1285 | " \n", 1286 | " \n", 1287 | " \n", 1288 | " \n", 1289 | " \n", 1290 | " \n", 1291 | " \n", 1292 | " \n", 1293 | " \n", 1294 | " \n", 1295 | " \n", 1296 | " \n", 1297 | " \n", 1298 | " \n", 1299 | " \n", 1300 | "
valuesold
typehouseunitAllhouseunitAll
councilsuburb
AshfieldAshfield1123787.0575096.00849441.500110.0228.0338.0
HaberfieldNaN573347.00573347.000NaN13.013.0
Summer Hill1080042.0631492.00855767.00049.088.0137.0
Botany BayEastlakes1090744.0484487.00787615.50029.056.085.0
BurwoodCroydonNaN605927.00605927.000NaN43.043.0
Canada BayMortlake1036064.0NaN1036064.00015.0NaN15.0
CanterburyHurlstone Park1066214.0593081.00829647.50036.015.051.0
Lane CoveLane Cove NorthNaN611207.00611207.000NaN220.0220.0
LeichhardtLeichhardt1076623.0NaN1076623.000225.0NaN225.0
MarrickvilleDulwich HillNaN622141.00622141.000NaN143.0143.0
Enmore1079403.0607195.00843299.00058.015.073.0
LewishamNaN569889.00569889.000NaN20.020.0
Marrickville994026.0552713.00773369.500191.0225.0416.0
Petersham1122372.0610890.00866631.00082.069.0151.0
St Peters920169.0NaN920169.00050.0NaN50.0
Stanmore1070242.0594654.00832448.000108.052.0160.0
Sydenham845771.0NaN845771.00021.0NaN21.0
Tempe921390.0NaN921390.00059.0NaN59.0
RandwickCentennial ParkNaN577814.00577814.000NaN53.053.0
RockdaleTurrella839676.0NaN839676.00017.0NaN17.0
RydeGladesvilleNaN572923.00572923.000NaN139.0139.0
SydneyAlexandriaNaN617024.00617024.000NaN211.0211.0
Chippendale1060415.0NaN1060415.00031.0NaN31.0
Darlington1062426.0NaN1062426.00032.0NaN32.0
Elizabeth BayNaN615967.00615967.000NaN182.0182.0
Erskineville1005107.0NaN1005107.00084.0NaN84.0
Newtown1044330.0559175.00801752.500166.0104.0270.0
Potts PointNaN565141.00565141.000NaN217.0217.0
Rushcutters BayNaN574618.00574618.000NaN78.078.0
Waterloo937316.0NaN937316.00011.0NaN11.0
Zetland1117141.0NaN1117141.00010.0NaN10.0
All1024662.9585739.05805200.9751384.02171.03555.0
\n", 1301 | "
" 1302 | ], 1303 | "text/plain": [ 1304 | " value sold \\\n", 1305 | "type house unit All house \n", 1306 | "council suburb \n", 1307 | "Ashfield Ashfield 1123787.0 575096.00 849441.500 110.0 \n", 1308 | " Haberfield NaN 573347.00 573347.000 NaN \n", 1309 | " Summer Hill 1080042.0 631492.00 855767.000 49.0 \n", 1310 | "Botany Bay Eastlakes 1090744.0 484487.00 787615.500 29.0 \n", 1311 | "Burwood Croydon NaN 605927.00 605927.000 NaN \n", 1312 | "Canada Bay Mortlake 1036064.0 NaN 1036064.000 15.0 \n", 1313 | "Canterbury Hurlstone Park 1066214.0 593081.00 829647.500 36.0 \n", 1314 | "Lane Cove Lane Cove North NaN 611207.00 611207.000 NaN \n", 1315 | "Leichhardt Leichhardt 1076623.0 NaN 1076623.000 225.0 \n", 1316 | "Marrickville Dulwich Hill NaN 622141.00 622141.000 NaN \n", 1317 | " Enmore 1079403.0 607195.00 843299.000 58.0 \n", 1318 | " Lewisham NaN 569889.00 569889.000 NaN \n", 1319 | " Marrickville 994026.0 552713.00 773369.500 191.0 \n", 1320 | " Petersham 1122372.0 610890.00 866631.000 82.0 \n", 1321 | " St Peters 920169.0 NaN 920169.000 50.0 \n", 1322 | " Stanmore 1070242.0 594654.00 832448.000 108.0 \n", 1323 | " Sydenham 845771.0 NaN 845771.000 21.0 \n", 1324 | " Tempe 921390.0 NaN 921390.000 59.0 \n", 1325 | "Randwick Centennial Park NaN 577814.00 577814.000 NaN \n", 1326 | "Rockdale Turrella 839676.0 NaN 839676.000 17.0 \n", 1327 | "Ryde Gladesville NaN 572923.00 572923.000 NaN \n", 1328 | "Sydney Alexandria NaN 617024.00 617024.000 NaN \n", 1329 | " Chippendale 1060415.0 NaN 1060415.000 31.0 \n", 1330 | " Darlington 1062426.0 NaN 1062426.000 32.0 \n", 1331 | " Elizabeth Bay NaN 615967.00 615967.000 NaN \n", 1332 | " Erskineville 1005107.0 NaN 1005107.000 84.0 \n", 1333 | " Newtown 1044330.0 559175.00 801752.500 166.0 \n", 1334 | " Potts Point NaN 565141.00 565141.000 NaN \n", 1335 | " Rushcutters Bay NaN 574618.00 574618.000 NaN \n", 1336 | " Waterloo 937316.0 NaN 937316.000 11.0 \n", 1337 | " Zetland 1117141.0 NaN 1117141.000 10.0 \n", 1338 | "All 
1024662.9 585739.05 805200.975 1384.0 \n", 1339 | "\n", 1340 | " \n", 1341 | "type unit All \n", 1342 | "council suburb \n", 1343 | "Ashfield Ashfield 228.0 338.0 \n", 1344 | " Haberfield 13.0 13.0 \n", 1345 | " Summer Hill 88.0 137.0 \n", 1346 | "Botany Bay Eastlakes 56.0 85.0 \n", 1347 | "Burwood Croydon 43.0 43.0 \n", 1348 | "Canada Bay Mortlake NaN 15.0 \n", 1349 | "Canterbury Hurlstone Park 15.0 51.0 \n", 1350 | "Lane Cove Lane Cove North 220.0 220.0 \n", 1351 | "Leichhardt Leichhardt NaN 225.0 \n", 1352 | "Marrickville Dulwich Hill 143.0 143.0 \n", 1353 | " Enmore 15.0 73.0 \n", 1354 | " Lewisham 20.0 20.0 \n", 1355 | " Marrickville 225.0 416.0 \n", 1356 | " Petersham 69.0 151.0 \n", 1357 | " St Peters NaN 50.0 \n", 1358 | " Stanmore 52.0 160.0 \n", 1359 | " Sydenham NaN 21.0 \n", 1360 | " Tempe NaN 59.0 \n", 1361 | "Randwick Centennial Park 53.0 53.0 \n", 1362 | "Rockdale Turrella NaN 17.0 \n", 1363 | "Ryde Gladesville 139.0 139.0 \n", 1364 | "Sydney Alexandria 211.0 211.0 \n", 1365 | " Chippendale NaN 31.0 \n", 1366 | " Darlington NaN 32.0 \n", 1367 | " Elizabeth Bay 182.0 182.0 \n", 1368 | " Erskineville NaN 84.0 \n", 1369 | " Newtown 104.0 270.0 \n", 1370 | " Potts Point 217.0 217.0 \n", 1371 | " Rushcutters Bay 78.0 78.0 \n", 1372 | " Waterloo NaN 11.0 \n", 1373 | " Zetland NaN 10.0 \n", 1374 | "All 2171.0 3555.0 " 1375 | ] 1376 | }, 1377 | "execution_count": 9, 1378 | "metadata": {}, 1379 | "output_type": "execute_result" 1380 | } 1381 | ], 1382 | "source": [ 1383 | "pd.pivot_table(df, index=['council','suburb'], \n", 1384 | " columns=['type'], \n", 1385 | " values=['sold', 'value'], \n", 1386 | " aggfunc={'sold':np.sum, 'value':np.mean}, \n", 1387 | " margins=True)" 1388 | ] 1389 | }, 1390 | { 1391 | "cell_type": "markdown", 1392 | "metadata": {}, 1393 | "source": [ 1394 | "# Advanced Filtering over Pivot Table" 1395 | ] 1396 | }, 1397 | { 1398 | "cell_type": "markdown", 1399 | "metadata": {}, 1400 | "source": [ 1401 | "We firstly build a pivot table" 
1402 | ] 1403 | }, 1404 | { 1405 | "cell_type": "code", 1406 | "execution_count": 10, 1407 | "metadata": {}, 1408 | "outputs": [ 1409 | { 1410 | "data": { 1411 | "text/html": [ 1412 | "
\n", 1413 | "\n", 1414 | " \n", 1415 | " \n", 1416 | " \n", 1417 | " \n", 1418 | " \n", 1419 | " \n", 1420 | " \n", 1421 | " \n", 1422 | " \n", 1423 | " \n", 1424 | " \n", 1425 | " \n", 1426 | " \n", 1427 | " \n", 1428 | " \n", 1429 | " \n", 1430 | " \n", 1431 | " \n", 1432 | " \n", 1433 | " \n", 1434 | " \n", 1435 | " \n", 1436 | " \n", 1437 | " \n", 1438 | " \n", 1439 | " \n", 1440 | " \n", 1441 | " \n", 1442 | " \n", 1443 | " \n", 1444 | " \n", 1445 | " \n", 1446 | " \n", 1447 | " \n", 1448 | " \n", 1449 | " \n", 1450 | " \n", 1451 | " \n", 1452 | " \n", 1453 | " \n", 1454 | " \n", 1455 | " \n", 1456 | " \n", 1457 | " \n", 1458 | " \n", 1459 | " \n", 1460 | " \n", 1461 | " \n", 1462 | " \n", 1463 | " \n", 1464 | " \n", 1465 | " \n", 1466 | " \n", 1467 | " \n", 1468 | " \n", 1469 | " \n", 1470 | " \n", 1471 | " \n", 1472 | " \n", 1473 | " \n", 1474 | " \n", 1475 | " \n", 1476 | " \n", 1477 | " \n", 1478 | " \n", 1479 | " \n", 1480 | " \n", 1481 | " \n", 1482 | " \n", 1483 | " \n", 1484 | " \n", 1485 | " \n", 1486 | " \n", 1487 | " \n", 1488 | " \n", 1489 | " \n", 1490 | " \n", 1491 | " \n", 1492 | " \n", 1493 | " \n", 1494 | " \n", 1495 | " \n", 1496 | " \n", 1497 | " \n", 1498 | " \n", 1499 | " \n", 1500 | " \n", 1501 | " \n", 1502 | " \n", 1503 | " \n", 1504 | " \n", 1505 | " \n", 1506 | " \n", 1507 | " \n", 1508 | " \n", 1509 | " \n", 1510 | " \n", 1511 | " \n", 1512 | "
sold
typehouseunitAll
council
Ashfield159.0329.0488.0
Botany Bay29.056.085.0
BurwoodNaN43.043.0
Canada Bay15.0NaN15.0
Canterbury36.015.051.0
Lane CoveNaN220.0220.0
Leichhardt225.0NaN225.0
Marrickville569.0524.01093.0
RandwickNaN53.053.0
Rockdale17.0NaN17.0
RydeNaN139.0139.0
Sydney334.0792.01126.0
All1384.02171.03555.0
\n", 1513 | "
" 1514 | ], 1515 | "text/plain": [ 1516 | " sold \n", 1517 | "type house unit All\n", 1518 | "council \n", 1519 | "Ashfield 159.0 329.0 488.0\n", 1520 | "Botany Bay 29.0 56.0 85.0\n", 1521 | "Burwood NaN 43.0 43.0\n", 1522 | "Canada Bay 15.0 NaN 15.0\n", 1523 | "Canterbury 36.0 15.0 51.0\n", 1524 | "Lane Cove NaN 220.0 220.0\n", 1525 | "Leichhardt 225.0 NaN 225.0\n", 1526 | "Marrickville 569.0 524.0 1093.0\n", 1527 | "Randwick NaN 53.0 53.0\n", 1528 | "Rockdale 17.0 NaN 17.0\n", 1529 | "Ryde NaN 139.0 139.0\n", 1530 | "Sydney 334.0 792.0 1126.0\n", 1531 | "All 1384.0 2171.0 3555.0" 1532 | ] 1533 | }, 1534 | "execution_count": 10, 1535 | "metadata": {}, 1536 | "output_type": "execute_result" 1537 | } 1538 | ], 1539 | "source": [ 1540 | "table = pd.pivot_table(df, index=['council'], columns=['type'], values=['sold'], aggfunc=np.sum, margins=True)\n", 1541 | "table" 1542 | ] 1543 | }, 1544 | { 1545 | "cell_type": "markdown", 1546 | "metadata": {}, 1547 | "source": [ 1548 | "We can just look at data from one city" 1549 | ] 1550 | }, 1551 | { 1552 | "cell_type": "code", 1553 | "execution_count": 11, 1554 | "metadata": {}, 1555 | "outputs": [ 1556 | { 1557 | "data": { 1558 | "text/html": [ 1559 | "
\n", 1560 | "\n", 1561 | " \n", 1562 | " \n", 1563 | " \n", 1564 | " \n", 1565 | " \n", 1566 | " \n", 1567 | " \n", 1568 | " \n", 1569 | " \n", 1570 | " \n", 1571 | " \n", 1572 | " \n", 1573 | " \n", 1574 | " \n", 1575 | " \n", 1576 | " \n", 1577 | " \n", 1578 | " \n", 1579 | " \n", 1580 | " \n", 1581 | " \n", 1582 | " \n", 1583 | " \n", 1584 | " \n", 1585 | " \n", 1586 | " \n", 1587 | "
sold
typehouseunitAll
council
RandwickNaN53.053.0
\n", 1588 | "
" 1589 | ], 1590 | "text/plain": [ 1591 | " sold \n", 1592 | "type house unit All\n", 1593 | "council \n", 1594 | "Randwick NaN 53.0 53.0" 1595 | ] 1596 | }, 1597 | "execution_count": 11, 1598 | "metadata": {}, 1599 | "output_type": "execute_result" 1600 | } 1601 | ], 1602 | "source": [ 1603 | "table.query('council==[\"Randwick\"]')" 1604 | ] 1605 | }, 1606 | { 1607 | "cell_type": "markdown", 1608 | "metadata": {}, 1609 | "source": [ 1610 | "We can also specify multiple values" 1611 | ] 1612 | }, 1613 | { 1614 | "cell_type": "code", 1615 | "execution_count": 12, 1616 | "metadata": {}, 1617 | "outputs": [ 1618 | { 1619 | "data": { 1620 | "text/html": [ 1621 | "
\n", 1622 | "\n", 1623 | " \n", 1624 | " \n", 1625 | " \n", 1626 | " \n", 1627 | " \n", 1628 | " \n", 1629 | " \n", 1630 | " \n", 1631 | " \n", 1632 | " \n", 1633 | " \n", 1634 | " \n", 1635 | " \n", 1636 | " \n", 1637 | " \n", 1638 | " \n", 1639 | " \n", 1640 | " \n", 1641 | " \n", 1642 | " \n", 1643 | " \n", 1644 | " \n", 1645 | " \n", 1646 | " \n", 1647 | " \n", 1648 | " \n", 1649 | " \n", 1650 | " \n", 1651 | " \n", 1652 | " \n", 1653 | " \n", 1654 | " \n", 1655 | "
sold
typehouseunitAll
council
Lane CoveNaN220.0220.0
Rockdale17.0NaN17.0
\n", 1656 | "
" 1657 | ], 1658 | "text/plain": [ 1659 | " sold \n", 1660 | "type house unit All\n", 1661 | "council \n", 1662 | "Lane Cove NaN 220.0 220.0\n", 1663 | "Rockdale 17.0 NaN 17.0" 1664 | ] 1665 | }, 1666 | "execution_count": 12, 1667 | "metadata": {}, 1668 | "output_type": "execute_result" 1669 | } 1670 | ], 1671 | "source": [ 1672 | "table.query('council==[\"Rockdale\",\"Lane Cove\"]')" 1673 | ] 1674 | }, 1675 | { 1676 | "cell_type": "markdown", 1677 | "metadata": {}, 1678 | "source": [ 1679 | "Note: we cannot query a pivot table by its ```columns```, so you should put an *item* in ```index``` if you want to query it." 1680 | ] 1681 | }, 1682 | { 1683 | "cell_type": "markdown", 1684 | "metadata": {}, 1685 | "source": [ 1686 | "### Plot the data using plot.ly\n", 1687 | "\n", 1688 | "Before you can execute the code below, you need to install `plotly`, register a free account with them, and create a profile that contains your own API key. See https://plot.ly/python/getting-started/ " 1689 | ] 1690 | }, 1691 | { 1692 | "cell_type": "code", 1693 | "execution_count": 13, 1694 | "metadata": { 1695 | "collapsed": true 1696 | }, 1697 | "outputs": [], 1698 | "source": [ 1699 | "plot_table = table[:-1] # get rid of ALL" 1700 | ] 1701 | }, 1702 | { 1703 | "cell_type": "code", 1704 | "execution_count": 14, 1705 | "metadata": {}, 1706 | "outputs": [ 1707 | { 1708 | "data": { 1709 | "text/plain": [ 1710 | "council\n", 1711 | "Ashfield 159.0\n", 1712 | "Botany Bay 29.0\n", 1713 | "Burwood NaN\n", 1714 | "Canada Bay 15.0\n", 1715 | "Canterbury 36.0\n", 1716 | "Lane Cove NaN\n", 1717 | "Leichhardt 225.0\n", 1718 | "Marrickville 569.0\n", 1719 | "Randwick NaN\n", 1720 | "Rockdale 17.0\n", 1721 | "Ryde NaN\n", 1722 | "Sydney 334.0\n", 1723 | "Name: house, dtype: float64" 1724 | ] 1725 | }, 1726 | "execution_count": 14, 1727 | "metadata": {}, 1728 | "output_type": "execute_result" 1729 | } 1730 | ], 1731 | "source": [ 1732 | "plot_table.sold.house" 1733 | ] 1734 | }, 1735 | { 1736 | 
"cell_type": "code", 1737 | "execution_count": 15, 1738 | "metadata": {}, 1739 | "outputs": [ 1740 | { 1741 | "data": { 1742 | "text/plain": [ 1743 | "council\n", 1744 | "Ashfield 159.0\n", 1745 | "Botany Bay 29.0\n", 1746 | "Burwood NaN\n", 1747 | "Canada Bay 15.0\n", 1748 | "Canterbury 36.0\n", 1749 | "Lane Cove NaN\n", 1750 | "Leichhardt 225.0\n", 1751 | "Marrickville 569.0\n", 1752 | "Randwick NaN\n", 1753 | "Rockdale 17.0\n", 1754 | "Ryde NaN\n", 1755 | "Sydney 334.0\n", 1756 | "All 1384.0\n", 1757 | "Name: house, dtype: float64" 1758 | ] 1759 | }, 1760 | "execution_count": 15, 1761 | "metadata": {}, 1762 | "output_type": "execute_result" 1763 | } 1764 | ], 1765 | "source": [ 1766 | "table.sold.house" 1767 | ] 1768 | }, 1769 | { 1770 | "cell_type": "code", 1771 | "execution_count": 16, 1772 | "metadata": {}, 1773 | "outputs": [ 1774 | { 1775 | "data": { 1776 | "text/plain": [ 1777 | "Index(['Ashfield', 'Botany Bay', 'Burwood', 'Canada Bay', 'Canterbury',\n", 1778 | " 'Lane Cove', 'Leichhardt', 'Marrickville', 'Randwick', 'Rockdale',\n", 1779 | " 'Ryde', 'Sydney'],\n", 1780 | " dtype='object', name='council')" 1781 | ] 1782 | }, 1783 | "execution_count": 16, 1784 | "metadata": {}, 1785 | "output_type": "execute_result" 1786 | } 1787 | ], 1788 | "source": [ 1789 | "plot_table.index" 1790 | ] 1791 | }, 1792 | { 1793 | "cell_type": "code", 1794 | "execution_count": 17, 1795 | "metadata": {}, 1796 | "outputs": [ 1797 | { 1798 | "data": { 1799 | "text/html": [ 1800 | "" 1801 | ], 1802 | "text/plain": [ 1803 | "" 1804 | ] 1805 | }, 1806 | "execution_count": 17, 1807 | "metadata": {}, 1808 | "output_type": "execute_result" 1809 | } 1810 | ], 1811 | "source": [ 1812 | "trace1 = go.Bar(\n", 1813 | " x=plot_table.index,\n", 1814 | " y=plot_table.sold.house,\n", 1815 | " name='House'\n", 1816 | ")\n", 1817 | "trace2 = go.Bar(\n", 1818 | " x=plot_table.index,\n", 1819 | " y=plot_table.sold.unit,\n", 1820 | " name='Unit'\n", 1821 | ")\n", 1822 | "\n", 1823 | "data = 
[trace1, trace2]\n", 1824 | "layout = go.Layout(\n", 1825 | " barmode='group'\n", 1826 | ")\n", 1827 | "\n", 1828 | "fig = go.Figure(data=data, layout=layout)\n", 1829 | "py.iplot(fig, filename='pandas-notebook-plot2')" 1830 | ] 1831 | }, 1832 | { 1833 | "cell_type": "markdown", 1834 | "metadata": {}, 1835 | "source": [ 1836 | "# Exercise\n", 1837 | "\n", 1838 | "Perform some analysis that interests you using the Sydney Auction Data at https://auction-results.domain.com.au/Proofed/PDF/Sydney_Domain.pdf\n" 1839 | ] 1840 | }, 1841 | { 1842 | "cell_type": "code", 1843 | "execution_count": null, 1844 | "metadata": { 1845 | "collapsed": true 1846 | }, 1847 | "outputs": [], 1848 | "source": [] 1849 | }, 1850 | { 1851 | "cell_type": "code", 1852 | "execution_count": null, 1853 | "metadata": { 1854 | "collapsed": true 1855 | }, 1856 | "outputs": [], 1857 | "source": [] 1858 | }, 1859 | { 1860 | "cell_type": "code", 1861 | "execution_count": null, 1862 | "metadata": { 1863 | "collapsed": true 1864 | }, 1865 | "outputs": [], 1866 | "source": [] 1867 | }, 1868 | { 1869 | "cell_type": "code", 1870 | "execution_count": null, 1871 | "metadata": { 1872 | "collapsed": true 1873 | }, 1874 | "outputs": [], 1875 | "source": [] 1876 | }, 1877 | { 1878 | "cell_type": "code", 1879 | "execution_count": null, 1880 | "metadata": { 1881 | "collapsed": true 1882 | }, 1883 | "outputs": [], 1884 | "source": [] 1885 | } 1886 | ], 1887 | "metadata": { 1888 | "kernelspec": { 1889 | "display_name": "Python 3", 1890 | "language": "python", 1891 | "name": "python3" 1892 | }, 1893 | "language_info": { 1894 | "codemirror_mode": { 1895 | "name": "ipython", 1896 | "version": 3 1897 | }, 1898 | "file_extension": ".py", 1899 | "mimetype": "text/x-python", 1900 | "name": "python", 1901 | "nbconvert_exporter": "python", 1902 | "pygments_lexer": "ipython3", 1903 | "version": "3.6.1" 1904 | } 1905 | }, 1906 | "nbformat": 4, 1907 | "nbformat_minor": 1 1908 | } 1909 | 
--------------------------------------------------------------------------------