├── .github └── FUNDING.yml ├── data ├── fake_items.csv ├── fake_ratings.csv ├── stocks1.csv ├── stocks2.csv ├── stocks3.csv ├── stocks.csv ├── drinks2.csv ├── drinks1.csv ├── drinks.csv ├── u.user └── titanic_test.csv ├── .gitignore ├── environment.yml ├── README.md ├── pandas_tricks.ipynb ├── pandas_merge.ipynb └── pandas_multiindex.ipynb /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | patreon: dataschool 2 | -------------------------------------------------------------------------------- /data/fake_items.csv: -------------------------------------------------------------------------------- 1 | sku,name 2 | 1,A 3 | 2,B 4 | 3,C 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints/ 2 | .DS_Store 3 | *.pyc 4 | extras/ 5 | -------------------------------------------------------------------------------- /data/fake_ratings.csv: -------------------------------------------------------------------------------- 1 | sku,rating,user_id 2 | 2,10,100 3 | 2,8,101 4 | 1,5,102 5 | 4,9,103 6 | 1,6,104 7 | 4,7,105 8 | -------------------------------------------------------------------------------- /data/stocks1.csv: -------------------------------------------------------------------------------- 1 | Date,Close,Volume,Symbol 2 | 2016-10-03,31.50,14070500,CSCO 3 | 2016-10-03,112.52,21701800,AAPL 4 | 2016-10-03,57.42,19189500,MSFT 5 | -------------------------------------------------------------------------------- /data/stocks2.csv: -------------------------------------------------------------------------------- 1 | Date,Close,Volume,Symbol 2 | 2016-10-04,113.00,29736800,AAPL 3 | 2016-10-04,57.24,20085900,MSFT 4 | 2016-10-04,31.35,18460400,CSCO 5 | -------------------------------------------------------------------------------- /data/stocks3.csv: 
-------------------------------------------------------------------------------- 1 | Date,Close,Volume,Symbol 2 | 2016-10-05,57.64,16726400,MSFT 3 | 2016-10-05,31.59,11808600,CSCO 4 | 2016-10-05,113.05,21453100,AAPL 5 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: pandas-videos 2 | channels: 3 | - defaults 4 | dependencies: 5 | - pandas 6 | - numpy 7 | - matplotlib 8 | - scikit-learn 9 | -------------------------------------------------------------------------------- /data/stocks.csv: -------------------------------------------------------------------------------- 1 | Date,Close,Volume,Symbol 2 | 2016-10-03,31.50,14070500,CSCO 3 | 2016-10-03,112.52,21701800,AAPL 4 | 2016-10-03,57.42,19189500,MSFT 5 | 2016-10-04,113.00,29736800,AAPL 6 | 2016-10-04,57.24,20085900,MSFT 7 | 2016-10-04,31.35,18460400,CSCO 8 | 2016-10-05,57.64,16726400,MSFT 9 | 2016-10-05,31.59,11808600,CSCO 10 | 2016-10-05,113.05,21453100,AAPL 11 | -------------------------------------------------------------------------------- /data/drinks2.csv: -------------------------------------------------------------------------------- 1 | wine_servings,total_litres_of_pure_alcohol,continent 2 | 0,0.0,Asia 3 | 54,4.9,Europe 4 | 14,0.7,Africa 5 | 312,12.4,Europe 6 | 45,5.9,Africa 7 | 45,4.9,North America 8 | 221,8.3,South America 9 | 11,3.8,Europe 10 | 212,10.4,Oceania 11 | 191,9.7,Europe 12 | 5,1.3,Europe 13 | 51,6.3,North America 14 | 7,2.0,Asia 15 | 0,0.0,Asia 16 | 36,6.3,North America 17 | 42,14.4,Europe 18 | 212,10.5,Europe 19 | 8,6.8,North America 20 | 13,1.1,Africa 21 | 0,0.4,Asia 22 | 8,3.8,South America 23 | 8,4.6,Europe 24 | 35,5.4,Africa 25 | 16,7.2,South America 26 | 1,0.6,Asia 27 | 94,10.3,Europe 28 | 7,4.3,Africa 29 | 0,6.3,Africa 30 | 7,4.0,Africa 31 | 16,4.0,Africa 32 | 1,2.2,Asia 33 | 4,5.8,Africa 34 | 100,8.2,North America 35 | 1,1.8,Africa 36 | 
1,0.4,Africa 37 | 172,7.6,South America 38 | 8,5.0,Asia 39 | 3,4.2,South America 40 | 1,0.1,Africa 41 | 9,1.7,Africa 42 | 74,5.9,Oceania 43 | 11,4.4,North America 44 | 254,10.2,Europe 45 | 5,4.2,North America 46 | 113,8.2,Europe 47 | 134,11.8,Europe 48 | 0,0.0,Asia 49 | 1,2.3,Africa 50 | 278,10.4,Europe 51 | 3,1.1,Africa 52 | 26,6.6,North America 53 | 9,6.2,North America 54 | 3,4.2,South America 55 | 1,0.2,Africa 56 | 2,2.2,North America 57 | 233,5.8,Africa 58 | 0,0.5,Africa 59 | 59,9.5,Europe 60 | 0,0.7,Africa 61 | 1,2.0,Oceania 62 | 97,10.0,Europe 63 | 370,11.8,Europe 64 | 59,8.9,Africa 65 | 1,2.4,Africa 66 | 149,5.4,Europe 67 | 175,11.3,Europe 68 | 10,1.8,Africa 69 | 218,8.3,Europe 70 | 28,11.9,North America 71 | 2,2.2,North America 72 | 2,0.2,Africa 73 | 21,2.5,Africa 74 | 1,7.1,South America 75 | 1,5.9,North America 76 | 2,3.0,North America 77 | 185,11.3,Europe 78 | 78,6.6,Europe 79 | 0,2.2,Asia 80 | 0,0.1,Asia 81 | 0,0.0,Asia 82 | 0,0.2,Asia 83 | 165,11.4,Europe 84 | 9,2.5,Asia 85 | 237,6.5,Europe 86 | 9,3.4,North America 87 | 16,7.0,Asia 88 | 1,0.5,Asia 89 | 12,6.8,Asia 90 | 2,1.8,Africa 91 | 1,1.0,Oceania 92 | 0,0.0,Asia 93 | 6,2.4,Asia 94 | 123,6.2,Asia 95 | 62,10.5,Europe 96 | 31,1.9,Asia 97 | 0,2.8,Africa 98 | 2,3.1,Africa 99 | 0,0.0,Africa 100 | 56,12.9,Europe 101 | 271,11.4,Europe 102 | 4,0.8,Africa 103 | 1,1.5,Africa 104 | 0,0.3,Asia 105 | 0,0.0,Asia 106 | 1,0.6,Africa 107 | 120,6.6,Europe 108 | 0,0.0,Oceania 109 | 0,0.0,Africa 110 | 18,2.6,Africa 111 | 5,5.5,North America 112 | 18,2.3,Oceania 113 | 0,0.0,Europe 114 | 8,4.9,Asia 115 | 128,4.9,Europe 116 | 10,0.5,Africa 117 | 5,1.3,Africa 118 | 0,0.1,Asia 119 | 1,6.8,Africa 120 | 8,1.0,Oceania 121 | 0,0.2,Asia 122 | 190,9.4,Europe 123 | 175,9.3,Oceania 124 | 1,3.5,North America 125 | 1,0.1,Africa 126 | 2,9.1,Africa 127 | 7,7.0,Oceania 128 | 129,6.7,Europe 129 | 1,0.7,Asia 130 | 0,0.0,Asia 131 | 23,6.9,Oceania 132 | 18,7.2,North America 133 | 1,1.5,Oceania 134 | 74,7.3,South America 135 | 21,6.1,South 
America 136 | 1,4.6,Asia 137 | 56,10.9,Europe 138 | 339,11.0,Europe 139 | 7,0.9,Asia 140 | 9,9.8,Asia 141 | 18,6.3,Europe 142 | 167,10.4,Europe 143 | 73,11.5,Asia 144 | 0,6.8,Africa 145 | 32,7.7,North America 146 | 71,10.1,North America 147 | 11,6.3,North America 148 | 24,2.6,Oceania 149 | 0,0.0,Europe 150 | 140,4.2,Africa 151 | 0,0.1,Asia 152 | 7,0.3,Africa 153 | 127,9.6,Europe 154 | 51,4.1,Africa 155 | 2,6.7,Africa 156 | 11,1.5,Asia 157 | 116,11.4,Europe 158 | 276,10.6,Europe 159 | 1,1.2,Oceania 160 | 0,0.0,Africa 161 | 81,8.2,Africa 162 | 112,10.0,Europe 163 | 0,2.2,Asia 164 | 0,1.7,Africa 165 | 7,5.6,South America 166 | 2,4.7,Africa 167 | 186,7.2,Europe 168 | 280,10.2,Europe 169 | 16,1.0,Asia 170 | 0,0.3,Asia 171 | 1,6.4,Asia 172 | 86,3.9,Europe 173 | 4,0.1,Asia 174 | 19,1.3,Africa 175 | 5,1.1,Oceania 176 | 7,6.4,North America 177 | 20,1.3,Africa 178 | 7,1.4,Asia 179 | 32,2.2,Asia 180 | 9,1.0,Oceania 181 | 0,8.3,Africa 182 | 45,8.9,Europe 183 | 5,2.8,Asia 184 | 195,10.4,Europe 185 | 1,5.7,Africa 186 | 84,8.7,North America 187 | 220,6.6,South America 188 | 8,2.4,Asia 189 | 11,0.9,Oceania 190 | 3,7.7,South America 191 | 1,2.0,Asia 192 | 0,0.1,Asia 193 | 4,2.5,Africa 194 | 4,4.7,Africa 195 | -------------------------------------------------------------------------------- /data/drinks1.csv: -------------------------------------------------------------------------------- 1 | country,beer_servings,spirit_servings 2 | Afghanistan,0,0 3 | Albania,89,132 4 | Algeria,25,0 5 | Andorra,245,138 6 | Angola,217,57 7 | Antigua & Barbuda,102,128 8 | Argentina,193,25 9 | Armenia,21,179 10 | Australia,261,72 11 | Austria,279,75 12 | Azerbaijan,21,46 13 | Bahamas,122,176 14 | Bahrain,42,63 15 | Bangladesh,0,0 16 | Barbados,143,173 17 | Belarus,142,373 18 | Belgium,295,84 19 | Belize,263,114 20 | Benin,34,4 21 | Bhutan,23,0 22 | Bolivia,167,41 23 | Bosnia-Herzegovina,76,173 24 | Botswana,173,35 25 | Brazil,245,145 26 | Brunei,31,2 27 | Bulgaria,231,252 28 | Burkina Faso,25,7 29 | 
Burundi,88,0 30 | Cote d'Ivoire,37,1 31 | Cabo Verde,144,56 32 | Cambodia,57,65 33 | Cameroon,147,1 34 | Canada,240,122 35 | Central African Republic,17,2 36 | Chad,15,1 37 | Chile,130,124 38 | China,79,192 39 | Colombia,159,76 40 | Comoros,1,3 41 | Congo,76,1 42 | Cook Islands,0,254 43 | Costa Rica,149,87 44 | Croatia,230,87 45 | Cuba,93,137 46 | Cyprus,192,154 47 | Czech Republic,361,170 48 | North Korea,0,0 49 | DR Congo,32,3 50 | Denmark,224,81 51 | Djibouti,15,44 52 | Dominica,52,286 53 | Dominican Republic,193,147 54 | Ecuador,162,74 55 | Egypt,6,4 56 | El Salvador,52,69 57 | Equatorial Guinea,92,0 58 | Eritrea,18,0 59 | Estonia,224,194 60 | Ethiopia,20,3 61 | Fiji,77,35 62 | Finland,263,133 63 | France,127,151 64 | Gabon,347,98 65 | Gambia,8,0 66 | Georgia,52,100 67 | Germany,346,117 68 | Ghana,31,3 69 | Greece,133,112 70 | Grenada,199,438 71 | Guatemala,53,69 72 | Guinea,9,0 73 | Guinea-Bissau,28,31 74 | Guyana,93,302 75 | Haiti,1,326 76 | Honduras,69,98 77 | Hungary,234,215 78 | Iceland,233,61 79 | India,9,114 80 | Indonesia,5,1 81 | Iran,0,0 82 | Iraq,9,3 83 | Ireland,313,118 84 | Israel,63,69 85 | Italy,85,42 86 | Jamaica,82,97 87 | Japan,77,202 88 | Jordan,6,21 89 | Kazakhstan,124,246 90 | Kenya,58,22 91 | Kiribati,21,34 92 | Kuwait,0,0 93 | Kyrgyzstan,31,97 94 | Laos,62,0 95 | Latvia,281,216 96 | Lebanon,20,55 97 | Lesotho,82,29 98 | Liberia,19,152 99 | Libya,0,0 100 | Lithuania,343,244 101 | Luxembourg,236,133 102 | Madagascar,26,15 103 | Malawi,8,11 104 | Malaysia,13,4 105 | Maldives,0,0 106 | Mali,5,1 107 | Malta,149,100 108 | Marshall Islands,0,0 109 | Mauritania,0,0 110 | Mauritius,98,31 111 | Mexico,238,68 112 | Micronesia,62,50 113 | Monaco,0,0 114 | Mongolia,77,189 115 | Montenegro,31,114 116 | Morocco,12,6 117 | Mozambique,47,18 118 | Myanmar,5,1 119 | Namibia,376,3 120 | Nauru,49,0 121 | Nepal,5,6 122 | Netherlands,251,88 123 | New Zealand,203,79 124 | Nicaragua,78,118 125 | Niger,3,2 126 | Nigeria,42,5 127 | Niue,188,200 128 | Norway,169,71 
129 | Oman,22,16 130 | Pakistan,0,0 131 | Palau,306,63 132 | Panama,285,104 133 | Papua New Guinea,44,39 134 | Paraguay,213,117 135 | Peru,163,160 136 | Philippines,71,186 137 | Poland,343,215 138 | Portugal,194,67 139 | Qatar,1,42 140 | South Korea,140,16 141 | Moldova,109,226 142 | Romania,297,122 143 | Russian Federation,247,326 144 | Rwanda,43,2 145 | St. Kitts & Nevis,194,205 146 | St. Lucia,171,315 147 | St. Vincent & the Grenadines,120,221 148 | Samoa,105,18 149 | San Marino,0,0 150 | Sao Tome & Principe,56,38 151 | Saudi Arabia,0,5 152 | Senegal,9,1 153 | Serbia,283,131 154 | Seychelles,157,25 155 | Sierra Leone,25,3 156 | Singapore,60,12 157 | Slovakia,196,293 158 | Slovenia,270,51 159 | Solomon Islands,56,11 160 | Somalia,0,0 161 | South Africa,225,76 162 | Spain,284,157 163 | Sri Lanka,16,104 164 | Sudan,8,13 165 | Suriname,128,178 166 | Swaziland,90,2 167 | Sweden,152,60 168 | Switzerland,185,100 169 | Syria,5,35 170 | Tajikistan,2,15 171 | Thailand,99,258 172 | Macedonia,106,27 173 | Timor-Leste,1,1 174 | Togo,36,2 175 | Tonga,36,21 176 | Trinidad & Tobago,197,156 177 | Tunisia,51,3 178 | Turkey,51,22 179 | Turkmenistan,19,71 180 | Tuvalu,6,41 181 | Uganda,45,9 182 | Ukraine,206,237 183 | United Arab Emirates,16,135 184 | United Kingdom,219,126 185 | Tanzania,36,6 186 | USA,249,158 187 | Uruguay,115,35 188 | Uzbekistan,25,101 189 | Vanuatu,21,18 190 | Venezuela,333,100 191 | Vietnam,111,2 192 | Yemen,6,0 193 | Zambia,32,19 194 | Zimbabwe,64,18 195 | -------------------------------------------------------------------------------- /data/drinks.csv: -------------------------------------------------------------------------------- 1 | country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent 2 | Afghanistan,0,0,0,0.0,Asia 3 | Albania,89,132,54,4.9,Europe 4 | Algeria,25,0,14,0.7,Africa 5 | Andorra,245,138,312,12.4,Europe 6 | Angola,217,57,45,5.9,Africa 7 | Antigua & Barbuda,102,128,45,4.9,North America 8 | 
Argentina,193,25,221,8.3,South America 9 | Armenia,21,179,11,3.8,Europe 10 | Australia,261,72,212,10.4,Oceania 11 | Austria,279,75,191,9.7,Europe 12 | Azerbaijan,21,46,5,1.3,Europe 13 | Bahamas,122,176,51,6.3,North America 14 | Bahrain,42,63,7,2.0,Asia 15 | Bangladesh,0,0,0,0.0,Asia 16 | Barbados,143,173,36,6.3,North America 17 | Belarus,142,373,42,14.4,Europe 18 | Belgium,295,84,212,10.5,Europe 19 | Belize,263,114,8,6.8,North America 20 | Benin,34,4,13,1.1,Africa 21 | Bhutan,23,0,0,0.4,Asia 22 | Bolivia,167,41,8,3.8,South America 23 | Bosnia-Herzegovina,76,173,8,4.6,Europe 24 | Botswana,173,35,35,5.4,Africa 25 | Brazil,245,145,16,7.2,South America 26 | Brunei,31,2,1,0.6,Asia 27 | Bulgaria,231,252,94,10.3,Europe 28 | Burkina Faso,25,7,7,4.3,Africa 29 | Burundi,88,0,0,6.3,Africa 30 | Cote d'Ivoire,37,1,7,4.0,Africa 31 | Cabo Verde,144,56,16,4.0,Africa 32 | Cambodia,57,65,1,2.2,Asia 33 | Cameroon,147,1,4,5.8,Africa 34 | Canada,240,122,100,8.2,North America 35 | Central African Republic,17,2,1,1.8,Africa 36 | Chad,15,1,1,0.4,Africa 37 | Chile,130,124,172,7.6,South America 38 | China,79,192,8,5.0,Asia 39 | Colombia,159,76,3,4.2,South America 40 | Comoros,1,3,1,0.1,Africa 41 | Congo,76,1,9,1.7,Africa 42 | Cook Islands,0,254,74,5.9,Oceania 43 | Costa Rica,149,87,11,4.4,North America 44 | Croatia,230,87,254,10.2,Europe 45 | Cuba,93,137,5,4.2,North America 46 | Cyprus,192,154,113,8.2,Europe 47 | Czech Republic,361,170,134,11.8,Europe 48 | North Korea,0,0,0,0.0,Asia 49 | DR Congo,32,3,1,2.3,Africa 50 | Denmark,224,81,278,10.4,Europe 51 | Djibouti,15,44,3,1.1,Africa 52 | Dominica,52,286,26,6.6,North America 53 | Dominican Republic,193,147,9,6.2,North America 54 | Ecuador,162,74,3,4.2,South America 55 | Egypt,6,4,1,0.2,Africa 56 | El Salvador,52,69,2,2.2,North America 57 | Equatorial Guinea,92,0,233,5.8,Africa 58 | Eritrea,18,0,0,0.5,Africa 59 | Estonia,224,194,59,9.5,Europe 60 | Ethiopia,20,3,0,0.7,Africa 61 | Fiji,77,35,1,2.0,Oceania 62 | Finland,263,133,97,10.0,Europe 63 | 
France,127,151,370,11.8,Europe 64 | Gabon,347,98,59,8.9,Africa 65 | Gambia,8,0,1,2.4,Africa 66 | Georgia,52,100,149,5.4,Europe 67 | Germany,346,117,175,11.3,Europe 68 | Ghana,31,3,10,1.8,Africa 69 | Greece,133,112,218,8.3,Europe 70 | Grenada,199,438,28,11.9,North America 71 | Guatemala,53,69,2,2.2,North America 72 | Guinea,9,0,2,0.2,Africa 73 | Guinea-Bissau,28,31,21,2.5,Africa 74 | Guyana,93,302,1,7.1,South America 75 | Haiti,1,326,1,5.9,North America 76 | Honduras,69,98,2,3.0,North America 77 | Hungary,234,215,185,11.3,Europe 78 | Iceland,233,61,78,6.6,Europe 79 | India,9,114,0,2.2,Asia 80 | Indonesia,5,1,0,0.1,Asia 81 | Iran,0,0,0,0.0,Asia 82 | Iraq,9,3,0,0.2,Asia 83 | Ireland,313,118,165,11.4,Europe 84 | Israel,63,69,9,2.5,Asia 85 | Italy,85,42,237,6.5,Europe 86 | Jamaica,82,97,9,3.4,North America 87 | Japan,77,202,16,7.0,Asia 88 | Jordan,6,21,1,0.5,Asia 89 | Kazakhstan,124,246,12,6.8,Asia 90 | Kenya,58,22,2,1.8,Africa 91 | Kiribati,21,34,1,1.0,Oceania 92 | Kuwait,0,0,0,0.0,Asia 93 | Kyrgyzstan,31,97,6,2.4,Asia 94 | Laos,62,0,123,6.2,Asia 95 | Latvia,281,216,62,10.5,Europe 96 | Lebanon,20,55,31,1.9,Asia 97 | Lesotho,82,29,0,2.8,Africa 98 | Liberia,19,152,2,3.1,Africa 99 | Libya,0,0,0,0.0,Africa 100 | Lithuania,343,244,56,12.9,Europe 101 | Luxembourg,236,133,271,11.4,Europe 102 | Madagascar,26,15,4,0.8,Africa 103 | Malawi,8,11,1,1.5,Africa 104 | Malaysia,13,4,0,0.3,Asia 105 | Maldives,0,0,0,0.0,Asia 106 | Mali,5,1,1,0.6,Africa 107 | Malta,149,100,120,6.6,Europe 108 | Marshall Islands,0,0,0,0.0,Oceania 109 | Mauritania,0,0,0,0.0,Africa 110 | Mauritius,98,31,18,2.6,Africa 111 | Mexico,238,68,5,5.5,North America 112 | Micronesia,62,50,18,2.3,Oceania 113 | Monaco,0,0,0,0.0,Europe 114 | Mongolia,77,189,8,4.9,Asia 115 | Montenegro,31,114,128,4.9,Europe 116 | Morocco,12,6,10,0.5,Africa 117 | Mozambique,47,18,5,1.3,Africa 118 | Myanmar,5,1,0,0.1,Asia 119 | Namibia,376,3,1,6.8,Africa 120 | Nauru,49,0,8,1.0,Oceania 121 | Nepal,5,6,0,0.2,Asia 122 | 
Netherlands,251,88,190,9.4,Europe 123 | New Zealand,203,79,175,9.3,Oceania 124 | Nicaragua,78,118,1,3.5,North America 125 | Niger,3,2,1,0.1,Africa 126 | Nigeria,42,5,2,9.1,Africa 127 | Niue,188,200,7,7.0,Oceania 128 | Norway,169,71,129,6.7,Europe 129 | Oman,22,16,1,0.7,Asia 130 | Pakistan,0,0,0,0.0,Asia 131 | Palau,306,63,23,6.9,Oceania 132 | Panama,285,104,18,7.2,North America 133 | Papua New Guinea,44,39,1,1.5,Oceania 134 | Paraguay,213,117,74,7.3,South America 135 | Peru,163,160,21,6.1,South America 136 | Philippines,71,186,1,4.6,Asia 137 | Poland,343,215,56,10.9,Europe 138 | Portugal,194,67,339,11.0,Europe 139 | Qatar,1,42,7,0.9,Asia 140 | South Korea,140,16,9,9.8,Asia 141 | Moldova,109,226,18,6.3,Europe 142 | Romania,297,122,167,10.4,Europe 143 | Russian Federation,247,326,73,11.5,Asia 144 | Rwanda,43,2,0,6.8,Africa 145 | St. Kitts & Nevis,194,205,32,7.7,North America 146 | St. Lucia,171,315,71,10.1,North America 147 | St. Vincent & the Grenadines,120,221,11,6.3,North America 148 | Samoa,105,18,24,2.6,Oceania 149 | San Marino,0,0,0,0.0,Europe 150 | Sao Tome & Principe,56,38,140,4.2,Africa 151 | Saudi Arabia,0,5,0,0.1,Asia 152 | Senegal,9,1,7,0.3,Africa 153 | Serbia,283,131,127,9.6,Europe 154 | Seychelles,157,25,51,4.1,Africa 155 | Sierra Leone,25,3,2,6.7,Africa 156 | Singapore,60,12,11,1.5,Asia 157 | Slovakia,196,293,116,11.4,Europe 158 | Slovenia,270,51,276,10.6,Europe 159 | Solomon Islands,56,11,1,1.2,Oceania 160 | Somalia,0,0,0,0.0,Africa 161 | South Africa,225,76,81,8.2,Africa 162 | Spain,284,157,112,10.0,Europe 163 | Sri Lanka,16,104,0,2.2,Asia 164 | Sudan,8,13,0,1.7,Africa 165 | Suriname,128,178,7,5.6,South America 166 | Swaziland,90,2,2,4.7,Africa 167 | Sweden,152,60,186,7.2,Europe 168 | Switzerland,185,100,280,10.2,Europe 169 | Syria,5,35,16,1.0,Asia 170 | Tajikistan,2,15,0,0.3,Asia 171 | Thailand,99,258,1,6.4,Asia 172 | Macedonia,106,27,86,3.9,Europe 173 | Timor-Leste,1,1,4,0.1,Asia 174 | Togo,36,2,19,1.3,Africa 175 | Tonga,36,21,5,1.1,Oceania 176 | 
Trinidad & Tobago,197,156,7,6.4,North America 177 | Tunisia,51,3,20,1.3,Africa 178 | Turkey,51,22,7,1.4,Asia 179 | Turkmenistan,19,71,32,2.2,Asia 180 | Tuvalu,6,41,9,1.0,Oceania 181 | Uganda,45,9,0,8.3,Africa 182 | Ukraine,206,237,45,8.9,Europe 183 | United Arab Emirates,16,135,5,2.8,Asia 184 | United Kingdom,219,126,195,10.4,Europe 185 | Tanzania,36,6,1,5.7,Africa 186 | USA,249,158,84,8.7,North America 187 | Uruguay,115,35,220,6.6,South America 188 | Uzbekistan,25,101,8,2.4,Asia 189 | Vanuatu,21,18,11,0.9,Oceania 190 | Venezuela,333,100,3,7.7,South America 191 | Vietnam,111,2,1,2.0,Asia 192 | Yemen,6,0,0,0.1,Asia 193 | Zambia,32,19,4,2.5,Africa 194 | Zimbabwe,64,18,4,4.7,Africa 195 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python pandas Q&A video series 2 | 3 | Read about the series, and view all of the videos on one page: [Easier data analysis in Python with pandas](http://www.dataschool.io/easier-data-analysis-with-pandas/). 4 | 5 | ## 📺 Videos ([playlist](https://www.youtube.com/playlist?list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y)) 6 | 7 | 1. [What is pandas? (Introduction to the Q&A series)](https://www.youtube.com/watch?v=yzIMircGU5I&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=1) (6:24) 8 | 2. [How do I read a tabular data file into pandas?](https://www.youtube.com/watch?v=5_QXMwezPJE&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=2) (8:54) 9 | 3. [How do I select a pandas Series from a DataFrame?](https://www.youtube.com/watch?v=zxqjeyKP2Tk&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=3) (11:10) 10 | 4. [Why do some pandas commands end with parentheses (and others don't)?](https://www.youtube.com/watch?v=hSrDViyKWVk&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=4) (8:45) 11 | 5. 
[How do I rename columns in a pandas DataFrame?](https://www.youtube.com/watch?v=0uBirYFhizE&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=5) (9:36) 12 | 6. [How do I remove columns from a pandas DataFrame?](https://www.youtube.com/watch?v=gnUKkS964WQ&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=6) (6:35) 13 | 7. [How do I sort a pandas DataFrame or a Series?](https://www.youtube.com/watch?v=zY4doF6xSxY&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=7) (8:56) 14 | 8. [How do I filter rows of a pandas DataFrame by column value?](https://www.youtube.com/watch?v=2AFGPdNn4FM&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=8) (13:44) 15 | 9. [How do I apply multiple filter criteria to a pandas DataFrame?](https://www.youtube.com/watch?v=YPItfQ87qjM&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=9) (9:51) 16 | 10. [Your pandas questions answered!](https://www.youtube.com/watch?v=B-r9VuK80dk&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=10) (9:06) 17 | 11. [How do I use the "axis" parameter in pandas?](https://www.youtube.com/watch?v=PtO3t6ynH-8&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=11) (8:33) 18 | 12. [How do I use string methods in pandas?](https://www.youtube.com/watch?v=bofaC0IckHo&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=12) (6:16) 19 | 13. [How do I change the data type of a pandas Series?](https://www.youtube.com/watch?v=V0AWyzVMf54&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=13) (7:28) 20 | 14. [When should I use a "groupby" in pandas?](https://www.youtube.com/watch?v=qy0fDqoMJx8&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=14) (8:24) 21 | 15. [How do I explore a pandas Series?](https://www.youtube.com/watch?v=QTVTq8SPzxM&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=15) (9:50) 22 | 16. [How do I handle missing values in pandas?](https://www.youtube.com/watch?v=fCMrO_VzeL8&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=16) (14:27) 23 | 17. [What do I need to know about the pandas index? 
(Part 1)](https://www.youtube.com/watch?v=OYZNk7Z9s6I&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=17) (13:36) 24 | 18. [What do I need to know about the pandas index? (Part 2)](https://www.youtube.com/watch?v=15q-is8P_H4&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=18) (10:38) 25 | 19. [How do I select multiple rows and columns from a pandas DataFrame?](https://www.youtube.com/watch?v=xvpNA7bC8cs&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=19) (21:46) 26 | 20. [When should I use the "inplace" parameter in pandas?](https://www.youtube.com/watch?v=XaCSdr7pPmY&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=20) (10:18) 27 | 21. [How do I make my pandas DataFrame smaller and faster?](https://www.youtube.com/watch?v=wDYDYGyN_cw&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=21) (19:05) 28 | 22. [How do I use pandas with scikit-learn to create Kaggle submissions?](https://www.youtube.com/watch?v=ylRlGCtAtiE&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=22) (13:25) 29 | 23. [More of your pandas questions answered!](https://www.youtube.com/watch?v=oH3wYKvwpJ8&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=23) (19:23) 30 | 24. [How do I create dummy variables in pandas?](https://www.youtube.com/watch?v=0s_1IsROgDc&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=24) (13:13) 31 | 25. [How do I work with dates and times in pandas?](https://www.youtube.com/watch?v=yCgJGsg0Xa4&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=25) (10:20) 32 | 26. [How do I find and remove duplicate rows in pandas?](https://www.youtube.com/watch?v=ht5buXUMqkQ&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=26) (9:47) 33 | 27. [How do I avoid a SettingWithCopyWarning in pandas?](https://www.youtube.com/watch?v=4R4WsDJ-KVc&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=27) (13:29) 34 | 28. [How do I change display options in pandas?](https://www.youtube.com/watch?v=yiO43TQ4xvc&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=28) (14:55) 35 | 29. 
[How do I create a pandas DataFrame from another object?](https://www.youtube.com/watch?v=-Ov1N1_FbP8&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=29) (14:25) 36 | 30. [How do I apply a function to a pandas Series or DataFrame?](https://www.youtube.com/watch?v=P_q0tkYqvSk&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=30) (17:57) 37 | 31. **Bonus:** [How do I use the MultiIndex in pandas?](https://www.youtube.com/watch?v=tcRGa2soc-c&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=31) (25:00) 38 | 32. **Bonus:** [How do I merge DataFrames in pandas?](https://www.youtube.com/watch?v=iYWKfUOtGaw&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=32) (21:48) 39 | 33. **Bonus:** [4 new time-saving tricks in pandas](https://www.youtube.com/watch?v=-NbY7E9hKxk&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=33) (14:50) 40 | 34. **Bonus:** [5 new changes in pandas you need to know about](https://www.youtube.com/watch?v=te5JrSCW-LY&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=34) (20:54) 41 | 35. **Bonus:** [My top 25 pandas tricks](https://www.youtube.com/watch?v=RlIiVeig3hc&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=35) (27:37) 42 | 36. **Bonus:** [21 more pandas tricks](https://www.youtube.com/watch?v=tWFQqaRtSQA&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=36) (24:39) 43 | 37. **Bonus:** [Data Science Best Practices with pandas (PyCon 2019)](https://www.youtube.com/watch?v=dPwLlJkSHLo&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=37) (1:44:16) 44 | 38. **Bonus:** [Your pandas questions answered! 
(webcast)](https://www.youtube.com/watch?v=CWRKgBtZN18&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=38) (1:56:01) 45 | 46 | ## 📓 Jupyter Notebooks 47 | 48 | - [Python pandas Q&A series](http://nbviewer.jupyter.org/github/justmarkham/pandas-videos/blob/master/pandas.ipynb) (videos 1 to 30) 49 | - [How do I use the MultiIndex in pandas?](http://nbviewer.jupyter.org/github/justmarkham/pandas-videos/blob/master/pandas_multiindex.ipynb) (video 31) 50 | - [How do I merge DataFrames in pandas?](http://nbviewer.jupyter.org/github/justmarkham/pandas-videos/blob/master/pandas_merge.ipynb) (video 32) 51 | - [4 new time-saving tricks in pandas](http://nbviewer.jupyter.org/github/justmarkham/pandas-videos/blob/master/pandas_tricks.ipynb) (video 33) 52 | - [5 new changes in pandas you need to know about](http://nbviewer.jupyter.org/github/justmarkham/pandas-videos/blob/master/pandas_changes.ipynb) (video 34) 53 | - [My top 25 pandas tricks](http://nbviewer.jupyter.org/github/justmarkham/pandas-videos/blob/master/top_25_pandas_tricks.ipynb) (video 35) 54 | - [21 more pandas tricks](http://nbviewer.jupyter.org/github/justmarkham/pandas-videos/blob/master/21_more_pandas_tricks.ipynb) (video 36) 55 | - [Data Science Best Practices with pandas (PyCon 2019)](https://nbviewer.jupyter.org/github/justmarkham/pycon-2019-tutorial/blob/master/tutorial.ipynb) (video 37) 56 | 57 | ## 📊 Datasets 58 | 59 | Filename | Description | Raw File | Original Source | Other 60 | --- | --- | --- | --- | --- 61 | [chipotle.tsv](data/chipotle.tsv) | Online orders from the Chipotle restaurant chain | [bit.ly/chiporders](http://bit.ly/chiporders) | [The Upshot](https://github.com/TheUpshot/chipotle) | [Upshot article](http://www.nytimes.com/interactive/2015/02/17/upshot/what-do-people-actually-order-at-chipotle.html) 62 | [drinks.csv](data/drinks.csv) | Alcohol consumption by country | [bit.ly/drinksbycountry](http://bit.ly/drinksbycountry) | 
[FiveThirtyEight](https://github.com/fivethirtyeight/data/tree/master/alcohol-consumption) | [FiveThirtyEight article](http://fivethirtyeight.com/datalab/dear-mona-followup-where-do-people-drink-the-most-beer-wine-and-spirits/) 63 | [imdb_1000.csv](data/imdb_1000.csv) | Top rated movies from IMDb | [bit.ly/imdbratings](http://bit.ly/imdbratings) | [IMDb](http://www.imdb.com/search/title?groups=top_1000&sort=user_rating&view=simple) | [Web scraping script](https://github.com/justmarkham/DAT5/blob/master/code/08_web_scraping.py) 64 | [stocks.csv](data/stocks.csv) | Small dataset of stock prices | [bit.ly/smallstocks](http://bit.ly/smallstocks) | [DataCamp](https://www.datacamp.com/courses/manipulating-dataframes-with-pandas?tap_a=5644-dce66f&tap_s=280411-a25fc8) | 65 | [titanic_test.csv](data/titanic_test.csv) | Testing set from Kaggle's Titanic competition | [bit.ly/kaggletest](http://bit.ly/kaggletest) | [Kaggle](https://www.kaggle.com/c/titanic) | [Data dictionary](https://www.kaggle.com/c/titanic/data) 66 | [titanic_train.csv](data/titanic_train.csv) | Training set from Kaggle's Titanic competition | [bit.ly/kaggletrain](http://bit.ly/kaggletrain) | [Kaggle](https://www.kaggle.com/c/titanic) | [Data dictionary](https://www.kaggle.com/c/titanic/data) 67 | [u.data](data/u.data) | Movie ratings by MovieLens users | [bit.ly/movielensdata](http://bit.ly/movielensdata) | [GroupLens](http://grouplens.org/datasets/movielens/100k/) | [Data dictionary](http://files.grouplens.org/datasets/movielens/ml-100k-README.txt) 68 | [u.item](data/u.item) | Movie information from MovieLens | [bit.ly/movieitems](http://bit.ly/movieitems) | [GroupLens](http://grouplens.org/datasets/movielens/100k/) | [Data dictionary](http://files.grouplens.org/datasets/movielens/ml-100k-README.txt) 69 | [u.user](data/u.user) | Demographic information about MovieLens users | [bit.ly/movieusers](http://bit.ly/movieusers) | [GroupLens](http://grouplens.org/datasets/movielens/100k/) | [Data 
dictionary](http://files.grouplens.org/datasets/movielens/ml-100k-README.txt) 70 | [ufo.csv](data/ufo.csv) | Reports of UFO sightings from 1930-2000 | [bit.ly/uforeports](http://bit.ly/uforeports) | [National UFO Reporting Center](http://www.nuforc.org/webreports.html) | [Web scraping script](https://github.com/josiahdavis/josiahdavis.github.io/blob/master/supporting%20material/get_ufo_data.py) 71 | -------------------------------------------------------------------------------- /data/u.user: -------------------------------------------------------------------------------- 1 | 1|24|M|technician|85711 2 | 2|53|F|other|94043 3 | 3|23|M|writer|32067 4 | 4|24|M|technician|43537 5 | 5|33|F|other|15213 6 | 6|42|M|executive|98101 7 | 7|57|M|administrator|91344 8 | 8|36|M|administrator|05201 9 | 9|29|M|student|01002 10 | 10|53|M|lawyer|90703 11 | 11|39|F|other|30329 12 | 12|28|F|other|06405 13 | 13|47|M|educator|29206 14 | 14|45|M|scientist|55106 15 | 15|49|F|educator|97301 16 | 16|21|M|entertainment|10309 17 | 17|30|M|programmer|06355 18 | 18|35|F|other|37212 19 | 19|40|M|librarian|02138 20 | 20|42|F|homemaker|95660 21 | 21|26|M|writer|30068 22 | 22|25|M|writer|40206 23 | 23|30|F|artist|48197 24 | 24|21|F|artist|94533 25 | 25|39|M|engineer|55107 26 | 26|49|M|engineer|21044 27 | 27|40|F|librarian|30030 28 | 28|32|M|writer|55369 29 | 29|41|M|programmer|94043 30 | 30|7|M|student|55436 31 | 31|24|M|artist|10003 32 | 32|28|F|student|78741 33 | 33|23|M|student|27510 34 | 34|38|F|administrator|42141 35 | 35|20|F|homemaker|42459 36 | 36|19|F|student|93117 37 | 37|23|M|student|55105 38 | 38|28|F|other|54467 39 | 39|41|M|entertainment|01040 40 | 40|38|M|scientist|27514 41 | 41|33|M|engineer|80525 42 | 42|30|M|administrator|17870 43 | 43|29|F|librarian|20854 44 | 44|26|M|technician|46260 45 | 45|29|M|programmer|50233 46 | 46|27|F|marketing|46538 47 | 47|53|M|marketing|07102 48 | 48|45|M|administrator|12550 49 | 49|23|F|student|76111 50 | 50|21|M|writer|52245 51 | 
51|28|M|educator|16509 52 | 52|18|F|student|55105 53 | 53|26|M|programmer|55414 54 | 54|22|M|executive|66315 55 | 55|37|M|programmer|01331 56 | 56|25|M|librarian|46260 57 | 57|16|M|none|84010 58 | 58|27|M|programmer|52246 59 | 59|49|M|educator|08403 60 | 60|50|M|healthcare|06472 61 | 61|36|M|engineer|30040 62 | 62|27|F|administrator|97214 63 | 63|31|M|marketing|75240 64 | 64|32|M|educator|43202 65 | 65|51|F|educator|48118 66 | 66|23|M|student|80521 67 | 67|17|M|student|60402 68 | 68|19|M|student|22904 69 | 69|24|M|engineer|55337 70 | 70|27|M|engineer|60067 71 | 71|39|M|scientist|98034 72 | 72|48|F|administrator|73034 73 | 73|24|M|student|41850 74 | 74|39|M|scientist|T8H1N 75 | 75|24|M|entertainment|08816 76 | 76|20|M|student|02215 77 | 77|30|M|technician|29379 78 | 78|26|M|administrator|61801 79 | 79|39|F|administrator|03755 80 | 80|34|F|administrator|52241 81 | 81|21|M|student|21218 82 | 82|50|M|programmer|22902 83 | 83|40|M|other|44133 84 | 84|32|M|executive|55369 85 | 85|51|M|educator|20003 86 | 86|26|M|administrator|46005 87 | 87|47|M|administrator|89503 88 | 88|49|F|librarian|11701 89 | 89|43|F|administrator|68106 90 | 90|60|M|educator|78155 91 | 91|55|M|marketing|01913 92 | 92|32|M|entertainment|80525 93 | 93|48|M|executive|23112 94 | 94|26|M|student|71457 95 | 95|31|M|administrator|10707 96 | 96|25|F|artist|75206 97 | 97|43|M|artist|98006 98 | 98|49|F|executive|90291 99 | 99|20|M|student|63129 100 | 100|36|M|executive|90254 101 | 101|15|M|student|05146 102 | 102|38|M|programmer|30220 103 | 103|26|M|student|55108 104 | 104|27|M|student|55108 105 | 105|24|M|engineer|94043 106 | 106|61|M|retired|55125 107 | 107|39|M|scientist|60466 108 | 108|44|M|educator|63130 109 | 109|29|M|other|55423 110 | 110|19|M|student|77840 111 | 111|57|M|engineer|90630 112 | 112|30|M|salesman|60613 113 | 113|47|M|executive|95032 114 | 114|27|M|programmer|75013 115 | 115|31|M|engineer|17110 116 | 116|40|M|healthcare|97232 117 | 117|20|M|student|16125 118 | 118|21|M|administrator|90210 
119 | 119|32|M|programmer|67401 120 | 120|47|F|other|06260 121 | 121|54|M|librarian|99603 122 | 122|32|F|writer|22206 123 | 123|48|F|artist|20008 124 | 124|34|M|student|60615 125 | 125|30|M|lawyer|22202 126 | 126|28|F|lawyer|20015 127 | 127|33|M|none|73439 128 | 128|24|F|marketing|20009 129 | 129|36|F|marketing|07039 130 | 130|20|M|none|60115 131 | 131|59|F|administrator|15237 132 | 132|24|M|other|94612 133 | 133|53|M|engineer|78602 134 | 134|31|M|programmer|80236 135 | 135|23|M|student|38401 136 | 136|51|M|other|97365 137 | 137|50|M|educator|84408 138 | 138|46|M|doctor|53211 139 | 139|20|M|student|08904 140 | 140|30|F|student|32250 141 | 141|49|M|programmer|36117 142 | 142|13|M|other|48118 143 | 143|42|M|technician|08832 144 | 144|53|M|programmer|20910 145 | 145|31|M|entertainment|V3N4P 146 | 146|45|M|artist|83814 147 | 147|40|F|librarian|02143 148 | 148|33|M|engineer|97006 149 | 149|35|F|marketing|17325 150 | 150|20|F|artist|02139 151 | 151|38|F|administrator|48103 152 | 152|33|F|educator|68767 153 | 153|25|M|student|60641 154 | 154|25|M|student|53703 155 | 155|32|F|other|11217 156 | 156|25|M|educator|08360 157 | 157|57|M|engineer|70808 158 | 158|50|M|educator|27606 159 | 159|23|F|student|55346 160 | 160|27|M|programmer|66215 161 | 161|50|M|lawyer|55104 162 | 162|25|M|artist|15610 163 | 163|49|M|administrator|97212 164 | 164|47|M|healthcare|80123 165 | 165|20|F|other|53715 166 | 166|47|M|educator|55113 167 | 167|37|M|other|L9G2B 168 | 168|48|M|other|80127 169 | 169|52|F|other|53705 170 | 170|53|F|healthcare|30067 171 | 171|48|F|educator|78750 172 | 172|55|M|marketing|22207 173 | 173|56|M|other|22306 174 | 174|30|F|administrator|52302 175 | 175|26|F|scientist|21911 176 | 176|28|M|scientist|07030 177 | 177|20|M|programmer|19104 178 | 178|26|M|other|49512 179 | 179|15|M|entertainment|20755 180 | 180|22|F|administrator|60202 181 | 181|26|M|executive|21218 182 | 182|36|M|programmer|33884 183 | 183|33|M|scientist|27708 184 | 184|37|M|librarian|76013 185 | 
185|53|F|librarian|97403 186 | 186|39|F|executive|00000 187 | 187|26|M|educator|16801 188 | 188|42|M|student|29440 189 | 189|32|M|artist|95014 190 | 190|30|M|administrator|95938 191 | 191|33|M|administrator|95161 192 | 192|42|M|educator|90840 193 | 193|29|M|student|49931 194 | 194|38|M|administrator|02154 195 | 195|42|M|scientist|93555 196 | 196|49|M|writer|55105 197 | 197|55|M|technician|75094 198 | 198|21|F|student|55414 199 | 199|30|M|writer|17604 200 | 200|40|M|programmer|93402 201 | 201|27|M|writer|E2A4H 202 | 202|41|F|educator|60201 203 | 203|25|F|student|32301 204 | 204|52|F|librarian|10960 205 | 205|47|M|lawyer|06371 206 | 206|14|F|student|53115 207 | 207|39|M|marketing|92037 208 | 208|43|M|engineer|01720 209 | 209|33|F|educator|85710 210 | 210|39|M|engineer|03060 211 | 211|66|M|salesman|32605 212 | 212|49|F|educator|61401 213 | 213|33|M|executive|55345 214 | 214|26|F|librarian|11231 215 | 215|35|M|programmer|63033 216 | 216|22|M|engineer|02215 217 | 217|22|M|other|11727 218 | 218|37|M|administrator|06513 219 | 219|32|M|programmer|43212 220 | 220|30|M|librarian|78205 221 | 221|19|M|student|20685 222 | 222|29|M|programmer|27502 223 | 223|19|F|student|47906 224 | 224|31|F|educator|43512 225 | 225|51|F|administrator|58202 226 | 226|28|M|student|92103 227 | 227|46|M|executive|60659 228 | 228|21|F|student|22003 229 | 229|29|F|librarian|22903 230 | 230|28|F|student|14476 231 | 231|48|M|librarian|01080 232 | 232|45|M|scientist|99709 233 | 233|38|M|engineer|98682 234 | 234|60|M|retired|94702 235 | 235|37|M|educator|22973 236 | 236|44|F|writer|53214 237 | 237|49|M|administrator|63146 238 | 238|42|F|administrator|44124 239 | 239|39|M|artist|95628 240 | 240|23|F|educator|20784 241 | 241|26|F|student|20001 242 | 242|33|M|educator|31404 243 | 243|33|M|educator|60201 244 | 244|28|M|technician|80525 245 | 245|22|M|student|55109 246 | 246|19|M|student|28734 247 | 247|28|M|engineer|20770 248 | 248|25|M|student|37235 249 | 249|25|M|student|84103 250 | 
250|29|M|executive|95110 251 | 251|28|M|doctor|85032 252 | 252|42|M|engineer|07733 253 | 253|26|F|librarian|22903 254 | 254|44|M|educator|42647 255 | 255|23|M|entertainment|07029 256 | 256|35|F|none|39042 257 | 257|17|M|student|77005 258 | 258|19|F|student|77801 259 | 259|21|M|student|48823 260 | 260|40|F|artist|89801 261 | 261|28|M|administrator|85202 262 | 262|19|F|student|78264 263 | 263|41|M|programmer|55346 264 | 264|36|F|writer|90064 265 | 265|26|M|executive|84601 266 | 266|62|F|administrator|78756 267 | 267|23|M|engineer|83716 268 | 268|24|M|engineer|19422 269 | 269|31|F|librarian|43201 270 | 270|18|F|student|63119 271 | 271|51|M|engineer|22932 272 | 272|33|M|scientist|53706 273 | 273|50|F|other|10016 274 | 274|20|F|student|55414 275 | 275|38|M|engineer|92064 276 | 276|21|M|student|95064 277 | 277|35|F|administrator|55406 278 | 278|37|F|librarian|30033 279 | 279|33|M|programmer|85251 280 | 280|30|F|librarian|22903 281 | 281|15|F|student|06059 282 | 282|22|M|administrator|20057 283 | 283|28|M|programmer|55305 284 | 284|40|M|executive|92629 285 | 285|25|M|programmer|53713 286 | 286|27|M|student|15217 287 | 287|21|M|salesman|31211 288 | 288|34|M|marketing|23226 289 | 289|11|M|none|94619 290 | 290|40|M|engineer|93550 291 | 291|19|M|student|44106 292 | 292|35|F|programmer|94703 293 | 293|24|M|writer|60804 294 | 294|34|M|technician|92110 295 | 295|31|M|educator|50325 296 | 296|43|F|administrator|16803 297 | 297|29|F|educator|98103 298 | 298|44|M|executive|01581 299 | 299|29|M|doctor|63108 300 | 300|26|F|programmer|55106 301 | 301|24|M|student|55439 302 | 302|42|M|educator|77904 303 | 303|19|M|student|14853 304 | 304|22|F|student|71701 305 | 305|23|M|programmer|94086 306 | 306|45|M|other|73132 307 | 307|25|M|student|55454 308 | 308|60|M|retired|95076 309 | 309|40|M|scientist|70802 310 | 310|37|M|educator|91711 311 | 311|32|M|technician|73071 312 | 312|48|M|other|02110 313 | 313|41|M|marketing|60035 314 | 314|20|F|student|08043 315 | 315|31|M|educator|18301 316 | 
316|43|F|other|77009 317 | 317|22|M|administrator|13210 318 | 318|65|M|retired|06518 319 | 319|38|M|programmer|22030 320 | 320|19|M|student|24060 321 | 321|49|F|educator|55413 322 | 322|20|M|student|50613 323 | 323|21|M|student|19149 324 | 324|21|F|student|02176 325 | 325|48|M|technician|02139 326 | 326|41|M|administrator|15235 327 | 327|22|M|student|11101 328 | 328|51|M|administrator|06779 329 | 329|48|M|educator|01720 330 | 330|35|F|educator|33884 331 | 331|33|M|entertainment|91344 332 | 332|20|M|student|40504 333 | 333|47|M|other|V0R2M 334 | 334|32|M|librarian|30002 335 | 335|45|M|executive|33775 336 | 336|23|M|salesman|42101 337 | 337|37|M|scientist|10522 338 | 338|39|F|librarian|59717 339 | 339|35|M|lawyer|37901 340 | 340|46|M|engineer|80123 341 | 341|17|F|student|44405 342 | 342|25|F|other|98006 343 | 343|43|M|engineer|30093 344 | 344|30|F|librarian|94117 345 | 345|28|F|librarian|94143 346 | 346|34|M|other|76059 347 | 347|18|M|student|90210 348 | 348|24|F|student|45660 349 | 349|68|M|retired|61455 350 | 350|32|M|student|97301 351 | 351|61|M|educator|49938 352 | 352|37|F|programmer|55105 353 | 353|25|M|scientist|28480 354 | 354|29|F|librarian|48197 355 | 355|25|M|student|60135 356 | 356|32|F|homemaker|92688 357 | 357|26|M|executive|98133 358 | 358|40|M|educator|10022 359 | 359|22|M|student|61801 360 | 360|51|M|other|98027 361 | 361|22|M|student|44074 362 | 362|35|F|homemaker|85233 363 | 363|20|M|student|87501 364 | 364|63|M|engineer|01810 365 | 365|29|M|lawyer|20009 366 | 366|20|F|student|50670 367 | 367|17|M|student|37411 368 | 368|18|M|student|92113 369 | 369|24|M|student|91335 370 | 370|52|M|writer|08534 371 | 371|36|M|engineer|99206 372 | 372|25|F|student|66046 373 | 373|24|F|other|55116 374 | 374|36|M|executive|78746 375 | 375|17|M|entertainment|37777 376 | 376|28|F|other|10010 377 | 377|22|M|student|18015 378 | 378|35|M|student|02859 379 | 379|44|M|programmer|98117 380 | 380|32|M|engineer|55117 381 | 381|33|M|artist|94608 382 | 382|45|M|engineer|01824 
383 | 383|42|M|administrator|75204 384 | 384|52|M|programmer|45218 385 | 385|36|M|writer|10003 386 | 386|36|M|salesman|43221 387 | 387|33|M|entertainment|37412 388 | 388|31|M|other|36106 389 | 389|44|F|writer|83702 390 | 390|42|F|writer|85016 391 | 391|23|M|student|84604 392 | 392|52|M|writer|59801 393 | 393|19|M|student|83686 394 | 394|25|M|administrator|96819 395 | 395|43|M|other|44092 396 | 396|57|M|engineer|94551 397 | 397|17|M|student|27514 398 | 398|40|M|other|60008 399 | 399|25|M|other|92374 400 | 400|33|F|administrator|78213 401 | 401|46|F|healthcare|84107 402 | 402|30|M|engineer|95129 403 | 403|37|M|other|06811 404 | 404|29|F|programmer|55108 405 | 405|22|F|healthcare|10019 406 | 406|52|M|educator|93109 407 | 407|29|M|engineer|03261 408 | 408|23|M|student|61755 409 | 409|48|M|administrator|98225 410 | 410|30|F|artist|94025 411 | 411|34|M|educator|44691 412 | 412|25|M|educator|15222 413 | 413|55|M|educator|78212 414 | 414|24|M|programmer|38115 415 | 415|39|M|educator|85711 416 | 416|20|F|student|92626 417 | 417|27|F|other|48103 418 | 418|55|F|none|21206 419 | 419|37|M|lawyer|43215 420 | 420|53|M|educator|02140 421 | 421|38|F|programmer|55105 422 | 422|26|M|entertainment|94533 423 | 423|64|M|other|91606 424 | 424|36|F|marketing|55422 425 | 425|19|M|student|58644 426 | 426|55|M|educator|01602 427 | 427|51|M|doctor|85258 428 | 428|28|M|student|55414 429 | 429|27|M|student|29205 430 | 430|38|M|scientist|98199 431 | 431|24|M|marketing|92629 432 | 432|22|M|entertainment|50311 433 | 433|27|M|artist|11211 434 | 434|16|F|student|49705 435 | 435|24|M|engineer|60007 436 | 436|30|F|administrator|17345 437 | 437|27|F|other|20009 438 | 438|51|F|administrator|43204 439 | 439|23|F|administrator|20817 440 | 440|30|M|other|48076 441 | 441|50|M|technician|55013 442 | 442|22|M|student|85282 443 | 443|35|M|salesman|33308 444 | 444|51|F|lawyer|53202 445 | 445|21|M|writer|92653 446 | 446|57|M|educator|60201 447 | 447|30|M|administrator|55113 448 | 448|23|M|entertainment|10021 449 
| 449|23|M|librarian|55021 450 | 450|35|F|educator|11758 451 | 451|16|M|student|48446 452 | 452|35|M|administrator|28018 453 | 453|18|M|student|06333 454 | 454|57|M|other|97330 455 | 455|48|M|administrator|83709 456 | 456|24|M|technician|31820 457 | 457|33|F|salesman|30011 458 | 458|47|M|technician|Y1A6B 459 | 459|22|M|student|29201 460 | 460|44|F|other|60630 461 | 461|15|M|student|98102 462 | 462|19|F|student|02918 463 | 463|48|F|healthcare|75218 464 | 464|60|M|writer|94583 465 | 465|32|M|other|05001 466 | 466|22|M|student|90804 467 | 467|29|M|engineer|91201 468 | 468|28|M|engineer|02341 469 | 469|60|M|educator|78628 470 | 470|24|M|programmer|10021 471 | 471|10|M|student|77459 472 | 472|24|M|student|87544 473 | 473|29|M|student|94708 474 | 474|51|M|executive|93711 475 | 475|30|M|programmer|75230 476 | 476|28|M|student|60440 477 | 477|23|F|student|02125 478 | 478|29|M|other|10019 479 | 479|30|M|educator|55409 480 | 480|57|M|retired|98257 481 | 481|73|M|retired|37771 482 | 482|18|F|student|40256 483 | 483|29|M|scientist|43212 484 | 484|27|M|student|21208 485 | 485|44|F|educator|95821 486 | 486|39|M|educator|93101 487 | 487|22|M|engineer|92121 488 | 488|48|M|technician|21012 489 | 489|55|M|other|45218 490 | 490|29|F|artist|V5A2B 491 | 491|43|F|writer|53711 492 | 492|57|M|educator|94618 493 | 493|22|M|engineer|60090 494 | 494|38|F|administrator|49428 495 | 495|29|M|engineer|03052 496 | 496|21|F|student|55414 497 | 497|20|M|student|50112 498 | 498|26|M|writer|55408 499 | 499|42|M|programmer|75006 500 | 500|28|M|administrator|94305 501 | 501|22|M|student|10025 502 | 502|22|M|student|23092 503 | 503|50|F|writer|27514 504 | 504|40|F|writer|92115 505 | 505|27|F|other|20657 506 | 506|46|M|programmer|03869 507 | 507|18|F|writer|28450 508 | 508|27|M|marketing|19382 509 | 509|23|M|administrator|10011 510 | 510|34|M|other|98038 511 | 511|22|M|student|21250 512 | 512|29|M|other|20090 513 | 513|43|M|administrator|26241 514 | 514|27|M|programmer|20707 515 | 
515|53|M|marketing|49508 516 | 516|53|F|librarian|10021 517 | 517|24|M|student|55454 518 | 518|49|F|writer|99709 519 | 519|22|M|other|55320 520 | 520|62|M|healthcare|12603 521 | 521|19|M|student|02146 522 | 522|36|M|engineer|55443 523 | 523|50|F|administrator|04102 524 | 524|56|M|educator|02159 525 | 525|27|F|administrator|19711 526 | 526|30|M|marketing|97124 527 | 527|33|M|librarian|12180 528 | 528|18|M|student|55104 529 | 529|47|F|administrator|44224 530 | 530|29|M|engineer|94040 531 | 531|30|F|salesman|97408 532 | 532|20|M|student|92705 533 | 533|43|M|librarian|02324 534 | 534|20|M|student|05464 535 | 535|45|F|educator|80302 536 | 536|38|M|engineer|30078 537 | 537|36|M|engineer|22902 538 | 538|31|M|scientist|21010 539 | 539|53|F|administrator|80303 540 | 540|28|M|engineer|91201 541 | 541|19|F|student|84302 542 | 542|21|M|student|60515 543 | 543|33|M|scientist|95123 544 | 544|44|F|other|29464 545 | 545|27|M|technician|08052 546 | 546|36|M|executive|22911 547 | 547|50|M|educator|14534 548 | 548|51|M|writer|95468 549 | 549|42|M|scientist|45680 550 | 550|16|F|student|95453 551 | 551|25|M|programmer|55414 552 | 552|45|M|other|68147 553 | 553|58|M|educator|62901 554 | 554|32|M|scientist|62901 555 | 555|29|F|educator|23227 556 | 556|35|F|educator|30606 557 | 557|30|F|writer|11217 558 | 558|56|F|writer|63132 559 | 559|69|M|executive|10022 560 | 560|32|M|student|10003 561 | 561|23|M|engineer|60005 562 | 562|54|F|administrator|20879 563 | 563|39|F|librarian|32707 564 | 564|65|M|retired|94591 565 | 565|40|M|student|55422 566 | 566|20|M|student|14627 567 | 567|24|M|entertainment|10003 568 | 568|39|M|educator|01915 569 | 569|34|M|educator|91903 570 | 570|26|M|educator|14627 571 | 571|34|M|artist|01945 572 | 572|51|M|educator|20003 573 | 573|68|M|retired|48911 574 | 574|56|M|educator|53188 575 | 575|33|M|marketing|46032 576 | 576|48|M|executive|98281 577 | 577|36|F|student|77845 578 | 578|31|M|administrator|M7A1A 579 | 579|32|M|educator|48103 580 | 580|16|M|student|17961 581 
| 581|37|M|other|94131 582 | 582|17|M|student|93003 583 | 583|44|M|engineer|29631 584 | 584|25|M|student|27511 585 | 585|69|M|librarian|98501 586 | 586|20|M|student|79508 587 | 587|26|M|other|14216 588 | 588|18|F|student|93063 589 | 589|21|M|lawyer|90034 590 | 590|50|M|educator|82435 591 | 591|57|F|librarian|92093 592 | 592|18|M|student|97520 593 | 593|31|F|educator|68767 594 | 594|46|M|educator|M4J2K 595 | 595|25|M|programmer|31909 596 | 596|20|M|artist|77073 597 | 597|23|M|other|84116 598 | 598|40|F|marketing|43085 599 | 599|22|F|student|R3T5K 600 | 600|34|M|programmer|02320 601 | 601|19|F|artist|99687 602 | 602|47|F|other|34656 603 | 603|21|M|programmer|47905 604 | 604|39|M|educator|11787 605 | 605|33|M|engineer|33716 606 | 606|28|M|programmer|63044 607 | 607|49|F|healthcare|02154 608 | 608|22|M|other|10003 609 | 609|13|F|student|55106 610 | 610|22|M|student|21227 611 | 611|46|M|librarian|77008 612 | 612|36|M|educator|79070 613 | 613|37|F|marketing|29678 614 | 614|54|M|educator|80227 615 | 615|38|M|educator|27705 616 | 616|55|M|scientist|50613 617 | 617|27|F|writer|11201 618 | 618|15|F|student|44212 619 | 619|17|M|student|44134 620 | 620|18|F|writer|81648 621 | 621|17|M|student|60402 622 | 622|25|M|programmer|14850 623 | 623|50|F|educator|60187 624 | 624|19|M|student|30067 625 | 625|27|M|programmer|20723 626 | 626|23|M|scientist|19807 627 | 627|24|M|engineer|08034 628 | 628|13|M|none|94306 629 | 629|46|F|other|44224 630 | 630|26|F|healthcare|55408 631 | 631|18|F|student|38866 632 | 632|18|M|student|55454 633 | 633|35|M|programmer|55414 634 | 634|39|M|engineer|T8H1N 635 | 635|22|M|other|23237 636 | 636|47|M|educator|48043 637 | 637|30|M|other|74101 638 | 638|45|M|engineer|01940 639 | 639|42|F|librarian|12065 640 | 640|20|M|student|61801 641 | 641|24|M|student|60626 642 | 642|18|F|student|95521 643 | 643|39|M|scientist|55122 644 | 644|51|M|retired|63645 645 | 645|27|M|programmer|53211 646 | 646|17|F|student|51250 647 | 647|40|M|educator|45810 648 | 
648|43|M|engineer|91351 649 | 649|20|M|student|39762 650 | 650|42|M|engineer|83814 651 | 651|65|M|retired|02903 652 | 652|35|M|other|22911 653 | 653|31|M|executive|55105 654 | 654|27|F|student|78739 655 | 655|50|F|healthcare|60657 656 | 656|48|M|educator|10314 657 | 657|26|F|none|78704 658 | 658|33|M|programmer|92626 659 | 659|31|M|educator|54248 660 | 660|26|M|student|77380 661 | 661|28|M|programmer|98121 662 | 662|55|M|librarian|19102 663 | 663|26|M|other|19341 664 | 664|30|M|engineer|94115 665 | 665|25|M|administrator|55412 666 | 666|44|M|administrator|61820 667 | 667|35|M|librarian|01970 668 | 668|29|F|writer|10016 669 | 669|37|M|other|20009 670 | 670|30|M|technician|21114 671 | 671|21|M|programmer|91919 672 | 672|54|F|administrator|90095 673 | 673|51|M|educator|22906 674 | 674|13|F|student|55337 675 | 675|34|M|other|28814 676 | 676|30|M|programmer|32712 677 | 677|20|M|other|99835 678 | 678|50|M|educator|61462 679 | 679|20|F|student|54302 680 | 680|33|M|lawyer|90405 681 | 681|44|F|marketing|97208 682 | 682|23|M|programmer|55128 683 | 683|42|M|librarian|23509 684 | 684|28|M|student|55414 685 | 685|32|F|librarian|55409 686 | 686|32|M|educator|26506 687 | 687|31|F|healthcare|27713 688 | 688|37|F|administrator|60476 689 | 689|25|M|other|45439 690 | 690|35|M|salesman|63304 691 | 691|34|M|educator|60089 692 | 692|34|M|engineer|18053 693 | 693|43|F|healthcare|85210 694 | 694|60|M|programmer|06365 695 | 695|26|M|writer|38115 696 | 696|55|M|other|94920 697 | 697|25|M|other|77042 698 | 698|28|F|programmer|06906 699 | 699|44|M|other|96754 700 | 700|17|M|student|76309 701 | 701|51|F|librarian|56321 702 | 702|37|M|other|89104 703 | 703|26|M|educator|49512 704 | 704|51|F|librarian|91105 705 | 705|21|F|student|54494 706 | 706|23|M|student|55454 707 | 707|56|F|librarian|19146 708 | 708|26|F|homemaker|96349 709 | 709|21|M|other|N4T1A 710 | 710|19|M|student|92020 711 | 711|22|F|student|15203 712 | 712|22|F|student|54901 713 | 713|42|F|other|07204 714 | 714|26|M|engineer|55343 
715 | 715|21|M|technician|91206 716 | 716|36|F|administrator|44265 717 | 717|24|M|technician|84105 718 | 718|42|M|technician|64118 719 | 719|37|F|other|V0R2H 720 | 720|49|F|administrator|16506 721 | 721|24|F|entertainment|11238 722 | 722|50|F|homemaker|17331 723 | 723|26|M|executive|94403 724 | 724|31|M|executive|40243 725 | 725|21|M|student|91711 726 | 726|25|F|administrator|80538 727 | 727|25|M|student|78741 728 | 728|58|M|executive|94306 729 | 729|19|M|student|56567 730 | 730|31|F|scientist|32114 731 | 731|41|F|educator|70403 732 | 732|28|F|other|98405 733 | 733|44|F|other|60630 734 | 734|25|F|other|63108 735 | 735|29|F|healthcare|85719 736 | 736|48|F|writer|94618 737 | 737|30|M|programmer|98072 738 | 738|35|M|technician|95403 739 | 739|35|M|technician|73162 740 | 740|25|F|educator|22206 741 | 741|25|M|writer|63108 742 | 742|35|M|student|29210 743 | 743|31|M|programmer|92660 744 | 744|35|M|marketing|47024 745 | 745|42|M|writer|55113 746 | 746|25|M|engineer|19047 747 | 747|19|M|other|93612 748 | 748|28|M|administrator|94720 749 | 749|33|M|other|80919 750 | 750|28|M|administrator|32303 751 | 751|24|F|other|90034 752 | 752|60|M|retired|21201 753 | 753|56|M|salesman|91206 754 | 754|59|F|librarian|62901 755 | 755|44|F|educator|97007 756 | 756|30|F|none|90247 757 | 757|26|M|student|55104 758 | 758|27|M|student|53706 759 | 759|20|F|student|68503 760 | 760|35|F|other|14211 761 | 761|17|M|student|97302 762 | 762|32|M|administrator|95050 763 | 763|27|M|scientist|02113 764 | 764|27|F|educator|62903 765 | 765|31|M|student|33066 766 | 766|42|M|other|10960 767 | 767|70|M|engineer|00000 768 | 768|29|M|administrator|12866 769 | 769|39|M|executive|06927 770 | 770|28|M|student|14216 771 | 771|26|M|student|15232 772 | 772|50|M|writer|27105 773 | 773|20|M|student|55414 774 | 774|30|M|student|80027 775 | 775|46|M|executive|90036 776 | 776|30|M|librarian|51157 777 | 777|63|M|programmer|01810 778 | 778|34|M|student|01960 779 | 779|31|M|student|K7L5J 780 | 780|49|M|programmer|94560 781 
| 781|20|M|student|48825 782 | 782|21|F|artist|33205 783 | 783|30|M|marketing|77081 784 | 784|47|M|administrator|91040 785 | 785|32|M|engineer|23322 786 | 786|36|F|engineer|01754 787 | 787|18|F|student|98620 788 | 788|51|M|administrator|05779 789 | 789|29|M|other|55420 790 | 790|27|M|technician|80913 791 | 791|31|M|educator|20064 792 | 792|40|M|programmer|12205 793 | 793|22|M|student|85281 794 | 794|32|M|educator|57197 795 | 795|30|M|programmer|08610 796 | 796|32|F|writer|33755 797 | 797|44|F|other|62522 798 | 798|40|F|writer|64131 799 | 799|49|F|administrator|19716 800 | 800|25|M|programmer|55337 801 | 801|22|M|writer|92154 802 | 802|35|M|administrator|34105 803 | 803|70|M|administrator|78212 804 | 804|39|M|educator|61820 805 | 805|27|F|other|20009 806 | 806|27|M|marketing|11217 807 | 807|41|F|healthcare|93555 808 | 808|45|M|salesman|90016 809 | 809|50|F|marketing|30803 810 | 810|55|F|other|80526 811 | 811|40|F|educator|73013 812 | 812|22|M|technician|76234 813 | 813|14|F|student|02136 814 | 814|30|M|other|12345 815 | 815|32|M|other|28806 816 | 816|34|M|other|20755 817 | 817|19|M|student|60152 818 | 818|28|M|librarian|27514 819 | 819|59|M|administrator|40205 820 | 820|22|M|student|37725 821 | 821|37|M|engineer|77845 822 | 822|29|F|librarian|53144 823 | 823|27|M|artist|50322 824 | 824|31|M|other|15017 825 | 825|44|M|engineer|05452 826 | 826|28|M|artist|77048 827 | 827|23|F|engineer|80228 828 | 828|28|M|librarian|85282 829 | 829|48|M|writer|80209 830 | 830|46|M|programmer|53066 831 | 831|21|M|other|33765 832 | 832|24|M|technician|77042 833 | 833|34|M|writer|90019 834 | 834|26|M|other|64153 835 | 835|44|F|executive|11577 836 | 836|44|M|artist|10018 837 | 837|36|F|artist|55409 838 | 838|23|M|student|01375 839 | 839|38|F|entertainment|90814 840 | 840|39|M|artist|55406 841 | 841|45|M|doctor|47401 842 | 842|40|M|writer|93055 843 | 843|35|M|librarian|44212 844 | 844|22|M|engineer|95662 845 | 845|64|M|doctor|97405 846 | 846|27|M|lawyer|47130 847 | 847|29|M|student|55417 
848 | 848|46|M|engineer|02146 849 | 849|15|F|student|25652 850 | 850|34|M|technician|78390 851 | 851|18|M|other|29646 852 | 852|46|M|administrator|94086 853 | 853|49|M|writer|40515 854 | 854|29|F|student|55408 855 | 855|53|M|librarian|04988 856 | 856|43|F|marketing|97215 857 | 857|35|F|administrator|V1G4L 858 | 858|63|M|educator|09645 859 | 859|18|F|other|06492 860 | 860|70|F|retired|48322 861 | 861|38|F|student|14085 862 | 862|25|M|executive|13820 863 | 863|17|M|student|60089 864 | 864|27|M|programmer|63021 865 | 865|25|M|artist|11231 866 | 866|45|M|other|60302 867 | 867|24|M|scientist|92507 868 | 868|21|M|programmer|55303 869 | 869|30|M|student|10025 870 | 870|22|M|student|65203 871 | 871|31|M|executive|44648 872 | 872|19|F|student|74078 873 | 873|48|F|administrator|33763 874 | 874|36|M|scientist|37076 875 | 875|24|F|student|35802 876 | 876|41|M|other|20902 877 | 877|30|M|other|77504 878 | 878|50|F|educator|98027 879 | 879|33|F|administrator|55337 880 | 880|13|M|student|83702 881 | 881|39|M|marketing|43017 882 | 882|35|M|engineer|40503 883 | 883|49|M|librarian|50266 884 | 884|44|M|engineer|55337 885 | 885|30|F|other|95316 886 | 886|20|M|student|61820 887 | 887|14|F|student|27249 888 | 888|41|M|scientist|17036 889 | 889|24|M|technician|78704 890 | 890|32|M|student|97301 891 | 891|51|F|administrator|03062 892 | 892|36|M|other|45243 893 | 893|25|M|student|95823 894 | 894|47|M|educator|74075 895 | 895|31|F|librarian|32301 896 | 896|28|M|writer|91505 897 | 897|30|M|other|33484 898 | 898|23|M|homemaker|61755 899 | 899|32|M|other|55116 900 | 900|60|M|retired|18505 901 | 901|38|M|executive|L1V3W 902 | 902|45|F|artist|97203 903 | 903|28|M|educator|20850 904 | 904|17|F|student|61073 905 | 905|27|M|other|30350 906 | 906|45|M|librarian|70124 907 | 907|25|F|other|80526 908 | 908|44|F|librarian|68504 909 | 909|50|F|educator|53171 910 | 910|28|M|healthcare|29301 911 | 911|37|F|writer|53210 912 | 912|51|M|other|06512 913 | 913|27|M|student|76201 914 | 914|44|F|other|08105 915 | 
915|50|M|entertainment|60614 916 | 916|27|M|engineer|N2L5N 917 | 917|22|F|student|20006 918 | 918|40|M|scientist|70116 919 | 919|25|M|other|14216 920 | 920|30|F|artist|90008 921 | 921|20|F|student|98801 922 | 922|29|F|administrator|21114 923 | 923|21|M|student|E2E3R 924 | 924|29|M|other|11753 925 | 925|18|F|salesman|49036 926 | 926|49|M|entertainment|01701 927 | 927|23|M|programmer|55428 928 | 928|21|M|student|55408 929 | 929|44|M|scientist|53711 930 | 930|28|F|scientist|07310 931 | 931|60|M|educator|33556 932 | 932|58|M|educator|06437 933 | 933|28|M|student|48105 934 | 934|61|M|engineer|22902 935 | 935|42|M|doctor|66221 936 | 936|24|M|other|32789 937 | 937|48|M|educator|98072 938 | 938|38|F|technician|55038 939 | 939|26|F|student|33319 940 | 940|32|M|administrator|02215 941 | 941|20|M|student|97229 942 | 942|48|F|librarian|78209 943 | 943|22|M|student|77841 944 | -------------------------------------------------------------------------------- /data/titanic_test.csv: -------------------------------------------------------------------------------- 1 | PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked 2 | 892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q 3 | 893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47,1,0,363272,7,,S 4 | 894,2,"Myles, Mr. Thomas Francis",male,62,0,0,240276,9.6875,,Q 5 | 895,3,"Wirz, Mr. Albert",male,27,0,0,315154,8.6625,,S 6 | 896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22,1,1,3101298,12.2875,,S 7 | 897,3,"Svensson, Mr. Johan Cervin",male,14,0,0,7538,9.225,,S 8 | 898,3,"Connolly, Miss. Kate",female,30,0,0,330972,7.6292,,Q 9 | 899,2,"Caldwell, Mr. Albert Francis",male,26,1,1,248738,29,,S 10 | 900,3,"Abrahim, Mrs. Joseph (Sophie Halaut Easu)",female,18,0,0,2657,7.2292,,C 11 | 901,3,"Davies, Mr. John Samuel",male,21,2,0,A/4 48871,24.15,,S 12 | 902,3,"Ilieff, Mr. Ylio",male,,0,0,349220,7.8958,,S 13 | 903,1,"Jones, Mr. Charles Cresson",male,46,0,0,694,26,,S 14 | 904,1,"Snyder, Mrs. 
John Pillsbury (Nelle Stevenson)",female,23,1,0,21228,82.2667,B45,S 15 | 905,2,"Howard, Mr. Benjamin",male,63,1,0,24065,26,,S 16 | 906,1,"Chaffee, Mrs. Herbert Fuller (Carrie Constance Toogood)",female,47,1,0,W.E.P. 5734,61.175,E31,S 17 | 907,2,"del Carlo, Mrs. Sebastiano (Argenia Genovesi)",female,24,1,0,SC/PARIS 2167,27.7208,,C 18 | 908,2,"Keane, Mr. Daniel",male,35,0,0,233734,12.35,,Q 19 | 909,3,"Assaf, Mr. Gerios",male,21,0,0,2692,7.225,,C 20 | 910,3,"Ilmakangas, Miss. Ida Livija",female,27,1,0,STON/O2. 3101270,7.925,,S 21 | 911,3,"Assaf Khalil, Mrs. Mariana (Miriam"")""",female,45,0,0,2696,7.225,,C 22 | 912,1,"Rothschild, Mr. Martin",male,55,1,0,PC 17603,59.4,,C 23 | 913,3,"Olsen, Master. Artur Karl",male,9,0,1,C 17368,3.1708,,S 24 | 914,1,"Flegenheim, Mrs. Alfred (Antoinette)",female,,0,0,PC 17598,31.6833,,S 25 | 915,1,"Williams, Mr. Richard Norris II",male,21,0,1,PC 17597,61.3792,,C 26 | 916,1,"Ryerson, Mrs. Arthur Larned (Emily Maria Borie)",female,48,1,3,PC 17608,262.375,B57 B59 B63 B66,C 27 | 917,3,"Robins, Mr. Alexander A",male,50,1,0,A/5. 3337,14.5,,S 28 | 918,1,"Ostby, Miss. Helene Ragnhild",female,22,0,1,113509,61.9792,B36,C 29 | 919,3,"Daher, Mr. Shedid",male,22.5,0,0,2698,7.225,,C 30 | 920,1,"Brady, Mr. John Bertram",male,41,0,0,113054,30.5,A21,S 31 | 921,3,"Samaan, Mr. Elias",male,,2,0,2662,21.6792,,C 32 | 922,2,"Louch, Mr. Charles Alexander",male,50,1,0,SC/AH 3085,26,,S 33 | 923,2,"Jefferys, Mr. Clifford Thomas",male,24,2,0,C.A. 31029,31.5,,S 34 | 924,3,"Dean, Mrs. Bertram (Eva Georgetta Light)",female,33,1,2,C.A. 2315,20.575,,S 35 | 925,3,"Johnston, Mrs. Andrew G (Elizabeth Lily"" Watson)""",female,,1,2,W./C. 6607,23.45,,S 36 | 926,1,"Mock, Mr. Philipp Edmund",male,30,1,0,13236,57.75,C78,C 37 | 927,3,"Katavelas, Mr. Vassilios (Catavelas Vassilios"")""",male,18.5,0,0,2682,7.2292,,C 38 | 928,3,"Roth, Miss. Sarah A",female,,0,0,342712,8.05,,S 39 | 929,3,"Cacic, Miss. Manda",female,21,0,0,315087,8.6625,,S 40 | 930,3,"Sap, Mr. 
Julius",male,25,0,0,345768,9.5,,S 41 | 931,3,"Hee, Mr. Ling",male,,0,0,1601,56.4958,,S 42 | 932,3,"Karun, Mr. Franz",male,39,0,1,349256,13.4167,,C 43 | 933,1,"Franklin, Mr. Thomas Parham",male,,0,0,113778,26.55,D34,S 44 | 934,3,"Goldsmith, Mr. Nathan",male,41,0,0,SOTON/O.Q. 3101263,7.85,,S 45 | 935,2,"Corbett, Mrs. Walter H (Irene Colvin)",female,30,0,0,237249,13,,S 46 | 936,1,"Kimball, Mrs. Edwin Nelson Jr (Gertrude Parsons)",female,45,1,0,11753,52.5542,D19,S 47 | 937,3,"Peltomaki, Mr. Nikolai Johannes",male,25,0,0,STON/O 2. 3101291,7.925,,S 48 | 938,1,"Chevre, Mr. Paul Romaine",male,45,0,0,PC 17594,29.7,A9,C 49 | 939,3,"Shaughnessy, Mr. Patrick",male,,0,0,370374,7.75,,Q 50 | 940,1,"Bucknell, Mrs. William Robert (Emma Eliza Ward)",female,60,0,0,11813,76.2917,D15,C 51 | 941,3,"Coutts, Mrs. William (Winnie Minnie"" Treanor)""",female,36,0,2,C.A. 37671,15.9,,S 52 | 942,1,"Smith, Mr. Lucien Philip",male,24,1,0,13695,60,C31,S 53 | 943,2,"Pulbaum, Mr. Franz",male,27,0,0,SC/PARIS 2168,15.0333,,C 54 | 944,2,"Hocking, Miss. Ellen Nellie""""",female,20,2,1,29105,23,,S 55 | 945,1,"Fortune, Miss. Ethel Flora",female,28,3,2,19950,263,C23 C25 C27,S 56 | 946,2,"Mangiavacchi, Mr. Serafino Emilio",male,,0,0,SC/A.3 2861,15.5792,,C 57 | 947,3,"Rice, Master. Albert",male,10,4,1,382652,29.125,,Q 58 | 948,3,"Cor, Mr. Bartol",male,35,0,0,349230,7.8958,,S 59 | 949,3,"Abelseth, Mr. Olaus Jorgensen",male,25,0,0,348122,7.65,F G63,S 60 | 950,3,"Davison, Mr. Thomas Henry",male,,1,0,386525,16.1,,S 61 | 951,1,"Chaudanson, Miss. Victorine",female,36,0,0,PC 17608,262.375,B61,C 62 | 952,3,"Dika, Mr. Mirko",male,17,0,0,349232,7.8958,,S 63 | 953,2,"McCrae, Mr. Arthur Gordon",male,32,0,0,237216,13.5,,S 64 | 954,3,"Bjorklund, Mr. Ernst Herbert",male,18,0,0,347090,7.75,,S 65 | 955,3,"Bradley, Miss. Bridget Delia",female,22,0,0,334914,7.725,,Q 66 | 956,1,"Ryerson, Master. John Borie",male,13,2,2,PC 17608,262.375,B57 B59 B63 B66,C 67 | 957,2,"Corey, Mrs. 
Percy C (Mary Phyllis Elizabeth Miller)",female,,0,0,F.C.C. 13534,21,,S 68 | 958,3,"Burns, Miss. Mary Delia",female,18,0,0,330963,7.8792,,Q 69 | 959,1,"Moore, Mr. Clarence Bloomfield",male,47,0,0,113796,42.4,,S 70 | 960,1,"Tucker, Mr. Gilbert Milligan Jr",male,31,0,0,2543,28.5375,C53,C 71 | 961,1,"Fortune, Mrs. Mark (Mary McDougald)",female,60,1,4,19950,263,C23 C25 C27,S 72 | 962,3,"Mulvihill, Miss. Bertha E",female,24,0,0,382653,7.75,,Q 73 | 963,3,"Minkoff, Mr. Lazar",male,21,0,0,349211,7.8958,,S 74 | 964,3,"Nieminen, Miss. Manta Josefina",female,29,0,0,3101297,7.925,,S 75 | 965,1,"Ovies y Rodriguez, Mr. Servando",male,28.5,0,0,PC 17562,27.7208,D43,C 76 | 966,1,"Geiger, Miss. Amalie",female,35,0,0,113503,211.5,C130,C 77 | 967,1,"Keeping, Mr. Edwin",male,32.5,0,0,113503,211.5,C132,C 78 | 968,3,"Miles, Mr. Frank",male,,0,0,359306,8.05,,S 79 | 969,1,"Cornell, Mrs. Robert Clifford (Malvina Helen Lamson)",female,55,2,0,11770,25.7,C101,S 80 | 970,2,"Aldworth, Mr. Charles Augustus",male,30,0,0,248744,13,,S 81 | 971,3,"Doyle, Miss. Elizabeth",female,24,0,0,368702,7.75,,Q 82 | 972,3,"Boulos, Master. Akar",male,6,1,1,2678,15.2458,,C 83 | 973,1,"Straus, Mr. Isidor",male,67,1,0,PC 17483,221.7792,C55 C57,S 84 | 974,1,"Case, Mr. Howard Brown",male,49,0,0,19924,26,,S 85 | 975,3,"Demetri, Mr. Marinko",male,,0,0,349238,7.8958,,S 86 | 976,2,"Lamb, Mr. John Joseph",male,,0,0,240261,10.7083,,Q 87 | 977,3,"Khalil, Mr. Betros",male,,1,0,2660,14.4542,,C 88 | 978,3,"Barry, Miss. Julia",female,27,0,0,330844,7.8792,,Q 89 | 979,3,"Badman, Miss. Emily Louisa",female,18,0,0,A/4 31416,8.05,,S 90 | 980,3,"O'Donoghue, Ms. Bridget",female,,0,0,364856,7.75,,Q 91 | 981,2,"Wells, Master. Ralph Lester",male,2,1,1,29103,23,,S 92 | 982,3,"Dyker, Mrs. Adolf Fredrik (Anna Elisabeth Judith Andersson)",female,22,1,0,347072,13.9,,S 93 | 983,3,"Pedersen, Mr. Olaf",male,,0,0,345498,7.775,,S 94 | 984,1,"Davidson, Mrs. Thornton (Orian Hays)",female,27,1,2,F.C. 12750,52,B71,S 95 | 985,3,"Guest, Mr. 
Robert",male,,0,0,376563,8.05,,S 96 | 986,1,"Birnbaum, Mr. Jakob",male,25,0,0,13905,26,,C 97 | 987,3,"Tenglin, Mr. Gunnar Isidor",male,25,0,0,350033,7.7958,,S 98 | 988,1,"Cavendish, Mrs. Tyrell William (Julia Florence Siegel)",female,76,1,0,19877,78.85,C46,S 99 | 989,3,"Makinen, Mr. Kalle Edvard",male,29,0,0,STON/O 2. 3101268,7.925,,S 100 | 990,3,"Braf, Miss. Elin Ester Maria",female,20,0,0,347471,7.8542,,S 101 | 991,3,"Nancarrow, Mr. William Henry",male,33,0,0,A./5. 3338,8.05,,S 102 | 992,1,"Stengel, Mrs. Charles Emil Henry (Annie May Morris)",female,43,1,0,11778,55.4417,C116,C 103 | 993,2,"Weisz, Mr. Leopold",male,27,1,0,228414,26,,S 104 | 994,3,"Foley, Mr. William",male,,0,0,365235,7.75,,Q 105 | 995,3,"Johansson Palmquist, Mr. Oskar Leander",male,26,0,0,347070,7.775,,S 106 | 996,3,"Thomas, Mrs. Alexander (Thamine Thelma"")""",female,16,1,1,2625,8.5167,,C 107 | 997,3,"Holthen, Mr. Johan Martin",male,28,0,0,C 4001,22.525,,S 108 | 998,3,"Buckley, Mr. Daniel",male,21,0,0,330920,7.8208,,Q 109 | 999,3,"Ryan, Mr. Edward",male,,0,0,383162,7.75,,Q 110 | 1000,3,"Willer, Mr. Aaron (Abi Weller"")""",male,,0,0,3410,8.7125,,S 111 | 1001,2,"Swane, Mr. George",male,18.5,0,0,248734,13,F,S 112 | 1002,2,"Stanton, Mr. Samuel Ward",male,41,0,0,237734,15.0458,,C 113 | 1003,3,"Shine, Miss. Ellen Natalia",female,,0,0,330968,7.7792,,Q 114 | 1004,1,"Evans, Miss. Edith Corse",female,36,0,0,PC 17531,31.6792,A29,C 115 | 1005,3,"Buckley, Miss. Katherine",female,18.5,0,0,329944,7.2833,,Q 116 | 1006,1,"Straus, Mrs. Isidor (Rosalie Ida Blun)",female,63,1,0,PC 17483,221.7792,C55 C57,S 117 | 1007,3,"Chronopoulos, Mr. Demetrios",male,18,1,0,2680,14.4542,,C 118 | 1008,3,"Thomas, Mr. John",male,,0,0,2681,6.4375,,C 119 | 1009,3,"Sandstrom, Miss. Beatrice Irene",female,1,1,1,PP 9549,16.7,G6,S 120 | 1010,1,"Beattie, Mr. Thomson",male,36,0,0,13050,75.2417,C6,C 121 | 1011,2,"Chapman, Mrs. John Henry (Sara Elizabeth Lawry)",female,29,1,0,SC/AH 29037,26,,S 122 | 1012,2,"Watt, Miss. 
Bertha J",female,12,0,0,C.A. 33595,15.75,,S 123 | 1013,3,"Kiernan, Mr. John",male,,1,0,367227,7.75,,Q 124 | 1014,1,"Schabert, Mrs. Paul (Emma Mock)",female,35,1,0,13236,57.75,C28,C 125 | 1015,3,"Carver, Mr. Alfred John",male,28,0,0,392095,7.25,,S 126 | 1016,3,"Kennedy, Mr. John",male,,0,0,368783,7.75,,Q 127 | 1017,3,"Cribb, Miss. Laura Alice",female,17,0,1,371362,16.1,,S 128 | 1018,3,"Brobeck, Mr. Karl Rudolf",male,22,0,0,350045,7.7958,,S 129 | 1019,3,"McCoy, Miss. Alicia",female,,2,0,367226,23.25,,Q 130 | 1020,2,"Bowenur, Mr. Solomon",male,42,0,0,211535,13,,S 131 | 1021,3,"Petersen, Mr. Marius",male,24,0,0,342441,8.05,,S 132 | 1022,3,"Spinner, Mr. Henry John",male,32,0,0,STON/OQ. 369943,8.05,,S 133 | 1023,1,"Gracie, Col. Archibald IV",male,53,0,0,113780,28.5,C51,C 134 | 1024,3,"Lefebre, Mrs. Frank (Frances)",female,,0,4,4133,25.4667,,S 135 | 1025,3,"Thomas, Mr. Charles P",male,,1,0,2621,6.4375,,C 136 | 1026,3,"Dintcheff, Mr. Valtcho",male,43,0,0,349226,7.8958,,S 137 | 1027,3,"Carlsson, Mr. Carl Robert",male,24,0,0,350409,7.8542,,S 138 | 1028,3,"Zakarian, Mr. Mapriededer",male,26.5,0,0,2656,7.225,,C 139 | 1029,2,"Schmidt, Mr. August",male,26,0,0,248659,13,,S 140 | 1030,3,"Drapkin, Miss. Jennie",female,23,0,0,SOTON/OQ 392083,8.05,,S 141 | 1031,3,"Goodwin, Mr. Charles Frederick",male,40,1,6,CA 2144,46.9,,S 142 | 1032,3,"Goodwin, Miss. Jessie Allis",female,10,5,2,CA 2144,46.9,,S 143 | 1033,1,"Daniels, Miss. Sarah",female,33,0,0,113781,151.55,,S 144 | 1034,1,"Ryerson, Mr. Arthur Larned",male,61,1,3,PC 17608,262.375,B57 B59 B63 B66,C 145 | 1035,2,"Beauchamp, Mr. Henry James",male,28,0,0,244358,26,,S 146 | 1036,1,"Lindeberg-Lind, Mr. Erik Gustaf (Mr Edward Lingrey"")""",male,42,0,0,17475,26.55,,S 147 | 1037,3,"Vander Planke, Mr. Julius",male,31,3,0,345763,18,,S 148 | 1038,1,"Hilliard, Mr. Herbert Henry",male,,0,0,17463,51.8625,E46,S 149 | 1039,3,"Davies, Mr. Evan",male,22,0,0,SC/A4 23568,8.05,,S 150 | 1040,1,"Crafton, Mr. 
John Bertram",male,,0,0,113791,26.55,,S 151 | 1041,2,"Lahtinen, Rev. William",male,30,1,1,250651,26,,S 152 | 1042,1,"Earnshaw, Mrs. Boulton (Olive Potter)",female,23,0,1,11767,83.1583,C54,C 153 | 1043,3,"Matinoff, Mr. Nicola",male,,0,0,349255,7.8958,,C 154 | 1044,3,"Storey, Mr. Thomas",male,60.5,0,0,3701,,,S 155 | 1045,3,"Klasen, Mrs. (Hulda Kristina Eugenia Lofqvist)",female,36,0,2,350405,12.1833,,S 156 | 1046,3,"Asplund, Master. Filip Oscar",male,13,4,2,347077,31.3875,,S 157 | 1047,3,"Duquemin, Mr. Joseph",male,24,0,0,S.O./P.P. 752,7.55,,S 158 | 1048,1,"Bird, Miss. Ellen",female,29,0,0,PC 17483,221.7792,C97,S 159 | 1049,3,"Lundin, Miss. Olga Elida",female,23,0,0,347469,7.8542,,S 160 | 1050,1,"Borebank, Mr. John James",male,42,0,0,110489,26.55,D22,S 161 | 1051,3,"Peacock, Mrs. Benjamin (Edith Nile)",female,26,0,2,SOTON/O.Q. 3101315,13.775,,S 162 | 1052,3,"Smyth, Miss. Julia",female,,0,0,335432,7.7333,,Q 163 | 1053,3,"Touma, Master. Georges Youssef",male,7,1,1,2650,15.2458,,C 164 | 1054,2,"Wright, Miss. Marion",female,26,0,0,220844,13.5,,S 165 | 1055,3,"Pearce, Mr. Ernest",male,,0,0,343271,7,,S 166 | 1056,2,"Peruschitz, Rev. Joseph Maria",male,41,0,0,237393,13,,S 167 | 1057,3,"Kink-Heilmann, Mrs. Anton (Luise Heilmann)",female,26,1,1,315153,22.025,,S 168 | 1058,1,"Brandeis, Mr. Emil",male,48,0,0,PC 17591,50.4958,B10,C 169 | 1059,3,"Ford, Mr. Edward Watson",male,18,2,2,W./C. 6608,34.375,,S 170 | 1060,1,"Cassebeer, Mrs. Henry Arthur Jr (Eleanor Genevieve Fosdick)",female,,0,0,17770,27.7208,,C 171 | 1061,3,"Hellstrom, Miss. Hilda Maria",female,22,0,0,7548,8.9625,,S 172 | 1062,3,"Lithman, Mr. Simon",male,,0,0,S.O./P.P. 251,7.55,,S 173 | 1063,3,"Zakarian, Mr. Ortin",male,27,0,0,2670,7.225,,C 174 | 1064,3,"Dyker, Mr. Adolf Fredrik",male,23,1,0,347072,13.9,,S 175 | 1065,3,"Torfa, Mr. Assad",male,,0,0,2673,7.2292,,C 176 | 1066,3,"Asplund, Mr. Carl Oscar Vilhelm Gustafsson",male,40,1,5,347077,31.3875,,S 177 | 1067,2,"Brown, Miss. 
Edith Eileen",female,15,0,2,29750,39,,S 178 | 1068,2,"Sincock, Miss. Maude",female,20,0,0,C.A. 33112,36.75,,S 179 | 1069,1,"Stengel, Mr. Charles Emil Henry",male,54,1,0,11778,55.4417,C116,C 180 | 1070,2,"Becker, Mrs. Allen Oliver (Nellie E Baumgardner)",female,36,0,3,230136,39,F4,S 181 | 1071,1,"Compton, Mrs. Alexander Taylor (Mary Eliza Ingersoll)",female,64,0,2,PC 17756,83.1583,E45,C 182 | 1072,2,"McCrie, Mr. James Matthew",male,30,0,0,233478,13,,S 183 | 1073,1,"Compton, Mr. Alexander Taylor Jr",male,37,1,1,PC 17756,83.1583,E52,C 184 | 1074,1,"Marvin, Mrs. Daniel Warner (Mary Graham Carmichael Farquarson)",female,18,1,0,113773,53.1,D30,S 185 | 1075,3,"Lane, Mr. Patrick",male,,0,0,7935,7.75,,Q 186 | 1076,1,"Douglas, Mrs. Frederick Charles (Mary Helene Baxter)",female,27,1,1,PC 17558,247.5208,B58 B60,C 187 | 1077,2,"Maybery, Mr. Frank Hubert",male,40,0,0,239059,16,,S 188 | 1078,2,"Phillips, Miss. Alice Frances Louisa",female,21,0,1,S.O./P.P. 2,21,,S 189 | 1079,3,"Davies, Mr. Joseph",male,17,2,0,A/4 48873,8.05,,S 190 | 1080,3,"Sage, Miss. Ada",female,,8,2,CA. 2343,69.55,,S 191 | 1081,2,"Veal, Mr. James",male,40,0,0,28221,13,,S 192 | 1082,2,"Angle, Mr. William A",male,34,1,0,226875,26,,S 193 | 1083,1,"Salomon, Mr. Abraham L",male,,0,0,111163,26,,S 194 | 1084,3,"van Billiard, Master. Walter John",male,11.5,1,1,A/5. 851,14.5,,S 195 | 1085,2,"Lingane, Mr. John",male,61,0,0,235509,12.35,,Q 196 | 1086,2,"Drew, Master. Marshall Brines",male,8,0,2,28220,32.5,,S 197 | 1087,3,"Karlsson, Mr. Julius Konrad Eugen",male,33,0,0,347465,7.8542,,S 198 | 1088,1,"Spedden, Master. Robert Douglas",male,6,0,2,16966,134.5,E34,C 199 | 1089,3,"Nilsson, Miss. Berta Olivia",female,18,0,0,347066,7.775,,S 200 | 1090,2,"Baimbrigge, Mr. Charles Robert",male,23,0,0,C.A. 31030,10.5,,S 201 | 1091,3,"Rasmussen, Mrs. (Lena Jacobsen Solvang)",female,,0,0,65305,8.1125,,S 202 | 1092,3,"Murphy, Miss. Nora",female,,0,0,36568,15.5,,Q 203 | 1093,3,"Danbom, Master. 
Gilbert Sigvard Emanuel",male,0.33,0,2,347080,14.4,,S 204 | 1094,1,"Astor, Col. John Jacob",male,47,1,0,PC 17757,227.525,C62 C64,C 205 | 1095,2,"Quick, Miss. Winifred Vera",female,8,1,1,26360,26,,S 206 | 1096,2,"Andrew, Mr. Frank Thomas",male,25,0,0,C.A. 34050,10.5,,S 207 | 1097,1,"Omont, Mr. Alfred Fernand",male,,0,0,F.C. 12998,25.7417,,C 208 | 1098,3,"McGowan, Miss. Katherine",female,35,0,0,9232,7.75,,Q 209 | 1099,2,"Collett, Mr. Sidney C Stuart",male,24,0,0,28034,10.5,,S 210 | 1100,1,"Rosenbaum, Miss. Edith Louise",female,33,0,0,PC 17613,27.7208,A11,C 211 | 1101,3,"Delalic, Mr. Redjo",male,25,0,0,349250,7.8958,,S 212 | 1102,3,"Andersen, Mr. Albert Karvin",male,32,0,0,C 4001,22.525,,S 213 | 1103,3,"Finoli, Mr. Luigi",male,,0,0,SOTON/O.Q. 3101308,7.05,,S 214 | 1104,2,"Deacon, Mr. Percy William",male,17,0,0,S.O.C. 14879,73.5,,S 215 | 1105,2,"Howard, Mrs. Benjamin (Ellen Truelove Arman)",female,60,1,0,24065,26,,S 216 | 1106,3,"Andersson, Miss. Ida Augusta Margareta",female,38,4,2,347091,7.775,,S 217 | 1107,1,"Head, Mr. Christopher",male,42,0,0,113038,42.5,B11,S 218 | 1108,3,"Mahon, Miss. Bridget Delia",female,,0,0,330924,7.8792,,Q 219 | 1109,1,"Wick, Mr. George Dennick",male,57,1,1,36928,164.8667,,S 220 | 1110,1,"Widener, Mrs. George Dunton (Eleanor Elkins)",female,50,1,1,113503,211.5,C80,C 221 | 1111,3,"Thomson, Mr. Alexander Morrison",male,,0,0,32302,8.05,,S 222 | 1112,2,"Duran y More, Miss. Florentina",female,30,1,0,SC/PARIS 2148,13.8583,,C 223 | 1113,3,"Reynolds, Mr. Harold J",male,21,0,0,342684,8.05,,S 224 | 1114,2,"Cook, Mrs. (Selena Rogers)",female,22,0,0,W./C. 14266,10.5,F33,S 225 | 1115,3,"Karlsson, Mr. Einar Gervasius",male,21,0,0,350053,7.7958,,S 226 | 1116,1,"Candee, Mrs. Edward (Helen Churchill Hungerford)",female,53,0,0,PC 17606,27.4458,,C 227 | 1117,3,"Moubarek, Mrs. George (Omine Amenia"" Alexander)""",female,,0,2,2661,15.2458,,C 228 | 1118,3,"Asplund, Mr. Johan Charles",male,23,0,0,350054,7.7958,,S 229 | 1119,3,"McNeill, Miss. 
Bridget",female,,0,0,370368,7.75,,Q 230 | 1120,3,"Everett, Mr. Thomas James",male,40.5,0,0,C.A. 6212,15.1,,S 231 | 1121,2,"Hocking, Mr. Samuel James Metcalfe",male,36,0,0,242963,13,,S 232 | 1122,2,"Sweet, Mr. George Frederick",male,14,0,0,220845,65,,S 233 | 1123,1,"Willard, Miss. Constance",female,21,0,0,113795,26.55,,S 234 | 1124,3,"Wiklund, Mr. Karl Johan",male,21,1,0,3101266,6.4958,,S 235 | 1125,3,"Linehan, Mr. Michael",male,,0,0,330971,7.8792,,Q 236 | 1126,1,"Cumings, Mr. John Bradley",male,39,1,0,PC 17599,71.2833,C85,C 237 | 1127,3,"Vendel, Mr. Olof Edvin",male,20,0,0,350416,7.8542,,S 238 | 1128,1,"Warren, Mr. Frank Manley",male,64,1,0,110813,75.25,D37,C 239 | 1129,3,"Baccos, Mr. Raffull",male,20,0,0,2679,7.225,,C 240 | 1130,2,"Hiltunen, Miss. Marta",female,18,1,1,250650,13,,S 241 | 1131,1,"Douglas, Mrs. Walter Donald (Mahala Dutton)",female,48,1,0,PC 17761,106.425,C86,C 242 | 1132,1,"Lindstrom, Mrs. Carl Johan (Sigrid Posse)",female,55,0,0,112377,27.7208,,C 243 | 1133,2,"Christy, Mrs. (Alice Frances)",female,45,0,2,237789,30,,S 244 | 1134,1,"Spedden, Mr. Frederic Oakley",male,45,1,1,16966,134.5,E34,C 245 | 1135,3,"Hyman, Mr. Abraham",male,,0,0,3470,7.8875,,S 246 | 1136,3,"Johnston, Master. William Arthur Willie""""",male,,1,2,W./C. 6607,23.45,,S 247 | 1137,1,"Kenyon, Mr. Frederick R",male,41,1,0,17464,51.8625,D21,S 248 | 1138,2,"Karnes, Mrs. J Frank (Claire Bennett)",female,22,0,0,F.C.C. 13534,21,,S 249 | 1139,2,"Drew, Mr. James Vivian",male,42,1,1,28220,32.5,,S 250 | 1140,2,"Hold, Mrs. Stephen (Annie Margaret Hill)",female,29,1,0,26707,26,,S 251 | 1141,3,"Khalil, Mrs. Betros (Zahie Maria"" Elias)""",female,,1,0,2660,14.4542,,C 252 | 1142,2,"West, Miss. Barbara J",female,0.92,1,2,C.A. 34651,27.75,,S 253 | 1143,3,"Abrahamsson, Mr. Abraham August Johannes",male,20,0,0,SOTON/O2 3101284,7.925,,S 254 | 1144,1,"Clark, Mr. Walter Miller",male,27,1,0,13508,136.7792,C89,C 255 | 1145,3,"Salander, Mr. Karl Johan",male,24,0,0,7266,9.325,,S 256 | 1146,3,"Wenzel, Mr. 
Linhart",male,32.5,0,0,345775,9.5,,S 257 | 1147,3,"MacKay, Mr. George William",male,,0,0,C.A. 42795,7.55,,S 258 | 1148,3,"Mahon, Mr. John",male,,0,0,AQ/4 3130,7.75,,Q 259 | 1149,3,"Niklasson, Mr. Samuel",male,28,0,0,363611,8.05,,S 260 | 1150,2,"Bentham, Miss. Lilian W",female,19,0,0,28404,13,,S 261 | 1151,3,"Midtsjo, Mr. Karl Albert",male,21,0,0,345501,7.775,,S 262 | 1152,3,"de Messemaeker, Mr. Guillaume Joseph",male,36.5,1,0,345572,17.4,,S 263 | 1153,3,"Nilsson, Mr. August Ferdinand",male,21,0,0,350410,7.8542,,S 264 | 1154,2,"Wells, Mrs. Arthur Henry (Addie"" Dart Trevaskis)""",female,29,0,2,29103,23,,S 265 | 1155,3,"Klasen, Miss. Gertrud Emilia",female,1,1,1,350405,12.1833,,S 266 | 1156,2,"Portaluppi, Mr. Emilio Ilario Giuseppe",male,30,0,0,C.A. 34644,12.7375,,C 267 | 1157,3,"Lyntakoff, Mr. Stanko",male,,0,0,349235,7.8958,,S 268 | 1158,1,"Chisholm, Mr. Roderick Robert Crispin",male,,0,0,112051,0,,S 269 | 1159,3,"Warren, Mr. Charles William",male,,0,0,C.A. 49867,7.55,,S 270 | 1160,3,"Howard, Miss. May Elizabeth",female,,0,0,A. 2. 39186,8.05,,S 271 | 1161,3,"Pokrnic, Mr. Mate",male,17,0,0,315095,8.6625,,S 272 | 1162,1,"McCaffry, Mr. Thomas Francis",male,46,0,0,13050,75.2417,C6,C 273 | 1163,3,"Fox, Mr. Patrick",male,,0,0,368573,7.75,,Q 274 | 1164,1,"Clark, Mrs. Walter Miller (Virginia McDowell)",female,26,1,0,13508,136.7792,C89,C 275 | 1165,3,"Lennon, Miss. Mary",female,,1,0,370371,15.5,,Q 276 | 1166,3,"Saade, Mr. Jean Nassr",male,,0,0,2676,7.225,,C 277 | 1167,2,"Bryhl, Miss. Dagmar Jenny Ingeborg ",female,20,1,0,236853,26,,S 278 | 1168,2,"Parker, Mr. Clifford Richard",male,28,0,0,SC 14888,10.5,,S 279 | 1169,2,"Faunthorpe, Mr. Harry",male,40,1,0,2926,26,,S 280 | 1170,2,"Ware, Mr. John James",male,30,1,0,CA 31352,21,,S 281 | 1171,2,"Oxenham, Mr. Percy Thomas",male,22,0,0,W./C. 14260,10.5,,S 282 | 1172,3,"Oreskovic, Miss. Jelka",female,23,0,0,315085,8.6625,,S 283 | 1173,3,"Peacock, Master. Alfred Edward",male,0.75,1,1,SOTON/O.Q. 
3101315,13.775,,S 284 | 1174,3,"Fleming, Miss. Honora",female,,0,0,364859,7.75,,Q 285 | 1175,3,"Touma, Miss. Maria Youssef",female,9,1,1,2650,15.2458,,C 286 | 1176,3,"Rosblom, Miss. Salli Helena",female,2,1,1,370129,20.2125,,S 287 | 1177,3,"Dennis, Mr. William",male,36,0,0,A/5 21175,7.25,,S 288 | 1178,3,"Franklin, Mr. Charles (Charles Fardon)",male,,0,0,SOTON/O.Q. 3101314,7.25,,S 289 | 1179,1,"Snyder, Mr. John Pillsbury",male,24,1,0,21228,82.2667,B45,S 290 | 1180,3,"Mardirosian, Mr. Sarkis",male,,0,0,2655,7.2292,F E46,C 291 | 1181,3,"Ford, Mr. Arthur",male,,0,0,A/5 1478,8.05,,S 292 | 1182,1,"Rheims, Mr. George Alexander Lucien",male,,0,0,PC 17607,39.6,,S 293 | 1183,3,"Daly, Miss. Margaret Marcella Maggie""""",female,30,0,0,382650,6.95,,Q 294 | 1184,3,"Nasr, Mr. Mustafa",male,,0,0,2652,7.2292,,C 295 | 1185,1,"Dodge, Dr. Washington",male,53,1,1,33638,81.8583,A34,S 296 | 1186,3,"Wittevrongel, Mr. Camille",male,36,0,0,345771,9.5,,S 297 | 1187,3,"Angheloff, Mr. Minko",male,26,0,0,349202,7.8958,,S 298 | 1188,2,"Laroche, Miss. Louise",female,1,1,2,SC/Paris 2123,41.5792,,C 299 | 1189,3,"Samaan, Mr. Hanna",male,,2,0,2662,21.6792,,C 300 | 1190,1,"Loring, Mr. Joseph Holland",male,30,0,0,113801,45.5,,S 301 | 1191,3,"Johansson, Mr. Nils",male,29,0,0,347467,7.8542,,S 302 | 1192,3,"Olsson, Mr. Oscar Wilhelm",male,32,0,0,347079,7.775,,S 303 | 1193,2,"Malachard, Mr. Noel",male,,0,0,237735,15.0458,D,C 304 | 1194,2,"Phillips, Mr. Escott Robert",male,43,0,1,S.O./P.P. 2,21,,S 305 | 1195,3,"Pokrnic, Mr. Tome",male,24,0,0,315092,8.6625,,S 306 | 1196,3,"McCarthy, Miss. Catherine Katie""""",female,,0,0,383123,7.75,,Q 307 | 1197,1,"Crosby, Mrs. Edward Gifford (Catherine Elizabeth Halstead)",female,64,1,1,112901,26.55,B26,S 308 | 1198,1,"Allison, Mr. Hudson Joshua Creighton",male,30,1,2,113781,151.55,C22 C26,S 309 | 1199,3,"Aks, Master. Philip Frank",male,0.83,0,1,392091,9.35,,S 310 | 1200,1,"Hays, Mr. Charles Melville",male,55,1,1,12749,93.5,B69,S 311 | 1201,3,"Hansen, Mrs. 
Claus Peter (Jennie L Howard)",female,45,1,0,350026,14.1083,,S 312 | 1202,3,"Cacic, Mr. Jego Grga",male,18,0,0,315091,8.6625,,S 313 | 1203,3,"Vartanian, Mr. David",male,22,0,0,2658,7.225,,C 314 | 1204,3,"Sadowitz, Mr. Harry",male,,0,0,LP 1588,7.575,,S 315 | 1205,3,"Carr, Miss. Jeannie",female,37,0,0,368364,7.75,,Q 316 | 1206,1,"White, Mrs. John Stuart (Ella Holmes)",female,55,0,0,PC 17760,135.6333,C32,C 317 | 1207,3,"Hagardon, Miss. Kate",female,17,0,0,AQ/3. 30631,7.7333,,Q 318 | 1208,1,"Spencer, Mr. William Augustus",male,57,1,0,PC 17569,146.5208,B78,C 319 | 1209,2,"Rogers, Mr. Reginald Harry",male,19,0,0,28004,10.5,,S 320 | 1210,3,"Jonsson, Mr. Nils Hilding",male,27,0,0,350408,7.8542,,S 321 | 1211,2,"Jefferys, Mr. Ernest Wilfred",male,22,2,0,C.A. 31029,31.5,,S 322 | 1212,3,"Andersson, Mr. Johan Samuel",male,26,0,0,347075,7.775,,S 323 | 1213,3,"Krekorian, Mr. Neshan",male,25,0,0,2654,7.2292,F E57,C 324 | 1214,2,"Nesson, Mr. Israel",male,26,0,0,244368,13,F2,S 325 | 1215,1,"Rowe, Mr. Alfred G",male,33,0,0,113790,26.55,,S 326 | 1216,1,"Kreuchen, Miss. Emilie",female,39,0,0,24160,211.3375,,S 327 | 1217,3,"Assam, Mr. Ali",male,23,0,0,SOTON/O.Q. 3101309,7.05,,S 328 | 1218,2,"Becker, Miss. Ruth Elizabeth",female,12,2,1,230136,39,F4,S 329 | 1219,1,"Rosenshine, Mr. George (Mr George Thorne"")""",male,46,0,0,PC 17585,79.2,,C 330 | 1220,2,"Clarke, Mr. Charles Valentine",male,29,1,0,2003,26,,S 331 | 1221,2,"Enander, Mr. Ingvar",male,21,0,0,236854,13,,S 332 | 1222,2,"Davies, Mrs. John Morgan (Elizabeth Agnes Mary White) ",female,48,0,2,C.A. 33112,36.75,,S 333 | 1223,1,"Dulles, Mr. William Crothers",male,39,0,0,PC 17580,29.7,A18,C 334 | 1224,3,"Thomas, Mr. Tannous",male,,0,0,2684,7.225,,C 335 | 1225,3,"Nakid, Mrs. Said (Waika Mary"" Mowad)""",female,19,1,1,2653,15.7417,,C 336 | 1226,3,"Cor, Mr. Ivan",male,27,0,0,349229,7.8958,,S 337 | 1227,1,"Maguire, Mr. John Edward",male,30,0,0,110469,26,C106,S 338 | 1228,2,"de Brito, Mr. 
Jose Joaquim",male,32,0,0,244360,13,,S 339 | 1229,3,"Elias, Mr. Joseph",male,39,0,2,2675,7.2292,,C 340 | 1230,2,"Denbury, Mr. Herbert",male,25,0,0,C.A. 31029,31.5,,S 341 | 1231,3,"Betros, Master. Seman",male,,0,0,2622,7.2292,,C 342 | 1232,2,"Fillbrook, Mr. Joseph Charles",male,18,0,0,C.A. 15185,10.5,,S 343 | 1233,3,"Lundstrom, Mr. Thure Edvin",male,32,0,0,350403,7.5792,,S 344 | 1234,3,"Sage, Mr. John George",male,,1,9,CA. 2343,69.55,,S 345 | 1235,1,"Cardeza, Mrs. James Warburton Martinez (Charlotte Wardle Drake)",female,58,0,1,PC 17755,512.3292,B51 B53 B55,C 346 | 1236,3,"van Billiard, Master. James William",male,,1,1,A/5. 851,14.5,,S 347 | 1237,3,"Abelseth, Miss. Karen Marie",female,16,0,0,348125,7.65,,S 348 | 1238,2,"Botsford, Mr. William Hull",male,26,0,0,237670,13,,S 349 | 1239,3,"Whabee, Mrs. George Joseph (Shawneene Abi-Saab)",female,38,0,0,2688,7.2292,,C 350 | 1240,2,"Giles, Mr. Ralph",male,24,0,0,248726,13.5,,S 351 | 1241,2,"Walcroft, Miss. Nellie",female,31,0,0,F.C.C. 13528,21,,S 352 | 1242,1,"Greenfield, Mrs. Leo David (Blanche Strouse)",female,45,0,1,PC 17759,63.3583,D10 D12,C 353 | 1243,2,"Stokes, Mr. Philip Joseph",male,25,0,0,F.C.C. 13540,10.5,,S 354 | 1244,2,"Dibden, Mr. William",male,18,0,0,S.O.C. 14879,73.5,,S 355 | 1245,2,"Herman, Mr. Samuel",male,49,1,2,220845,65,,S 356 | 1246,3,"Dean, Miss. Elizabeth Gladys Millvina""""",female,0.17,1,2,C.A. 2315,20.575,,S 357 | 1247,1,"Julian, Mr. Henry Forbes",male,50,0,0,113044,26,E60,S 358 | 1248,1,"Brown, Mrs. John Murray (Caroline Lane Lamson)",female,59,2,0,11769,51.4792,C101,S 359 | 1249,3,"Lockyer, Mr. Edward",male,,0,0,1222,7.8792,,S 360 | 1250,3,"O'Keefe, Mr. Patrick",male,,0,0,368402,7.75,,Q 361 | 1251,3,"Lindell, Mrs. Edvard Bengtsson (Elin Gerda Persson)",female,30,1,0,349910,15.55,,S 362 | 1252,3,"Sage, Master. William Henry",male,14.5,8,2,CA. 2343,69.55,,S 363 | 1253,2,"Mallet, Mrs. Albert (Antoinette Magnin)",female,24,1,1,S.C./PARIS 2079,37.0042,,C 364 | 1254,2,"Ware, Mrs. 
John James (Florence Louise Long)",female,31,0,0,CA 31352,21,,S 365 | 1255,3,"Strilic, Mr. Ivan",male,27,0,0,315083,8.6625,,S 366 | 1256,1,"Harder, Mrs. George Achilles (Dorothy Annan)",female,25,1,0,11765,55.4417,E50,C 367 | 1257,3,"Sage, Mrs. John (Annie Bullen)",female,,1,9,CA. 2343,69.55,,S 368 | 1258,3,"Caram, Mr. Joseph",male,,1,0,2689,14.4583,,C 369 | 1259,3,"Riihivouri, Miss. Susanna Juhantytar Sanni""""",female,22,0,0,3101295,39.6875,,S 370 | 1260,1,"Gibson, Mrs. Leonard (Pauline C Boeson)",female,45,0,1,112378,59.4,,C 371 | 1261,2,"Pallas y Castello, Mr. Emilio",male,29,0,0,SC/PARIS 2147,13.8583,,C 372 | 1262,2,"Giles, Mr. Edgar",male,21,1,0,28133,11.5,,S 373 | 1263,1,"Wilson, Miss. Helen Alice",female,31,0,0,16966,134.5,E39 E41,C 374 | 1264,1,"Ismay, Mr. Joseph Bruce",male,49,0,0,112058,0,B52 B54 B56,S 375 | 1265,2,"Harbeck, Mr. William H",male,44,0,0,248746,13,,S 376 | 1266,1,"Dodge, Mrs. Washington (Ruth Vidaver)",female,54,1,1,33638,81.8583,A34,S 377 | 1267,1,"Bowen, Miss. Grace Scott",female,45,0,0,PC 17608,262.375,,C 378 | 1268,3,"Kink, Miss. Maria",female,22,2,0,315152,8.6625,,S 379 | 1269,2,"Cotterill, Mr. Henry Harry""""",male,21,0,0,29107,11.5,,S 380 | 1270,1,"Hipkins, Mr. William Edward",male,55,0,0,680,50,C39,S 381 | 1271,3,"Asplund, Master. Carl Edgar",male,5,4,2,347077,31.3875,,S 382 | 1272,3,"O'Connor, Mr. Patrick",male,,0,0,366713,7.75,,Q 383 | 1273,3,"Foley, Mr. Joseph",male,26,0,0,330910,7.8792,,Q 384 | 1274,3,"Risien, Mrs. Samuel (Emma)",female,,0,0,364498,14.5,,S 385 | 1275,3,"McNamee, Mrs. Neal (Eileen O'Leary)",female,19,1,0,376566,16.1,,S 386 | 1276,2,"Wheeler, Mr. Edwin Frederick""""",male,,0,0,SC/PARIS 2159,12.875,,S 387 | 1277,2,"Herman, Miss. Kate",female,24,1,2,220845,65,,S 388 | 1278,3,"Aronsson, Mr. Ernst Axel Algot",male,24,0,0,349911,7.775,,S 389 | 1279,2,"Ashby, Mr. John",male,57,0,0,244346,13,,S 390 | 1280,3,"Canavan, Mr. Patrick",male,21,0,0,364858,7.75,,Q 391 | 1281,3,"Palsson, Master. 
Paul Folke",male,6,3,1,349909,21.075,,S 392 | 1282,1,"Payne, Mr. Vivian Ponsonby",male,23,0,0,12749,93.5,B24,S 393 | 1283,1,"Lines, Mrs. Ernest H (Elizabeth Lindsey James)",female,51,0,1,PC 17592,39.4,D28,S 394 | 1284,3,"Abbott, Master. Eugene Joseph",male,13,0,2,C.A. 2673,20.25,,S 395 | 1285,2,"Gilbert, Mr. William",male,47,0,0,C.A. 30769,10.5,,S 396 | 1286,3,"Kink-Heilmann, Mr. Anton",male,29,3,1,315153,22.025,,S 397 | 1287,1,"Smith, Mrs. Lucien Philip (Mary Eloise Hughes)",female,18,1,0,13695,60,C31,S 398 | 1288,3,"Colbert, Mr. Patrick",male,24,0,0,371109,7.25,,Q 399 | 1289,1,"Frolicher-Stehli, Mrs. Maxmillian (Margaretha Emerentia Stehli)",female,48,1,1,13567,79.2,B41,C 400 | 1290,3,"Larsson-Rondberg, Mr. Edvard A",male,22,0,0,347065,7.775,,S 401 | 1291,3,"Conlon, Mr. Thomas Henry",male,31,0,0,21332,7.7333,,Q 402 | 1292,1,"Bonnell, Miss. Caroline",female,30,0,0,36928,164.8667,C7,S 403 | 1293,2,"Gale, Mr. Harry",male,38,1,0,28664,21,,S 404 | 1294,1,"Gibson, Miss. Dorothy Winifred",female,22,0,1,112378,59.4,,C 405 | 1295,1,"Carrau, Mr. Jose Pedro",male,17,0,0,113059,47.1,,S 406 | 1296,1,"Frauenthal, Mr. Isaac Gerald",male,43,1,0,17765,27.7208,D40,C 407 | 1297,2,"Nourney, Mr. Alfred (Baron von Drachstedt"")""",male,20,0,0,SC/PARIS 2166,13.8625,D38,C 408 | 1298,2,"Ware, Mr. William Jeffery",male,23,1,0,28666,10.5,,S 409 | 1299,1,"Widener, Mr. George Dunton",male,50,1,1,113503,211.5,C80,C 410 | 1300,3,"Riordan, Miss. Johanna Hannah""""",female,,0,0,334915,7.7208,,Q 411 | 1301,3,"Peacock, Miss. Treasteall",female,3,1,1,SOTON/O.Q. 3101315,13.775,,S 412 | 1302,3,"Naughton, Miss. Hannah",female,,0,0,365237,7.75,,Q 413 | 1303,1,"Minahan, Mrs. William Edward (Lillian E Thorpe)",female,37,1,0,19928,90,C78,Q 414 | 1304,3,"Henriksson, Miss. Jenny Lovisa",female,28,0,0,347086,7.775,,S 415 | 1305,3,"Spector, Mr. Woolf",male,,0,0,A.5. 3236,8.05,,S 416 | 1306,1,"Oliva y Ocana, Dona. Fermina",female,39,0,0,PC 17758,108.9,C105,C 417 | 1307,3,"Saether, Mr. 
Simon Sivertsen",male,38.5,0,0,SOTON/O.Q. 3101262,7.25,,S 418 | 1308,3,"Ware, Mr. Frederick",male,,0,0,359309,8.05,,S 419 | 1309,3,"Peter, Master. Michael J",male,,1,1,2668,22.3583,,C 420 | -------------------------------------------------------------------------------- /pandas_tricks.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 4 new time-saving tricks in pandas ([video](https://www.youtube.com/watch?v=-NbY7E9hKxk&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=32))\n", 8 | "\n", 9 | "- [My pandas video series (30 videos)](http://www.dataschool.io/easier-data-analysis-with-pandas/)\n", 10 | "- [GitHub repository](https://github.com/justmarkham/pandas-videos)\n", 11 | "- [pandas release notes](http://pandas.pydata.org/pandas-docs/stable/whatsnew.html)" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "data": { 21 | "text/plain": [ 22 | "'0.22.0'" 23 | ] 24 | }, 25 | "execution_count": 1, 26 | "metadata": {}, 27 | "output_type": "execute_result" 28 | } 29 | ], 30 | "source": [ 31 | "import pandas as pd\n", 32 | "pd.__version__" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "## 1. Create a datetime column from a DataFrame\n", 40 | "\n", 41 | "*New in 0.18.1*" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 2, 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "data": { 51 | "text/html": [ 52 | "
\n", 53 | "\n", 66 | "\n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | "
monthdayyearhour
01225201710
1115201811
\n", 93 | "
" 94 | ], 95 | "text/plain": [ 96 | " month day year hour\n", 97 | "0 12 25 2017 10\n", 98 | "1 1 15 2018 11" 99 | ] 100 | }, 101 | "execution_count": 2, 102 | "metadata": {}, 103 | "output_type": "execute_result" 104 | } 105 | ], 106 | "source": [ 107 | "# create an example DataFrame\n", 108 | "df = pd.DataFrame([[12, 25, 2017, 10], [1, 15, 2018, 11]],\n", 109 | " columns=['month', 'day', 'year', 'hour'])\n", 110 | "df" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 3, 116 | "metadata": {}, 117 | "outputs": [ 118 | { 119 | "data": { 120 | "text/plain": [ 121 | "0 2017-12-25 10:00:00\n", 122 | "1 2018-01-15 11:00:00\n", 123 | "dtype: datetime64[ns]" 124 | ] 125 | }, 126 | "execution_count": 3, 127 | "metadata": {}, 128 | "output_type": "execute_result" 129 | } 130 | ], 131 | "source": [ 132 | "# new: create a datetime column from the entire DataFrame\n", 133 | "pd.to_datetime(df)" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 4, 139 | "metadata": {}, 140 | "outputs": [ 141 | { 142 | "data": { 143 | "text/plain": [ 144 | "0 2017-12-25\n", 145 | "1 2018-01-15\n", 146 | "dtype: datetime64[ns]" 147 | ] 148 | }, 149 | "execution_count": 4, 150 | "metadata": {}, 151 | "output_type": "execute_result" 152 | } 153 | ], 154 | "source": [ 155 | "# new: create a datetime column from a subset of columns\n", 156 | "pd.to_datetime(df[['month', 'day', 'year']])" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 5, 162 | "metadata": {}, 163 | "outputs": [ 164 | { 165 | "data": { 166 | "text/html": [ 167 | "
\n", 168 | "\n", 181 | "\n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | "
monthdayyearhour
2017-12-251225201710
2018-01-15115201811
\n", 208 | "
" 209 | ], 210 | "text/plain": [ 211 | " month day year hour\n", 212 | "2017-12-25 12 25 2017 10\n", 213 | "2018-01-15 1 15 2018 11" 214 | ] 215 | }, 216 | "execution_count": 5, 217 | "metadata": {}, 218 | "output_type": "execute_result" 219 | } 220 | ], 221 | "source": [ 222 | "# overwrite the index\n", 223 | "df.index = pd.to_datetime(df[['month', 'day', 'year']])\n", 224 | "df" 225 | ] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": {}, 230 | "source": [ 231 | "- [More information](http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#assembling-datetimes)\n", 232 | "- [Video: How do I work with dates and times in pandas?](https://www.youtube.com/watch?v=yCgJGsg0Xa4&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=25)" 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": {}, 238 | "source": [ 239 | "## 2. Create a category column during file reading\n", 240 | "\n", 241 | "*New in 0.19.0*" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 6, 247 | "metadata": {}, 248 | "outputs": [ 249 | { 250 | "data": { 251 | "text/html": [ 252 | "
\n", 253 | "\n", 266 | "\n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | "
countrybeer_servingsspirit_servingswine_servingstotal_litres_of_pure_alcoholcontinent
0Afghanistan0000.0Asia
1Albania89132544.9Europe
2Algeria250140.7Africa
3Andorra24513831212.4Europe
4Angola21757455.9Africa
\n", 326 | "
" 327 | ], 328 | "text/plain": [ 329 | " country beer_servings spirit_servings wine_servings \\\n", 330 | "0 Afghanistan 0 0 0 \n", 331 | "1 Albania 89 132 54 \n", 332 | "2 Algeria 25 0 14 \n", 333 | "3 Andorra 245 138 312 \n", 334 | "4 Angola 217 57 45 \n", 335 | "\n", 336 | " total_litres_of_pure_alcohol continent \n", 337 | "0 0.0 Asia \n", 338 | "1 4.9 Europe \n", 339 | "2 0.7 Africa \n", 340 | "3 12.4 Europe \n", 341 | "4 5.9 Africa " 342 | ] 343 | }, 344 | "execution_count": 6, 345 | "metadata": {}, 346 | "output_type": "execute_result" 347 | } 348 | ], 349 | "source": [ 350 | "# read the drinks dataset into a DataFrame\n", 351 | "drinks = pd.read_csv('http://bit.ly/drinksbycountry')\n", 352 | "drinks.head()" 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": 7, 358 | "metadata": {}, 359 | "outputs": [ 360 | { 361 | "data": { 362 | "text/plain": [ 363 | "country object\n", 364 | "beer_servings int64\n", 365 | "spirit_servings int64\n", 366 | "wine_servings int64\n", 367 | "total_litres_of_pure_alcohol float64\n", 368 | "continent object\n", 369 | "dtype: object" 370 | ] 371 | }, 372 | "execution_count": 7, 373 | "metadata": {}, 374 | "output_type": "execute_result" 375 | } 376 | ], 377 | "source": [ 378 | "# data types are automatically detected\n", 379 | "drinks.dtypes" 380 | ] 381 | }, 382 | { 383 | "cell_type": "code", 384 | "execution_count": 8, 385 | "metadata": {}, 386 | "outputs": [ 387 | { 388 | "data": { 389 | "text/plain": [ 390 | "country object\n", 391 | "beer_servings int64\n", 392 | "spirit_servings int64\n", 393 | "wine_servings int64\n", 394 | "total_litres_of_pure_alcohol float64\n", 395 | "continent category\n", 396 | "dtype: object" 397 | ] 398 | }, 399 | "execution_count": 8, 400 | "metadata": {}, 401 | "output_type": "execute_result" 402 | } 403 | ], 404 | "source": [ 405 | "# old way to create a category (after file reading)\n", 406 | "drinks['continent'] = drinks.continent.astype('category')\n", 407 | 
"drinks.dtypes" 408 | ] 409 | }, 410 | { 411 | "cell_type": "code", 412 | "execution_count": 9, 413 | "metadata": {}, 414 | "outputs": [ 415 | { 416 | "data": { 417 | "text/plain": [ 418 | "country object\n", 419 | "beer_servings int64\n", 420 | "spirit_servings int64\n", 421 | "wine_servings int64\n", 422 | "total_litres_of_pure_alcohol float64\n", 423 | "continent category\n", 424 | "dtype: object" 425 | ] 426 | }, 427 | "execution_count": 9, 428 | "metadata": {}, 429 | "output_type": "execute_result" 430 | } 431 | ], 432 | "source": [ 433 | "# new way to create a category (during file reading)\n", 434 | "drinks = pd.read_csv('http://bit.ly/drinksbycountry', dtype={'continent':'category'})\n", 435 | "drinks.dtypes" 436 | ] 437 | }, 438 | { 439 | "cell_type": "markdown", 440 | "metadata": {}, 441 | "source": [ 442 | "- [More information](http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#read-csv-supports-parsing-categorical-directly)\n", 443 | "- [Video: How do I make my pandas DataFrame smaller and faster?](https://www.youtube.com/watch?v=wDYDYGyN_cw&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=21)" 444 | ] 445 | }, 446 | { 447 | "cell_type": "markdown", 448 | "metadata": {}, 449 | "source": [ 450 | "## 3. 
Convert the data type of multiple columns at once\n", 451 | "\n", 452 | "*New in 0.19.0*" 453 | ] 454 | }, 455 | { 456 | "cell_type": "code", 457 | "execution_count": 10, 458 | "metadata": {}, 459 | "outputs": [ 460 | { 461 | "data": { 462 | "text/plain": [ 463 | "country object\n", 464 | "beer_servings int64\n", 465 | "spirit_servings int64\n", 466 | "wine_servings int64\n", 467 | "total_litres_of_pure_alcohol float64\n", 468 | "continent object\n", 469 | "dtype: object" 470 | ] 471 | }, 472 | "execution_count": 10, 473 | "metadata": {}, 474 | "output_type": "execute_result" 475 | } 476 | ], 477 | "source": [ 478 | "# read the drinks dataset into a DataFrame\n", 479 | "drinks = pd.read_csv('http://bit.ly/drinksbycountry')\n", 480 | "drinks.dtypes" 481 | ] 482 | }, 483 | { 484 | "cell_type": "code", 485 | "execution_count": 11, 486 | "metadata": {}, 487 | "outputs": [ 488 | { 489 | "data": { 490 | "text/plain": [ 491 | "country object\n", 492 | "beer_servings float64\n", 493 | "spirit_servings float64\n", 494 | "wine_servings int64\n", 495 | "total_litres_of_pure_alcohol float64\n", 496 | "continent object\n", 497 | "dtype: object" 498 | ] 499 | }, 500 | "execution_count": 11, 501 | "metadata": {}, 502 | "output_type": "execute_result" 503 | } 504 | ], 505 | "source": [ 506 | "# old way to convert data types (one at a time)\n", 507 | "drinks['beer_servings'] = drinks.beer_servings.astype('float')\n", 508 | "drinks['spirit_servings'] = drinks.spirit_servings.astype('float')\n", 509 | "drinks.dtypes" 510 | ] 511 | }, 512 | { 513 | "cell_type": "code", 514 | "execution_count": 12, 515 | "metadata": {}, 516 | "outputs": [ 517 | { 518 | "data": { 519 | "text/plain": [ 520 | "country object\n", 521 | "beer_servings float64\n", 522 | "spirit_servings float64\n", 523 | "wine_servings int64\n", 524 | "total_litres_of_pure_alcohol float64\n", 525 | "continent object\n", 526 | "dtype: object" 527 | ] 528 | }, 529 | "execution_count": 12, 530 | "metadata": {}, 531 | 
"output_type": "execute_result" 532 | } 533 | ], 534 | "source": [ 535 | "# new way to convert data types (all at once)\n", 536 | "drinks = pd.read_csv('http://bit.ly/drinksbycountry')\n", 537 | "drinks = drinks.astype({'beer_servings':'float', 'spirit_servings':'float'})\n", 538 | "drinks.dtypes" 539 | ] 540 | }, 541 | { 542 | "cell_type": "markdown", 543 | "metadata": {}, 544 | "source": [ 545 | "- [More information](http://pandas.pydata.org/pandas-docs/stable/basics.html#astype)\n", 546 | "- [Video: How do I change the data type of a pandas Series?](https://www.youtube.com/watch?v=V0AWyzVMf54&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=13)" 547 | ] 548 | }, 549 | { 550 | "cell_type": "markdown", 551 | "metadata": {}, 552 | "source": [ 553 | "## 4. Apply multiple aggregations on a Series or DataFrame\n", 554 | "\n", 555 | "*New in 0.20.0*" 556 | ] 557 | }, 558 | { 559 | "cell_type": "code", 560 | "execution_count": 13, 561 | "metadata": {}, 562 | "outputs": [ 563 | { 564 | "data": { 565 | "text/plain": [ 566 | "continent\n", 567 | "Africa 61.471698\n", 568 | "Asia 37.045455\n", 569 | "Europe 193.777778\n", 570 | "North America 145.434783\n", 571 | "Oceania 89.687500\n", 572 | "South America 175.083333\n", 573 | "Name: beer_servings, dtype: float64" 574 | ] 575 | }, 576 | "execution_count": 13, 577 | "metadata": {}, 578 | "output_type": "execute_result" 579 | } 580 | ], 581 | "source": [ 582 | "# example of a single aggregation function after a groupby\n", 583 | "drinks.groupby('continent').beer_servings.mean()" 584 | ] 585 | }, 586 | { 587 | "cell_type": "code", 588 | "execution_count": 14, 589 | "metadata": {}, 590 | "outputs": [ 591 | { 592 | "data": { 593 | "text/html": [ 594 | "
\n", 595 | "\n", 608 | "\n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | "
meanminmax
continent
Africa61.4716980.0376.0
Asia37.0454550.0247.0
Europe193.7777780.0361.0
North America145.4347831.0285.0
Oceania89.6875000.0306.0
South America175.08333393.0333.0
\n", 662 | "
" 663 | ], 664 | "text/plain": [ 665 | " mean min max\n", 666 | "continent \n", 667 | "Africa 61.471698 0.0 376.0\n", 668 | "Asia 37.045455 0.0 247.0\n", 669 | "Europe 193.777778 0.0 361.0\n", 670 | "North America 145.434783 1.0 285.0\n", 671 | "Oceania 89.687500 0.0 306.0\n", 672 | "South America 175.083333 93.0 333.0" 673 | ] 674 | }, 675 | "execution_count": 14, 676 | "metadata": {}, 677 | "output_type": "execute_result" 678 | } 679 | ], 680 | "source": [ 681 | "# multiple aggregation functions can be applied simultaneously\n", 682 | "drinks.groupby('continent').beer_servings.agg(['mean', 'min', 'max'])" 683 | ] 684 | }, 685 | { 686 | "cell_type": "code", 687 | "execution_count": 15, 688 | "metadata": {}, 689 | "outputs": [ 690 | { 691 | "data": { 692 | "text/plain": [ 693 | "mean 106.160622\n", 694 | "min 0.000000\n", 695 | "max 376.000000\n", 696 | "Name: beer_servings, dtype: float64" 697 | ] 698 | }, 699 | "execution_count": 15, 700 | "metadata": {}, 701 | "output_type": "execute_result" 702 | } 703 | ], 704 | "source": [ 705 | "# new: apply the same aggregations to a Series\n", 706 | "drinks.beer_servings.agg(['mean', 'min', 'max'])" 707 | ] 708 | }, 709 | { 710 | "cell_type": "code", 711 | "execution_count": 16, 712 | "metadata": {}, 713 | "outputs": [ 714 | { 715 | "data": { 716 | "text/html": [ 717 | "
\n", 718 | "\n", 731 | "\n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | "
countrybeer_servingsspirit_servingswine_servingstotal_litres_of_pure_alcoholcontinent
maxZimbabwe376.000000438.000000370.00000014.400000South America
meanNaN106.16062280.99481949.4507774.717098NaN
minAfghanistan0.0000000.0000000.0000000.000000Africa
\n", 773 | "
" 774 | ], 775 | "text/plain": [ 776 | " country beer_servings spirit_servings wine_servings \\\n", 777 | "max Zimbabwe 376.000000 438.000000 370.000000 \n", 778 | "mean NaN 106.160622 80.994819 49.450777 \n", 779 | "min Afghanistan 0.000000 0.000000 0.000000 \n", 780 | "\n", 781 | " total_litres_of_pure_alcohol continent \n", 782 | "max 14.400000 South America \n", 783 | "mean 4.717098 NaN \n", 784 | "min 0.000000 Africa " 785 | ] 786 | }, 787 | "execution_count": 16, 788 | "metadata": {}, 789 | "output_type": "execute_result" 790 | } 791 | ], 792 | "source": [ 793 | "# new: apply the same aggregations to a DataFrame\n", 794 | "drinks.agg(['mean', 'min', 'max'])" 795 | ] 796 | }, 797 | { 798 | "cell_type": "code", 799 | "execution_count": 17, 800 | "metadata": {}, 801 | "outputs": [ 802 | { 803 | "data": { 804 | "text/html": [ 805 | "
\n", 806 | "\n", 819 | "\n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | "
beer_servingsspirit_servingswine_servingstotal_litres_of_pure_alcohol
count193.000000193.000000193.000000193.000000
mean106.16062280.99481949.4507774.717098
std101.14310388.28431279.6975983.773298
min0.0000000.0000000.0000000.000000
25%20.0000004.0000001.0000001.300000
50%76.00000056.0000008.0000004.200000
75%188.000000128.00000059.0000007.200000
max376.000000438.000000370.00000014.400000
\n", 888 | "
" 889 | ], 890 | "text/plain": [ 891 | " beer_servings spirit_servings wine_servings \\\n", 892 | "count 193.000000 193.000000 193.000000 \n", 893 | "mean 106.160622 80.994819 49.450777 \n", 894 | "std 101.143103 88.284312 79.697598 \n", 895 | "min 0.000000 0.000000 0.000000 \n", 896 | "25% 20.000000 4.000000 1.000000 \n", 897 | "50% 76.000000 56.000000 8.000000 \n", 898 | "75% 188.000000 128.000000 59.000000 \n", 899 | "max 376.000000 438.000000 370.000000 \n", 900 | "\n", 901 | " total_litres_of_pure_alcohol \n", 902 | "count 193.000000 \n", 903 | "mean 4.717098 \n", 904 | "std 3.773298 \n", 905 | "min 0.000000 \n", 906 | "25% 1.300000 \n", 907 | "50% 4.200000 \n", 908 | "75% 7.200000 \n", 909 | "max 14.400000 " 910 | ] 911 | }, 912 | "execution_count": 17, 913 | "metadata": {}, 914 | "output_type": "execute_result" 915 | } 916 | ], 917 | "source": [ 918 | "# DataFrame describe method provides similar functionality but is less flexible\n", 919 | "drinks.describe()" 920 | ] 921 | }, 922 | { 923 | "cell_type": "markdown", 924 | "metadata": {}, 925 | "source": [ 926 | "- [More information](http://pandas.pydata.org/pandas-docs/stable/basics.html#basics-aggregate)\n", 927 | "- [Video: When should I use a \"groupby\" in pandas?](https://www.youtube.com/watch?v=qy0fDqoMJx8&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=14)" 928 | ] 929 | }, 930 | { 931 | "cell_type": "markdown", 932 | "metadata": {}, 933 | "source": [ 934 | "## Bonus: Download the official pandas cheat sheet\n", 935 | "\n", 936 | "*New in 0.19.2*\n", 937 | "\n", 938 | "[Cheat sheet (PDF)](https://github.com/pandas-dev/pandas/blob/master/doc/cheatsheet/Pandas_Cheat_Sheet.pdf)" 939 | ] 940 | } 941 | ], 942 | "metadata": { 943 | "kernelspec": { 944 | "display_name": "Python 3", 945 | "language": "python", 946 | "name": "python3" 947 | }, 948 | "language_info": { 949 | "codemirror_mode": { 950 | "name": "ipython", 951 | "version": 3 952 | }, 953 | "file_extension": ".py", 954 | "mimetype": "text/x-python", 
955 | "name": "python", 956 | "nbconvert_exporter": "python", 957 | "pygments_lexer": "ipython3", 958 | "version": "3.5.4" 959 | } 960 | }, 961 | "nbformat": 4, 962 | "nbformat_minor": 2 963 | } 964 | -------------------------------------------------------------------------------- /pandas_merge.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# How do I merge DataFrames in pandas? ([video](https://www.youtube.com/watch?v=iYWKfUOtGaw&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=32))\n", 8 | "\n", 9 | "- [My pandas video series](https://www.dataschool.io/easier-data-analysis-with-pandas/)\n", 10 | "- [GitHub repository](https://github.com/justmarkham/pandas-videos)" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "## Table of contents\n", 18 | "\n", 19 | "1. Selecting a Function\n", 20 | "2. Joining (Merging) DataFrames\n", 21 | "3. What if...?\n", 22 | "4. 
Four Types of Joins" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "# Part 1: Selecting a Function" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "Taken from [Merging DataFrames with pandas](https://www.datacamp.com/courses/merging-dataframes-with-pandas?tap_a=5644-dce66f&tap_s=280411-a25fc8) (DataCamp course):\n", 37 | "\n", 38 | "- `df1.append(df2)`: stacking vertically\n", 39 | "- `pd.concat([df1, df2])`:\n", 40 | " - stacking many horizontally or vertically\n", 41 | " - simple inner/outer joins on Indexes\n", 42 | "- `df1.join(df2)`: inner/outer/left/right joins on Indexes\n", 43 | "- `pd.merge(df1, df2)`: many joins on multiple columns" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "# Part 2: Joining (Merging) DataFrames" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "Using the [MovieLens 100k data](http://grouplens.org/datasets/movielens/), let's create two DataFrames:\n", 58 | "\n", 59 | "- **movies**: shows information about movies, namely a unique **movie_id** and its **title**\n", 60 | "- **ratings**: shows the **rating** that a particular **user_id** gave to a particular **movie_id** at a particular **timestamp**" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 1, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "import pandas as pd" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "## Movies" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 2, 82 | "metadata": {}, 83 | "outputs": [ 84 | { 85 | "data": { 86 | "text/html": [ 87 | "
\n", 88 | "\n", 101 | "\n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | "
movie_idtitle
01Toy Story (1995)
12GoldenEye (1995)
23Four Rooms (1995)
34Get Shorty (1995)
45Copycat (1995)
\n", 137 | "
" 138 | ], 139 | "text/plain": [ 140 | " movie_id title\n", 141 | "0 1 Toy Story (1995)\n", 142 | "1 2 GoldenEye (1995)\n", 143 | "2 3 Four Rooms (1995)\n", 144 | "3 4 Get Shorty (1995)\n", 145 | "4 5 Copycat (1995)" 146 | ] 147 | }, 148 | "execution_count": 2, 149 | "metadata": {}, 150 | "output_type": "execute_result" 151 | } 152 | ], 153 | "source": [ 154 | "movie_cols = ['movie_id', 'title']\n", 155 | "movies = pd.read_table('data/u.item', sep='|', header=None, names=movie_cols, usecols=[0, 1])\n", 156 | "movies.head()" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 3, 162 | "metadata": {}, 163 | "outputs": [ 164 | { 165 | "data": { 166 | "text/plain": [ 167 | "(1682, 2)" 168 | ] 169 | }, 170 | "execution_count": 3, 171 | "metadata": {}, 172 | "output_type": "execute_result" 173 | } 174 | ], 175 | "source": [ 176 | "movies.shape" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 4, 182 | "metadata": {}, 183 | "outputs": [ 184 | { 185 | "data": { 186 | "text/plain": [ 187 | "1682" 188 | ] 189 | }, 190 | "execution_count": 4, 191 | "metadata": {}, 192 | "output_type": "execute_result" 193 | } 194 | ], 195 | "source": [ 196 | "movies.movie_id.nunique()" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [ 203 | "## Ratings" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 5, 209 | "metadata": {}, 210 | "outputs": [ 211 | { 212 | "data": { 213 | "text/html": [ 214 | "
\n", 215 | "\n", 228 | "\n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | "
user_idmovie_idratingtimestamp
01962423881250949
11863023891717742
2223771878887116
3244512880606923
41663461886397596
\n", 276 | "
" 277 | ], 278 | "text/plain": [ 279 | " user_id movie_id rating timestamp\n", 280 | "0 196 242 3 881250949\n", 281 | "1 186 302 3 891717742\n", 282 | "2 22 377 1 878887116\n", 283 | "3 244 51 2 880606923\n", 284 | "4 166 346 1 886397596" 285 | ] 286 | }, 287 | "execution_count": 5, 288 | "metadata": {}, 289 | "output_type": "execute_result" 290 | } 291 | ], 292 | "source": [ 293 | "rating_cols = ['user_id', 'movie_id', 'rating', 'timestamp']\n", 294 | "ratings = pd.read_table('data/u.data', sep='\\t', header=None, names=rating_cols)\n", 295 | "ratings.head()" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": 6, 301 | "metadata": {}, 302 | "outputs": [ 303 | { 304 | "data": { 305 | "text/plain": [ 306 | "(100000, 4)" 307 | ] 308 | }, 309 | "execution_count": 6, 310 | "metadata": {}, 311 | "output_type": "execute_result" 312 | } 313 | ], 314 | "source": [ 315 | "ratings.shape" 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": 7, 321 | "metadata": {}, 322 | "outputs": [ 323 | { 324 | "data": { 325 | "text/plain": [ 326 | "1682" 327 | ] 328 | }, 329 | "execution_count": 7, 330 | "metadata": {}, 331 | "output_type": "execute_result" 332 | } 333 | ], 334 | "source": [ 335 | "ratings.movie_id.nunique()" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": 8, 341 | "metadata": {}, 342 | "outputs": [ 343 | { 344 | "data": { 345 | "text/html": [ 346 | "
\n", 347 | "\n", 360 | "\n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | "
user_idmovie_idratingtimestamp
2430814887736532
45428715875334088
95714814877019411
97128014891700426
13246613883601324
\n", 408 | "
" 409 | ], 410 | "text/plain": [ 411 | " user_id movie_id rating timestamp\n", 412 | "24 308 1 4 887736532\n", 413 | "454 287 1 5 875334088\n", 414 | "957 148 1 4 877019411\n", 415 | "971 280 1 4 891700426\n", 416 | "1324 66 1 3 883601324" 417 | ] 418 | }, 419 | "execution_count": 8, 420 | "metadata": {}, 421 | "output_type": "execute_result" 422 | } 423 | ], 424 | "source": [ 425 | "ratings.loc[ratings.movie_id == 1, :].head()" 426 | ] 427 | }, 428 | { 429 | "cell_type": "markdown", 430 | "metadata": {}, 431 | "source": [ 432 | "## Merging Movies and Ratings" 433 | ] 434 | }, 435 | { 436 | "cell_type": "markdown", 437 | "metadata": {}, 438 | "source": [ 439 | "Let's pretend that you want to examine the ratings DataFrame, but you want to know the **title** of each movie rather than its **movie_id**. The best way to accomplish this objective is by \"joining\" (or \"merging\") the DataFrames using the Pandas `merge` function:" 440 | ] 441 | }, 442 | { 443 | "cell_type": "code", 444 | "execution_count": 9, 445 | "metadata": {}, 446 | "outputs": [ 447 | { 448 | "data": { 449 | "text/plain": [ 450 | "Index(['movie_id', 'title'], dtype='object')" 451 | ] 452 | }, 453 | "execution_count": 9, 454 | "metadata": {}, 455 | "output_type": "execute_result" 456 | } 457 | ], 458 | "source": [ 459 | "movies.columns" 460 | ] 461 | }, 462 | { 463 | "cell_type": "code", 464 | "execution_count": 10, 465 | "metadata": {}, 466 | "outputs": [ 467 | { 468 | "data": { 469 | "text/plain": [ 470 | "Index(['user_id', 'movie_id', 'rating', 'timestamp'], dtype='object')" 471 | ] 472 | }, 473 | "execution_count": 10, 474 | "metadata": {}, 475 | "output_type": "execute_result" 476 | } 477 | ], 478 | "source": [ 479 | "ratings.columns" 480 | ] 481 | }, 482 | { 483 | "cell_type": "code", 484 | "execution_count": 11, 485 | "metadata": {}, 486 | "outputs": [ 487 | { 488 | "data": { 489 | "text/plain": [ 490 | "Index(['movie_id', 'title', 'user_id', 'rating', 'timestamp'], dtype='object')" 491 | ] 492 
| }, 493 | "execution_count": 11, 494 | "metadata": {}, 495 | "output_type": "execute_result" 496 | } 497 | ], 498 | "source": [ 499 | "movie_ratings = pd.merge(movies, ratings)\n", 500 | "movie_ratings.columns" 501 | ] 502 | }, 503 | { 504 | "cell_type": "code", 505 | "execution_count": 12, 506 | "metadata": {}, 507 | "outputs": [ 508 | { 509 | "data": { 510 | "text/html": [ 511 | "
\n", 512 | "\n", 525 | "\n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | "
movie_idtitleuser_idratingtimestamp
01Toy Story (1995)3084887736532
11Toy Story (1995)2875875334088
21Toy Story (1995)1484877019411
31Toy Story (1995)2804891700426
41Toy Story (1995)663883601324
\n", 579 | "
" 580 | ], 581 | "text/plain": [ 582 | " movie_id title user_id rating timestamp\n", 583 | "0 1 Toy Story (1995) 308 4 887736532\n", 584 | "1 1 Toy Story (1995) 287 5 875334088\n", 585 | "2 1 Toy Story (1995) 148 4 877019411\n", 586 | "3 1 Toy Story (1995) 280 4 891700426\n", 587 | "4 1 Toy Story (1995) 66 3 883601324" 588 | ] 589 | }, 590 | "execution_count": 12, 591 | "metadata": {}, 592 | "output_type": "execute_result" 593 | } 594 | ], 595 | "source": [ 596 | "movie_ratings.head()" 597 | ] 598 | }, 599 | { 600 | "cell_type": "code", 601 | "execution_count": 13, 602 | "metadata": {}, 603 | "outputs": [ 604 | { 605 | "data": { 606 | "text/plain": [ 607 | "(100000, 5)" 608 | ] 609 | }, 610 | "execution_count": 13, 611 | "metadata": {}, 612 | "output_type": "execute_result" 613 | } 614 | ], 615 | "source": [ 616 | "movie_ratings.shape" 617 | ] 618 | }, 619 | { 620 | "cell_type": "markdown", 621 | "metadata": {}, 622 | "source": [ 623 | "Here's what just happened:\n", 624 | "\n", 625 | "- Pandas noticed that movies and ratings had one column in common, namely **movie_id**. This is the \"key\" on which the DataFrames will be joined.\n", 626 | "- The first **movie_id** in movies is 1. Thus, Pandas looked through every row in the ratings DataFrame, searching for a movie_id of 1. Every time it found such a row, it recorded the **user_id**, **rating**, and **timestamp** listed in that row. In this case, it found 452 matching rows.\n", 627 | "- The second **movie_id** in movies is 2. 
Again, Pandas did a search of ratings and found 131 matching rows.\n", 628 | "- This process was repeated for all of the remaining rows in movies.\n", 629 | "\n", 630 | "At the end of the process, the movie_ratings DataFrame is created, which contains the two columns from movies (**movie_id** and **title**) and the three other columns from ratings (**user_id**, **rating**, and **timestamp**).\n", 631 | "\n", 632 | "- **movie_id** 1 and its **title** are listed 452 times, next to the **user_id**, **rating**, and **timestamp** for each of the 452 matching ratings.\n", 633 | "- **movie_id** 2 and its **title** are listed 131 times, next to the **user_id**, **rating**, and **timestamp** for each of the 131 matching ratings.\n", 634 | "- And so on, for every movie in the dataset." 635 | ] 636 | }, 637 | { 638 | "cell_type": "code", 639 | "execution_count": 14, 640 | "metadata": {}, 641 | "outputs": [ 642 | { 643 | "name": "stdout", 644 | "output_type": "stream", 645 | "text": [ 646 | "(1682, 2)\n", 647 | "(100000, 4)\n", 648 | "(100000, 5)\n" 649 | ] 650 | } 651 | ], 652 | "source": [ 653 | "print(movies.shape)\n", 654 | "print(ratings.shape)\n", 655 | "print(movie_ratings.shape)" 656 | ] 657 | }, 658 | { 659 | "cell_type": "markdown", 660 | "metadata": {}, 661 | "source": [ 662 | "Notice the shapes of the three DataFrames:\n", 663 | "\n", 664 | "- There are 1682 rows in the movies DataFrame.\n", 665 | "- There are 100000 rows in the ratings DataFrame.\n", 666 | "- The `merge` function resulted in a movie_ratings DataFrame with 100000 rows, because every row from ratings matched a row from movies.\n", 667 | "- The movie_ratings DataFrame has 5 columns, namely the 2 columns from movies, plus the 4 columns from ratings, minus the 1 column in common.\n", 668 | "\n", 669 | "By default, the `merge` function joins the DataFrames using all column names that are in common (**movie_id**, in this case). 
The [documentation](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.merge.html) explains how you can override this behavior." 670 | ] 671 | }, 672 | { 673 | "cell_type": "markdown", 674 | "metadata": {}, 675 | "source": [ 676 | "# Part 3: What if...?" 677 | ] 678 | }, 679 | { 680 | "cell_type": "markdown", 681 | "metadata": {}, 682 | "source": [ 683 | "## What if the columns you want to join on don't have the same name?" 684 | ] 685 | }, 686 | { 687 | "cell_type": "code", 688 | "execution_count": 15, 689 | "metadata": {}, 690 | "outputs": [ 691 | { 692 | "data": { 693 | "text/plain": [ 694 | "Index(['m_id', 'title'], dtype='object')" 695 | ] 696 | }, 697 | "execution_count": 15, 698 | "metadata": {}, 699 | "output_type": "execute_result" 700 | } 701 | ], 702 | "source": [ 703 | "movies.columns = ['m_id', 'title']\n", 704 | "movies.columns" 705 | ] 706 | }, 707 | { 708 | "cell_type": "code", 709 | "execution_count": 16, 710 | "metadata": {}, 711 | "outputs": [ 712 | { 713 | "data": { 714 | "text/plain": [ 715 | "Index(['user_id', 'movie_id', 'rating', 'timestamp'], dtype='object')" 716 | ] 717 | }, 718 | "execution_count": 16, 719 | "metadata": {}, 720 | "output_type": "execute_result" 721 | } 722 | ], 723 | "source": [ 724 | "ratings.columns" 725 | ] 726 | }, 727 | { 728 | "cell_type": "code", 729 | "execution_count": 17, 730 | "metadata": {}, 731 | "outputs": [ 732 | { 733 | "data": { 734 | "text/html": [ 735 | "
\n", 736 | "\n", 749 | "\n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | "
m_idtitleuser_idmovie_idratingtimestamp
01Toy Story (1995)30814887736532
11Toy Story (1995)28715875334088
21Toy Story (1995)14814877019411
31Toy Story (1995)28014891700426
41Toy Story (1995)6613883601324
\n", 809 | "
" 810 | ], 811 | "text/plain": [ 812 | " m_id title user_id movie_id rating timestamp\n", 813 | "0 1 Toy Story (1995) 308 1 4 887736532\n", 814 | "1 1 Toy Story (1995) 287 1 5 875334088\n", 815 | "2 1 Toy Story (1995) 148 1 4 877019411\n", 816 | "3 1 Toy Story (1995) 280 1 4 891700426\n", 817 | "4 1 Toy Story (1995) 66 1 3 883601324" 818 | ] 819 | }, 820 | "execution_count": 17, 821 | "metadata": {}, 822 | "output_type": "execute_result" 823 | } 824 | ], 825 | "source": [ 826 | "pd.merge(movies, ratings, left_on='m_id', right_on='movie_id').head()" 827 | ] 828 | }, 829 | { 830 | "cell_type": "markdown", 831 | "metadata": {}, 832 | "source": [ 833 | "## What if you want to join on one index?" 834 | ] 835 | }, 836 | { 837 | "cell_type": "code", 838 | "execution_count": 18, 839 | "metadata": {}, 840 | "outputs": [ 841 | { 842 | "data": { 843 | "text/html": [ 844 | "
\n", 845 | "\n", 858 | "\n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | "
title
m_id
1Toy Story (1995)
2GoldenEye (1995)
3Four Rooms (1995)
4Get Shorty (1995)
5Copycat (1995)
\n", 892 | "
" 893 | ], 894 | "text/plain": [ 895 | " title\n", 896 | "m_id \n", 897 | "1 Toy Story (1995)\n", 898 | "2 GoldenEye (1995)\n", 899 | "3 Four Rooms (1995)\n", 900 | "4 Get Shorty (1995)\n", 901 | "5 Copycat (1995)" 902 | ] 903 | }, 904 | "execution_count": 18, 905 | "metadata": {}, 906 | "output_type": "execute_result" 907 | } 908 | ], 909 | "source": [ 910 | "movies = movies.set_index('m_id')\n", 911 | "movies.head()" 912 | ] 913 | }, 914 | { 915 | "cell_type": "code", 916 | "execution_count": 19, 917 | "metadata": {}, 918 | "outputs": [ 919 | { 920 | "data": { 921 | "text/html": [ 922 | "
\n", 923 | "\n", 936 | "\n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | "
titleuser_idmovie_idratingtimestamp
24Toy Story (1995)30814887736532
454Toy Story (1995)28715875334088
957Toy Story (1995)14814877019411
971Toy Story (1995)28014891700426
1324Toy Story (1995)6613883601324
\n", 990 | "
" 991 | ], 992 | "text/plain": [ 993 | " title user_id movie_id rating timestamp\n", 994 | "24 Toy Story (1995) 308 1 4 887736532\n", 995 | "454 Toy Story (1995) 287 1 5 875334088\n", 996 | "957 Toy Story (1995) 148 1 4 877019411\n", 997 | "971 Toy Story (1995) 280 1 4 891700426\n", 998 | "1324 Toy Story (1995) 66 1 3 883601324" 999 | ] 1000 | }, 1001 | "execution_count": 19, 1002 | "metadata": {}, 1003 | "output_type": "execute_result" 1004 | } 1005 | ], 1006 | "source": [ 1007 | "pd.merge(movies, ratings, left_index=True, right_on='movie_id').head()" 1008 | ] 1009 | }, 1010 | { 1011 | "cell_type": "markdown", 1012 | "metadata": {}, 1013 | "source": [ 1014 | "## What if you want to join on two indexes?" 1015 | ] 1016 | }, 1017 | { 1018 | "cell_type": "code", 1019 | "execution_count": 20, 1020 | "metadata": {}, 1021 | "outputs": [ 1022 | { 1023 | "data": { 1024 | "text/html": [ 1025 | "
\n", 1026 | "\n", 1039 | "\n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | "
user_idratingtimestamp
movie_id
2421963881250949
3021863891717742
377221878887116
512442880606923
3461661886397596
\n", 1087 | "
" 1088 | ], 1089 | "text/plain": [ 1090 | " user_id rating timestamp\n", 1091 | "movie_id \n", 1092 | "242 196 3 881250949\n", 1093 | "302 186 3 891717742\n", 1094 | "377 22 1 878887116\n", 1095 | "51 244 2 880606923\n", 1096 | "346 166 1 886397596" 1097 | ] 1098 | }, 1099 | "execution_count": 20, 1100 | "metadata": {}, 1101 | "output_type": "execute_result" 1102 | } 1103 | ], 1104 | "source": [ 1105 | "ratings = ratings.set_index('movie_id')\n", 1106 | "ratings.head()" 1107 | ] 1108 | }, 1109 | { 1110 | "cell_type": "code", 1111 | "execution_count": 21, 1112 | "metadata": {}, 1113 | "outputs": [ 1114 | { 1115 | "data": { 1116 | "text/html": [ 1117 | "
\n", 1118 | "\n", 1131 | "\n", 1132 | " \n", 1133 | " \n", 1134 | " \n", 1135 | " \n", 1136 | " \n", 1137 | " \n", 1138 | " \n", 1139 | " \n", 1140 | " \n", 1141 | " \n", 1142 | " \n", 1143 | " \n", 1144 | " \n", 1145 | " \n", 1146 | " \n", 1147 | " \n", 1148 | " \n", 1149 | " \n", 1150 | " \n", 1151 | " \n", 1152 | " \n", 1153 | " \n", 1154 | " \n", 1155 | " \n", 1156 | " \n", 1157 | " \n", 1158 | " \n", 1159 | " \n", 1160 | " \n", 1161 | " \n", 1162 | " \n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | "
titleuser_idratingtimestamp
1Toy Story (1995)3084887736532
1Toy Story (1995)2875875334088
1Toy Story (1995)1484877019411
1Toy Story (1995)2804891700426
1Toy Story (1995)663883601324
\n", 1179 | "
" 1180 | ], 1181 | "text/plain": [ 1182 | " title user_id rating timestamp\n", 1183 | "1 Toy Story (1995) 308 4 887736532\n", 1184 | "1 Toy Story (1995) 287 5 875334088\n", 1185 | "1 Toy Story (1995) 148 4 877019411\n", 1186 | "1 Toy Story (1995) 280 4 891700426\n", 1187 | "1 Toy Story (1995) 66 3 883601324" 1188 | ] 1189 | }, 1190 | "execution_count": 21, 1191 | "metadata": {}, 1192 | "output_type": "execute_result" 1193 | } 1194 | ], 1195 | "source": [ 1196 | "pd.merge(movies, ratings, left_index=True, right_index=True).head()" 1197 | ] 1198 | }, 1199 | { 1200 | "cell_type": "markdown", 1201 | "metadata": {}, 1202 | "source": [ 1203 | "# Part 4: Four Types of Joins" 1204 | ] 1205 | }, 1206 | { 1207 | "cell_type": "markdown", 1208 | "metadata": {}, 1209 | "source": [ 1210 | "There are actually four types of joins supported by the pandas `merge` function. Here's how they are described by the documentation:\n", 1211 | "\n", 1212 | "- **inner:** use intersection of keys from both frames, similar to a SQL inner join; preserve the order of the left keys\n", 1213 | "- **outer:** use union of keys from both frames, similar to a SQL full outer join; sort keys lexicographically\n", 1214 | "- **left:** use only keys from left frame, similar to a SQL left outer join; preserve key order\n", 1215 | "- **right:** use only keys from right frame, similar to a SQL right outer join; preserve key order\n", 1216 | "\n", 1217 | "The default is the \"inner join\", which was used when creating the `movie_ratings` DataFrame.\n", 1218 | "\n", 1219 | "It's easiest to understand the different types by looking at some simple examples:" 1220 | ] 1221 | }, 1222 | { 1223 | "cell_type": "markdown", 1224 | "metadata": {}, 1225 | "source": [ 1226 | "## Example DataFrames A and B" 1227 | ] 1228 | }, 1229 | { 1230 | "cell_type": "code", 1231 | "execution_count": 22, 1232 | "metadata": {}, 1233 | "outputs": [ 1234 | { 1235 | "data": { 1236 | "text/html": [ 1237 | "
\n", 1238 | "\n", 1251 | "\n", 1252 | " \n", 1253 | " \n", 1254 | " \n", 1255 | " \n", 1256 | " \n", 1257 | " \n", 1258 | " \n", 1259 | " \n", 1260 | " \n", 1261 | " \n", 1262 | " \n", 1263 | " \n", 1264 | " \n", 1265 | " \n", 1266 | " \n", 1267 | " \n", 1268 | " \n", 1269 | " \n", 1270 | " \n", 1271 | " \n", 1272 | " \n", 1273 | " \n", 1274 | " \n", 1275 | " \n", 1276 | "
colornum
0green1
1yellow2
2red3
\n", 1277 | "
" 1278 | ], 1279 | "text/plain": [ 1280 | " color num\n", 1281 | "0 green 1\n", 1282 | "1 yellow 2\n", 1283 | "2 red 3" 1284 | ] 1285 | }, 1286 | "execution_count": 22, 1287 | "metadata": {}, 1288 | "output_type": "execute_result" 1289 | } 1290 | ], 1291 | "source": [ 1292 | "A = pd.DataFrame({'color': ['green', 'yellow', 'red'], 'num':[1, 2, 3]})\n", 1293 | "A" 1294 | ] 1295 | }, 1296 | { 1297 | "cell_type": "code", 1298 | "execution_count": 23, 1299 | "metadata": {}, 1300 | "outputs": [ 1301 | { 1302 | "data": { 1303 | "text/html": [ 1304 | "
\n", 1305 | "\n", 1318 | "\n", 1319 | " \n", 1320 | " \n", 1321 | " \n", 1322 | " \n", 1323 | " \n", 1324 | " \n", 1325 | " \n", 1326 | " \n", 1327 | " \n", 1328 | " \n", 1329 | " \n", 1330 | " \n", 1331 | " \n", 1332 | " \n", 1333 | " \n", 1334 | " \n", 1335 | " \n", 1336 | " \n", 1337 | " \n", 1338 | " \n", 1339 | " \n", 1340 | " \n", 1341 | " \n", 1342 | " \n", 1343 | "
colorsize
0greenS
1yellowM
2pinkL
\n", 1344 | "
" 1345 | ], 1346 | "text/plain": [ 1347 | " color size\n", 1348 | "0 green S\n", 1349 | "1 yellow M\n", 1350 | "2 pink L" 1351 | ] 1352 | }, 1353 | "execution_count": 23, 1354 | "metadata": {}, 1355 | "output_type": "execute_result" 1356 | } 1357 | ], 1358 | "source": [ 1359 | "B = pd.DataFrame({'color': ['green', 'yellow', 'pink'], 'size':['S', 'M', 'L']})\n", 1360 | "B" 1361 | ] 1362 | }, 1363 | { 1364 | "cell_type": "markdown", 1365 | "metadata": {}, 1366 | "source": [ 1367 | "## Inner join\n", 1368 | "\n", 1369 | "Only include observations found in both A and B:" 1370 | ] 1371 | }, 1372 | { 1373 | "cell_type": "code", 1374 | "execution_count": 24, 1375 | "metadata": {}, 1376 | "outputs": [ 1377 | { 1378 | "data": { 1379 | "text/html": [ 1380 | "
\n", 1381 | "\n", 1394 | "\n", 1395 | " \n", 1396 | " \n", 1397 | " \n", 1398 | " \n", 1399 | " \n", 1400 | " \n", 1401 | " \n", 1402 | " \n", 1403 | " \n", 1404 | " \n", 1405 | " \n", 1406 | " \n", 1407 | " \n", 1408 | " \n", 1409 | " \n", 1410 | " \n", 1411 | " \n", 1412 | " \n", 1413 | " \n", 1414 | " \n", 1415 | " \n", 1416 | " \n", 1417 | "
colornumsize
0green1S
1yellow2M
\n", 1418 | "
" 1419 | ], 1420 | "text/plain": [ 1421 | " color num size\n", 1422 | "0 green 1 S\n", 1423 | "1 yellow 2 M" 1424 | ] 1425 | }, 1426 | "execution_count": 24, 1427 | "metadata": {}, 1428 | "output_type": "execute_result" 1429 | } 1430 | ], 1431 | "source": [ 1432 | "pd.merge(A, B, how='inner')" 1433 | ] 1434 | }, 1435 | { 1436 | "cell_type": "markdown", 1437 | "metadata": {}, 1438 | "source": [ 1439 | "## Outer join\n", 1440 | "\n", 1441 | "Include observations found in either A or B:" 1442 | ] 1443 | }, 1444 | { 1445 | "cell_type": "code", 1446 | "execution_count": 25, 1447 | "metadata": {}, 1448 | "outputs": [ 1449 | { 1450 | "data": { 1451 | "text/html": [ 1452 | "
\n", 1453 | "\n", 1466 | "\n", 1467 | " \n", 1468 | " \n", 1469 | " \n", 1470 | " \n", 1471 | " \n", 1472 | " \n", 1473 | " \n", 1474 | " \n", 1475 | " \n", 1476 | " \n", 1477 | " \n", 1478 | " \n", 1479 | " \n", 1480 | " \n", 1481 | " \n", 1482 | " \n", 1483 | " \n", 1484 | " \n", 1485 | " \n", 1486 | " \n", 1487 | " \n", 1488 | " \n", 1489 | " \n", 1490 | " \n", 1491 | " \n", 1492 | " \n", 1493 | " \n", 1494 | " \n", 1495 | " \n", 1496 | " \n", 1497 | " \n", 1498 | " \n", 1499 | " \n", 1500 | " \n", 1501 | "
colornumsize
0green1.0S
1yellow2.0M
2red3.0NaN
3pinkNaNL
\n", 1502 | "
" 1503 | ], 1504 | "text/plain": [ 1505 | " color num size\n", 1506 | "0 green 1.0 S\n", 1507 | "1 yellow 2.0 M\n", 1508 | "2 red 3.0 NaN\n", 1509 | "3 pink NaN L" 1510 | ] 1511 | }, 1512 | "execution_count": 25, 1513 | "metadata": {}, 1514 | "output_type": "execute_result" 1515 | } 1516 | ], 1517 | "source": [ 1518 | "pd.merge(A, B, how='outer')" 1519 | ] 1520 | }, 1521 | { 1522 | "cell_type": "markdown", 1523 | "metadata": {}, 1524 | "source": [ 1525 | "## Left join\n", 1526 | "\n", 1527 | "Include all observations found in A:" 1528 | ] 1529 | }, 1530 | { 1531 | "cell_type": "code", 1532 | "execution_count": 26, 1533 | "metadata": {}, 1534 | "outputs": [ 1535 | { 1536 | "data": { 1537 | "text/html": [ 1538 | "
\n", 1539 | "\n", 1552 | "\n", 1553 | " \n", 1554 | " \n", 1555 | " \n", 1556 | " \n", 1557 | " \n", 1558 | " \n", 1559 | " \n", 1560 | " \n", 1561 | " \n", 1562 | " \n", 1563 | " \n", 1564 | " \n", 1565 | " \n", 1566 | " \n", 1567 | " \n", 1568 | " \n", 1569 | " \n", 1570 | " \n", 1571 | " \n", 1572 | " \n", 1573 | " \n", 1574 | " \n", 1575 | " \n", 1576 | " \n", 1577 | " \n", 1578 | " \n", 1579 | " \n", 1580 | " \n", 1581 | "
colornumsize
0green1S
1yellow2M
2red3NaN
\n", 1582 | "
" 1583 | ], 1584 | "text/plain": [ 1585 | " color num size\n", 1586 | "0 green 1 S\n", 1587 | "1 yellow 2 M\n", 1588 | "2 red 3 NaN" 1589 | ] 1590 | }, 1591 | "execution_count": 26, 1592 | "metadata": {}, 1593 | "output_type": "execute_result" 1594 | } 1595 | ], 1596 | "source": [ 1597 | "pd.merge(A, B, how='left')" 1598 | ] 1599 | }, 1600 | { 1601 | "cell_type": "markdown", 1602 | "metadata": {}, 1603 | "source": [ 1604 | "## Right join\n", 1605 | "\n", 1606 | "Include all observations found in B:" 1607 | ] 1608 | }, 1609 | { 1610 | "cell_type": "code", 1611 | "execution_count": 27, 1612 | "metadata": {}, 1613 | "outputs": [ 1614 | { 1615 | "data": { 1616 | "text/html": [ 1617 | "
\n", 1618 | "\n", 1631 | "\n", 1632 | " \n", 1633 | " \n", 1634 | " \n", 1635 | " \n", 1636 | " \n", 1637 | " \n", 1638 | " \n", 1639 | " \n", 1640 | " \n", 1641 | " \n", 1642 | " \n", 1643 | " \n", 1644 | " \n", 1645 | " \n", 1646 | " \n", 1647 | " \n", 1648 | " \n", 1649 | " \n", 1650 | " \n", 1651 | " \n", 1652 | " \n", 1653 | " \n", 1654 | " \n", 1655 | " \n", 1656 | " \n", 1657 | " \n", 1658 | " \n", 1659 | " \n", 1660 | "
colornumsize
0green1.0S
1yellow2.0M
2pinkNaNL
\n", 1661 | "
" 1662 | ], 1663 | "text/plain": [ 1664 | " color num size\n", 1665 | "0 green 1.0 S\n", 1666 | "1 yellow 2.0 M\n", 1667 | "2 pink NaN L" 1668 | ] 1669 | }, 1670 | "execution_count": 27, 1671 | "metadata": {}, 1672 | "output_type": "execute_result" 1673 | } 1674 | ], 1675 | "source": [ 1676 | "pd.merge(A, B, how='right')" 1677 | ] 1678 | } 1679 | ], 1680 | "metadata": { 1681 | "kernelspec": { 1682 | "display_name": "Python 3", 1683 | "language": "python", 1684 | "name": "python3" 1685 | }, 1686 | "language_info": { 1687 | "codemirror_mode": { 1688 | "name": "ipython", 1689 | "version": 3 1690 | }, 1691 | "file_extension": ".py", 1692 | "mimetype": "text/x-python", 1693 | "name": "python", 1694 | "nbconvert_exporter": "python", 1695 | "pygments_lexer": "ipython3", 1696 | "version": "3.7.5" 1697 | } 1698 | }, 1699 | "nbformat": 4, 1700 | "nbformat_minor": 1 1701 | } 1702 | -------------------------------------------------------------------------------- /pandas_multiindex.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# How do I use the MultiIndex in pandas? ([video](https://www.youtube.com/watch?v=tcRGa2soc-c&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=31))\n", 8 | "\n", 9 | "- [My pandas video series](https://www.dataschool.io/easier-data-analysis-with-pandas/)\n", 10 | "- [GitHub repository](https://github.com/justmarkham/pandas-videos)" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "import pandas as pd" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/html": [ 30 | "
\n", 31 | "\n", 44 | "\n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | "
DateCloseVolumeSymbol
02016-10-0331.5014070500CSCO
12016-10-03112.5221701800AAPL
22016-10-0357.4219189500MSFT
32016-10-04113.0029736800AAPL
42016-10-0457.2420085900MSFT
52016-10-0431.3518460400CSCO
62016-10-0557.6416726400MSFT
72016-10-0531.5911808600CSCO
82016-10-05113.0521453100AAPL
\n", 120 | "
" 121 | ], 122 | "text/plain": [ 123 | " Date Close Volume Symbol\n", 124 | "0 2016-10-03 31.50 14070500 CSCO\n", 125 | "1 2016-10-03 112.52 21701800 AAPL\n", 126 | "2 2016-10-03 57.42 19189500 MSFT\n", 127 | "3 2016-10-04 113.00 29736800 AAPL\n", 128 | "4 2016-10-04 57.24 20085900 MSFT\n", 129 | "5 2016-10-04 31.35 18460400 CSCO\n", 130 | "6 2016-10-05 57.64 16726400 MSFT\n", 131 | "7 2016-10-05 31.59 11808600 CSCO\n", 132 | "8 2016-10-05 113.05 21453100 AAPL" 133 | ] 134 | }, 135 | "execution_count": 2, 136 | "metadata": {}, 137 | "output_type": "execute_result" 138 | } 139 | ], 140 | "source": [ 141 | "stocks = pd.read_csv('data/stocks.csv')\n", 142 | "stocks" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 3, 148 | "metadata": {}, 149 | "outputs": [ 150 | { 151 | "data": { 152 | "text/plain": [ 153 | "RangeIndex(start=0, stop=9, step=1)" 154 | ] 155 | }, 156 | "execution_count": 3, 157 | "metadata": {}, 158 | "output_type": "execute_result" 159 | } 160 | ], 161 | "source": [ 162 | "stocks.index" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 4, 168 | "metadata": {}, 169 | "outputs": [ 170 | { 171 | "data": { 172 | "text/plain": [ 173 | "Symbol\n", 174 | "AAPL 112.856667\n", 175 | "CSCO 31.480000\n", 176 | "MSFT 57.433333\n", 177 | "Name: Close, dtype: float64" 178 | ] 179 | }, 180 | "execution_count": 4, 181 | "metadata": {}, 182 | "output_type": "execute_result" 183 | } 184 | ], 185 | "source": [ 186 | "stocks.groupby('Symbol').Close.mean()" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": {}, 192 | "source": [ 193 | "## Series with MultiIndex" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 5, 199 | "metadata": {}, 200 | "outputs": [ 201 | { 202 | "data": { 203 | "text/plain": [ 204 | "Symbol Date \n", 205 | "AAPL 2016-10-03 112.52\n", 206 | " 2016-10-04 113.00\n", 207 | " 2016-10-05 113.05\n", 208 | "CSCO 2016-10-03 31.50\n", 209 | " 2016-10-04 
31.35\n", 210 | " 2016-10-05 31.59\n", 211 | "MSFT 2016-10-03 57.42\n", 212 | " 2016-10-04 57.24\n", 213 | " 2016-10-05 57.64\n", 214 | "Name: Close, dtype: float64" 215 | ] 216 | }, 217 | "execution_count": 5, 218 | "metadata": {}, 219 | "output_type": "execute_result" 220 | } 221 | ], 222 | "source": [ 223 | "ser = stocks.groupby(['Symbol', 'Date']).Close.mean()\n", 224 | "ser" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 6, 230 | "metadata": {}, 231 | "outputs": [ 232 | { 233 | "data": { 234 | "text/plain": [ 235 | "MultiIndex(levels=[['AAPL', 'CSCO', 'MSFT'], ['2016-10-03', '2016-10-04', '2016-10-05']],\n", 236 | " codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2]],\n", 237 | " names=['Symbol', 'Date'])" 238 | ] 239 | }, 240 | "execution_count": 6, 241 | "metadata": {}, 242 | "output_type": "execute_result" 243 | } 244 | ], 245 | "source": [ 246 | "ser.index" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 7, 252 | "metadata": {}, 253 | "outputs": [ 254 | { 255 | "data": { 256 | "text/html": [ 257 | "
\n", 258 | "\n", 271 | "\n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | "
Date2016-10-032016-10-042016-10-05
Symbol
AAPL112.52113.00113.05
CSCO31.5031.3531.59
MSFT57.4257.2457.64
\n", 307 | "
" 308 | ], 309 | "text/plain": [ 310 | "Date 2016-10-03 2016-10-04 2016-10-05\n", 311 | "Symbol \n", 312 | "AAPL 112.52 113.00 113.05\n", 313 | "CSCO 31.50 31.35 31.59\n", 314 | "MSFT 57.42 57.24 57.64" 315 | ] 316 | }, 317 | "execution_count": 7, 318 | "metadata": {}, 319 | "output_type": "execute_result" 320 | } 321 | ], 322 | "source": [ 323 | "ser.unstack()" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": 8, 329 | "metadata": {}, 330 | "outputs": [ 331 | { 332 | "data": { 333 | "text/html": [ 334 | "
\n", 335 | "\n", 348 | "\n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | "
Date2016-10-032016-10-042016-10-05
Symbol
AAPL112.52113.00113.05
CSCO31.5031.3531.59
MSFT57.4257.2457.64
\n", 384 | "
" 385 | ], 386 | "text/plain": [ 387 | "Date 2016-10-03 2016-10-04 2016-10-05\n", 388 | "Symbol \n", 389 | "AAPL 112.52 113.00 113.05\n", 390 | "CSCO 31.50 31.35 31.59\n", 391 | "MSFT 57.42 57.24 57.64" 392 | ] 393 | }, 394 | "execution_count": 8, 395 | "metadata": {}, 396 | "output_type": "execute_result" 397 | } 398 | ], 399 | "source": [ 400 | "df = stocks.pivot_table(values='Close', index='Symbol', columns='Date')\n", 401 | "df" 402 | ] 403 | }, 404 | { 405 | "cell_type": "markdown", 406 | "metadata": {}, 407 | "source": [ 408 | "## Selection from Series with MultiIndex" 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": 9, 414 | "metadata": {}, 415 | "outputs": [ 416 | { 417 | "data": { 418 | "text/plain": [ 419 | "Symbol Date \n", 420 | "AAPL 2016-10-03 112.52\n", 421 | " 2016-10-04 113.00\n", 422 | " 2016-10-05 113.05\n", 423 | "CSCO 2016-10-03 31.50\n", 424 | " 2016-10-04 31.35\n", 425 | " 2016-10-05 31.59\n", 426 | "MSFT 2016-10-03 57.42\n", 427 | " 2016-10-04 57.24\n", 428 | " 2016-10-05 57.64\n", 429 | "Name: Close, dtype: float64" 430 | ] 431 | }, 432 | "execution_count": 9, 433 | "metadata": {}, 434 | "output_type": "execute_result" 435 | } 436 | ], 437 | "source": [ 438 | "ser" 439 | ] 440 | }, 441 | { 442 | "cell_type": "code", 443 | "execution_count": 10, 444 | "metadata": { 445 | "scrolled": false 446 | }, 447 | "outputs": [ 448 | { 449 | "data": { 450 | "text/plain": [ 451 | "Date\n", 452 | "2016-10-03 112.52\n", 453 | "2016-10-04 113.00\n", 454 | "2016-10-05 113.05\n", 455 | "Name: Close, dtype: float64" 456 | ] 457 | }, 458 | "execution_count": 10, 459 | "metadata": {}, 460 | "output_type": "execute_result" 461 | } 462 | ], 463 | "source": [ 464 | "ser.loc['AAPL']" 465 | ] 466 | }, 467 | { 468 | "cell_type": "code", 469 | "execution_count": 11, 470 | "metadata": {}, 471 | "outputs": [ 472 | { 473 | "data": { 474 | "text/plain": [ 475 | "112.52" 476 | ] 477 | }, 478 | "execution_count": 11, 479 | "metadata": {}, 480 | 
"output_type": "execute_result" 481 | } 482 | ], 483 | "source": [ 484 | "ser.loc['AAPL', '2016-10-03']" 485 | ] 486 | }, 487 | { 488 | "cell_type": "code", 489 | "execution_count": 12, 490 | "metadata": { 491 | "scrolled": true 492 | }, 493 | "outputs": [ 494 | { 495 | "data": { 496 | "text/plain": [ 497 | "Symbol\n", 498 | "AAPL 112.52\n", 499 | "CSCO 31.50\n", 500 | "MSFT 57.42\n", 501 | "Name: Close, dtype: float64" 502 | ] 503 | }, 504 | "execution_count": 12, 505 | "metadata": {}, 506 | "output_type": "execute_result" 507 | } 508 | ], 509 | "source": [ 510 | "ser.loc[:, '2016-10-03']" 511 | ] 512 | }, 513 | { 514 | "cell_type": "code", 515 | "execution_count": 13, 516 | "metadata": { 517 | "scrolled": true 518 | }, 519 | "outputs": [ 520 | { 521 | "data": { 522 | "text/html": [ 523 | "
\n", 524 | "\n", 537 | "\n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | "
Date2016-10-032016-10-042016-10-05
Symbol
AAPL112.52113.00113.05
CSCO31.5031.3531.59
MSFT57.4257.2457.64
\n", 573 | "
" 574 | ], 575 | "text/plain": [ 576 | "Date 2016-10-03 2016-10-04 2016-10-05\n", 577 | "Symbol \n", 578 | "AAPL 112.52 113.00 113.05\n", 579 | "CSCO 31.50 31.35 31.59\n", 580 | "MSFT 57.42 57.24 57.64" 581 | ] 582 | }, 583 | "execution_count": 13, 584 | "metadata": {}, 585 | "output_type": "execute_result" 586 | } 587 | ], 588 | "source": [ 589 | "df" 590 | ] 591 | }, 592 | { 593 | "cell_type": "code", 594 | "execution_count": 14, 595 | "metadata": { 596 | "scrolled": true 597 | }, 598 | "outputs": [ 599 | { 600 | "data": { 601 | "text/plain": [ 602 | "Date\n", 603 | "2016-10-03 112.52\n", 604 | "2016-10-04 113.00\n", 605 | "2016-10-05 113.05\n", 606 | "Name: AAPL, dtype: float64" 607 | ] 608 | }, 609 | "execution_count": 14, 610 | "metadata": {}, 611 | "output_type": "execute_result" 612 | } 613 | ], 614 | "source": [ 615 | "df.loc['AAPL']" 616 | ] 617 | }, 618 | { 619 | "cell_type": "code", 620 | "execution_count": 15, 621 | "metadata": {}, 622 | "outputs": [ 623 | { 624 | "data": { 625 | "text/plain": [ 626 | "112.52" 627 | ] 628 | }, 629 | "execution_count": 15, 630 | "metadata": {}, 631 | "output_type": "execute_result" 632 | } 633 | ], 634 | "source": [ 635 | "df.loc['AAPL', '2016-10-03']" 636 | ] 637 | }, 638 | { 639 | "cell_type": "code", 640 | "execution_count": 16, 641 | "metadata": {}, 642 | "outputs": [ 643 | { 644 | "data": { 645 | "text/plain": [ 646 | "Symbol\n", 647 | "AAPL 112.52\n", 648 | "CSCO 31.50\n", 649 | "MSFT 57.42\n", 650 | "Name: 2016-10-03, dtype: float64" 651 | ] 652 | }, 653 | "execution_count": 16, 654 | "metadata": {}, 655 | "output_type": "execute_result" 656 | } 657 | ], 658 | "source": [ 659 | "df.loc[:, '2016-10-03']" 660 | ] 661 | }, 662 | { 663 | "cell_type": "markdown", 664 | "metadata": {}, 665 | "source": [ 666 | "## DataFrame with MultiIndex" 667 | ] 668 | }, 669 | { 670 | "cell_type": "code", 671 | "execution_count": 17, 672 | "metadata": {}, 673 | "outputs": [ 674 | { 675 | "data": { 676 | "text/html": [ 677 | "
\n", 678 | "\n", 691 | "\n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | "
CloseVolume
SymbolDate
CSCO2016-10-0331.5014070500
AAPL2016-10-03112.5221701800
MSFT2016-10-0357.4219189500
AAPL2016-10-04113.0029736800
MSFT2016-10-0457.2420085900
CSCO2016-10-0431.3518460400
MSFT2016-10-0557.6416726400
CSCO2016-10-0531.5911808600
AAPL2016-10-05113.0521453100
\n", 763 | "
" 764 | ], 765 | "text/plain": [ 766 | " Close Volume\n", 767 | "Symbol Date \n", 768 | "CSCO 2016-10-03 31.50 14070500\n", 769 | "AAPL 2016-10-03 112.52 21701800\n", 770 | "MSFT 2016-10-03 57.42 19189500\n", 771 | "AAPL 2016-10-04 113.00 29736800\n", 772 | "MSFT 2016-10-04 57.24 20085900\n", 773 | "CSCO 2016-10-04 31.35 18460400\n", 774 | "MSFT 2016-10-05 57.64 16726400\n", 775 | "CSCO 2016-10-05 31.59 11808600\n", 776 | "AAPL 2016-10-05 113.05 21453100" 777 | ] 778 | }, 779 | "execution_count": 17, 780 | "metadata": {}, 781 | "output_type": "execute_result" 782 | } 783 | ], 784 | "source": [ 785 | "stocks.set_index(['Symbol', 'Date'], inplace=True)\n", 786 | "stocks" 787 | ] 788 | }, 789 | { 790 | "cell_type": "code", 791 | "execution_count": 18, 792 | "metadata": {}, 793 | "outputs": [ 794 | { 795 | "data": { 796 | "text/plain": [ 797 | "MultiIndex(levels=[['AAPL', 'CSCO', 'MSFT'], ['2016-10-03', '2016-10-04', '2016-10-05']],\n", 798 | " codes=[[1, 0, 2, 0, 2, 1, 2, 1, 0], [0, 0, 0, 1, 1, 1, 2, 2, 2]],\n", 799 | " names=['Symbol', 'Date'])" 800 | ] 801 | }, 802 | "execution_count": 18, 803 | "metadata": {}, 804 | "output_type": "execute_result" 805 | } 806 | ], 807 | "source": [ 808 | "stocks.index" 809 | ] 810 | }, 811 | { 812 | "cell_type": "code", 813 | "execution_count": 19, 814 | "metadata": {}, 815 | "outputs": [ 816 | { 817 | "data": { 818 | "text/html": [ 819 | "
\n", 820 | "\n", 833 | "\n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | "
CloseVolume
SymbolDate
AAPL2016-10-03112.5221701800
2016-10-04113.0029736800
2016-10-05113.0521453100
CSCO2016-10-0331.5014070500
2016-10-0431.3518460400
2016-10-0531.5911808600
MSFT2016-10-0357.4219189500
2016-10-0457.2420085900
2016-10-0557.6416726400
\n", 899 | "
" 900 | ], 901 | "text/plain": [ 902 | " Close Volume\n", 903 | "Symbol Date \n", 904 | "AAPL 2016-10-03 112.52 21701800\n", 905 | " 2016-10-04 113.00 29736800\n", 906 | " 2016-10-05 113.05 21453100\n", 907 | "CSCO 2016-10-03 31.50 14070500\n", 908 | " 2016-10-04 31.35 18460400\n", 909 | " 2016-10-05 31.59 11808600\n", 910 | "MSFT 2016-10-03 57.42 19189500\n", 911 | " 2016-10-04 57.24 20085900\n", 912 | " 2016-10-05 57.64 16726400" 913 | ] 914 | }, 915 | "execution_count": 19, 916 | "metadata": {}, 917 | "output_type": "execute_result" 918 | } 919 | ], 920 | "source": [ 921 | "stocks.sort_index(inplace=True)\n", 922 | "stocks" 923 | ] 924 | }, 925 | { 926 | "cell_type": "markdown", 927 | "metadata": {}, 928 | "source": [ 929 | "## Selection from DataFrame with MultiIndex" 930 | ] 931 | }, 932 | { 933 | "cell_type": "code", 934 | "execution_count": 20, 935 | "metadata": {}, 936 | "outputs": [ 937 | { 938 | "data": { 939 | "text/html": [ 940 | "
\n", 941 | "\n", 954 | "\n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | "
CloseVolume
Date
2016-10-03112.5221701800
2016-10-04113.0029736800
2016-10-05113.0521453100
\n", 985 | "
" 986 | ], 987 | "text/plain": [ 988 | " Close Volume\n", 989 | "Date \n", 990 | "2016-10-03 112.52 21701800\n", 991 | "2016-10-04 113.00 29736800\n", 992 | "2016-10-05 113.05 21453100" 993 | ] 994 | }, 995 | "execution_count": 20, 996 | "metadata": {}, 997 | "output_type": "execute_result" 998 | } 999 | ], 1000 | "source": [ 1001 | "stocks.loc['AAPL']" 1002 | ] 1003 | }, 1004 | { 1005 | "cell_type": "code", 1006 | "execution_count": 21, 1007 | "metadata": {}, 1008 | "outputs": [ 1009 | { 1010 | "data": { 1011 | "text/plain": [ 1012 | "Close 112.52\n", 1013 | "Volume 21701800.00\n", 1014 | "Name: (AAPL, 2016-10-03), dtype: float64" 1015 | ] 1016 | }, 1017 | "execution_count": 21, 1018 | "metadata": {}, 1019 | "output_type": "execute_result" 1020 | } 1021 | ], 1022 | "source": [ 1023 | "stocks.loc[('AAPL', '2016-10-03'), :]" 1024 | ] 1025 | }, 1026 | { 1027 | "cell_type": "code", 1028 | "execution_count": 22, 1029 | "metadata": {}, 1030 | "outputs": [ 1031 | { 1032 | "data": { 1033 | "text/plain": [ 1034 | "112.52" 1035 | ] 1036 | }, 1037 | "execution_count": 22, 1038 | "metadata": {}, 1039 | "output_type": "execute_result" 1040 | } 1041 | ], 1042 | "source": [ 1043 | "stocks.loc[('AAPL', '2016-10-03'), 'Close']" 1044 | ] 1045 | }, 1046 | { 1047 | "cell_type": "code", 1048 | "execution_count": 23, 1049 | "metadata": {}, 1050 | "outputs": [ 1051 | { 1052 | "data": { 1053 | "text/html": [ 1054 | "
\n", 1055 | "\n", 1068 | "\n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | " \n", 1092 | " \n", 1093 | " \n", 1094 | " \n", 1095 | " \n", 1096 | " \n", 1097 | " \n", 1098 | " \n", 1099 | " \n", 1100 | " \n", 1101 | " \n", 1102 | " \n", 1103 | " \n", 1104 | " \n", 1105 | " \n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1109 | " \n", 1110 | " \n", 1111 | " \n", 1112 | " \n", 1113 | " \n", 1114 | " \n", 1115 | " \n", 1116 | " \n", 1117 | "
CloseVolume
SymbolDate
AAPL2016-10-03112.5221701800
2016-10-04113.0029736800
2016-10-05113.0521453100
MSFT2016-10-0357.4219189500
2016-10-0457.2420085900
2016-10-0557.6416726400
\n", 1118 | "
" 1119 | ], 1120 | "text/plain": [ 1121 | " Close Volume\n", 1122 | "Symbol Date \n", 1123 | "AAPL 2016-10-03 112.52 21701800\n", 1124 | " 2016-10-04 113.00 29736800\n", 1125 | " 2016-10-05 113.05 21453100\n", 1126 | "MSFT 2016-10-03 57.42 19189500\n", 1127 | " 2016-10-04 57.24 20085900\n", 1128 | " 2016-10-05 57.64 16726400" 1129 | ] 1130 | }, 1131 | "execution_count": 23, 1132 | "metadata": {}, 1133 | "output_type": "execute_result" 1134 | } 1135 | ], 1136 | "source": [ 1137 | "stocks.loc[['AAPL', 'MSFT'], :]" 1138 | ] 1139 | }, 1140 | { 1141 | "cell_type": "code", 1142 | "execution_count": 24, 1143 | "metadata": {}, 1144 | "outputs": [ 1145 | { 1146 | "data": { 1147 | "text/html": [ 1148 | "
\n", 1149 | "\n", 1162 | "\n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | " \n", 1186 | " \n", 1187 | " \n", 1188 | " \n", 1189 | " \n", 1190 | " \n", 1191 | "
CloseVolume
SymbolDate
AAPL2016-10-03112.5221701800
MSFT2016-10-0357.4219189500
\n", 1192 | "
" 1193 | ], 1194 | "text/plain": [ 1195 | " Close Volume\n", 1196 | "Symbol Date \n", 1197 | "AAPL 2016-10-03 112.52 21701800\n", 1198 | "MSFT 2016-10-03 57.42 19189500" 1199 | ] 1200 | }, 1201 | "execution_count": 24, 1202 | "metadata": {}, 1203 | "output_type": "execute_result" 1204 | } 1205 | ], 1206 | "source": [ 1207 | "stocks.loc[(['AAPL', 'MSFT'], '2016-10-03'), :]" 1208 | ] 1209 | }, 1210 | { 1211 | "cell_type": "code", 1212 | "execution_count": 25, 1213 | "metadata": {}, 1214 | "outputs": [ 1215 | { 1216 | "data": { 1217 | "text/plain": [ 1218 | "Symbol Date \n", 1219 | "AAPL 2016-10-03 112.52\n", 1220 | "MSFT 2016-10-03 57.42\n", 1221 | "Name: Close, dtype: float64" 1222 | ] 1223 | }, 1224 | "execution_count": 25, 1225 | "metadata": {}, 1226 | "output_type": "execute_result" 1227 | } 1228 | ], 1229 | "source": [ 1230 | "stocks.loc[(['AAPL', 'MSFT'], '2016-10-03'), 'Close']" 1231 | ] 1232 | }, 1233 | { 1234 | "cell_type": "code", 1235 | "execution_count": 26, 1236 | "metadata": {}, 1237 | "outputs": [ 1238 | { 1239 | "data": { 1240 | "text/plain": [ 1241 | "Symbol Date \n", 1242 | "AAPL 2016-10-03 112.52\n", 1243 | " 2016-10-04 113.00\n", 1244 | "Name: Close, dtype: float64" 1245 | ] 1246 | }, 1247 | "execution_count": 26, 1248 | "metadata": {}, 1249 | "output_type": "execute_result" 1250 | } 1251 | ], 1252 | "source": [ 1253 | "stocks.loc[('AAPL', ['2016-10-03', '2016-10-04']), 'Close']" 1254 | ] 1255 | }, 1256 | { 1257 | "cell_type": "code", 1258 | "execution_count": 27, 1259 | "metadata": {}, 1260 | "outputs": [ 1261 | { 1262 | "data": { 1263 | "text/html": [ 1264 | "
\n", 1265 | "\n", 1278 | "\n", 1279 | " \n", 1280 | " \n", 1281 | " \n", 1282 | " \n", 1283 | " \n", 1284 | " \n", 1285 | " \n", 1286 | " \n", 1287 | " \n", 1288 | " \n", 1289 | " \n", 1290 | " \n", 1291 | " \n", 1292 | " \n", 1293 | " \n", 1294 | " \n", 1295 | " \n", 1296 | " \n", 1297 | " \n", 1298 | " \n", 1299 | " \n", 1300 | " \n", 1301 | " \n", 1302 | " \n", 1303 | " \n", 1304 | " \n", 1305 | " \n", 1306 | " \n", 1307 | " \n", 1308 | " \n", 1309 | " \n", 1310 | " \n", 1311 | " \n", 1312 | " \n", 1313 | " \n", 1314 | " \n", 1315 | " \n", 1316 | " \n", 1317 | " \n", 1318 | " \n", 1319 | " \n", 1320 | " \n", 1321 | " \n", 1322 | " \n", 1323 | " \n", 1324 | " \n", 1325 | " \n", 1326 | " \n", 1327 | " \n", 1328 | "
CloseVolume
SymbolDate
AAPL2016-10-03112.5221701800
2016-10-04113.0029736800
CSCO2016-10-0331.5014070500
2016-10-0431.3518460400
MSFT2016-10-0357.4219189500
2016-10-0457.2420085900
\n", 1329 | "
" 1330 | ], 1331 | "text/plain": [ 1332 | " Close Volume\n", 1333 | "Symbol Date \n", 1334 | "AAPL 2016-10-03 112.52 21701800\n", 1335 | " 2016-10-04 113.00 29736800\n", 1336 | "CSCO 2016-10-03 31.50 14070500\n", 1337 | " 2016-10-04 31.35 18460400\n", 1338 | "MSFT 2016-10-03 57.42 19189500\n", 1339 | " 2016-10-04 57.24 20085900" 1340 | ] 1341 | }, 1342 | "execution_count": 27, 1343 | "metadata": {}, 1344 | "output_type": "execute_result" 1345 | } 1346 | ], 1347 | "source": [ 1348 | "stocks.loc[(slice(None), ['2016-10-03', '2016-10-04']), :]" 1349 | ] 1350 | }, 1351 | { 1352 | "cell_type": "markdown", 1353 | "metadata": {}, 1354 | "source": [ 1355 | "## Merging DataFrames with MultiIndexes" 1356 | ] 1357 | }, 1358 | { 1359 | "cell_type": "code", 1360 | "execution_count": 28, 1361 | "metadata": {}, 1362 | "outputs": [ 1363 | { 1364 | "data": { 1365 | "text/html": [ 1366 | "
\n", 1367 | "\n", 1380 | "\n", 1381 | " \n", 1382 | " \n", 1383 | " \n", 1384 | " \n", 1385 | " \n", 1386 | " \n", 1387 | " \n", 1388 | " \n", 1389 | " \n", 1390 | " \n", 1391 | " \n", 1392 | " \n", 1393 | " \n", 1394 | " \n", 1395 | " \n", 1396 | " \n", 1397 | " \n", 1398 | " \n", 1399 | " \n", 1400 | " \n", 1401 | " \n", 1402 | " \n", 1403 | " \n", 1404 | " \n", 1405 | " \n", 1406 | " \n", 1407 | " \n", 1408 | " \n", 1409 | " \n", 1410 | " \n", 1411 | " \n", 1412 | " \n", 1413 | " \n", 1414 | " \n", 1415 | " \n", 1416 | " \n", 1417 | " \n", 1418 | " \n", 1419 | " \n", 1420 | " \n", 1421 | " \n", 1422 | " \n", 1423 | " \n", 1424 | " \n", 1425 | " \n", 1426 | " \n", 1427 | " \n", 1428 | " \n", 1429 | " \n", 1430 | " \n", 1431 | " \n", 1432 | " \n", 1433 | " \n", 1434 | "
Close
SymbolDate
AAPL2016-10-03112.52
2016-10-04113.00
2016-10-05113.05
CSCO2016-10-0331.50
2016-10-0431.35
2016-10-0531.59
MSFT2016-10-0357.42
2016-10-0457.24
2016-10-0557.64
\n", 1435 | "
" 1436 | ], 1437 | "text/plain": [ 1438 | " Close\n", 1439 | "Symbol Date \n", 1440 | "AAPL 2016-10-03 112.52\n", 1441 | " 2016-10-04 113.00\n", 1442 | " 2016-10-05 113.05\n", 1443 | "CSCO 2016-10-03 31.50\n", 1444 | " 2016-10-04 31.35\n", 1445 | " 2016-10-05 31.59\n", 1446 | "MSFT 2016-10-03 57.42\n", 1447 | " 2016-10-04 57.24\n", 1448 | " 2016-10-05 57.64" 1449 | ] 1450 | }, 1451 | "execution_count": 28, 1452 | "metadata": {}, 1453 | "output_type": "execute_result" 1454 | } 1455 | ], 1456 | "source": [ 1457 | "close = pd.read_csv('data/stocks.csv', usecols=[0, 1, 3], index_col=['Symbol', 'Date']).sort_index()\n", 1458 | "close" 1459 | ] 1460 | }, 1461 | { 1462 | "cell_type": "code", 1463 | "execution_count": 29, 1464 | "metadata": {}, 1465 | "outputs": [ 1466 | { 1467 | "data": { 1468 | "text/html": [ 1469 | "
\n", 1470 | "\n", 1483 | "\n", 1484 | " \n", 1485 | " \n", 1486 | " \n", 1487 | " \n", 1488 | " \n", 1489 | " \n", 1490 | " \n", 1491 | " \n", 1492 | " \n", 1493 | " \n", 1494 | " \n", 1495 | " \n", 1496 | " \n", 1497 | " \n", 1498 | " \n", 1499 | " \n", 1500 | " \n", 1501 | " \n", 1502 | " \n", 1503 | " \n", 1504 | " \n", 1505 | " \n", 1506 | " \n", 1507 | " \n", 1508 | " \n", 1509 | " \n", 1510 | " \n", 1511 | " \n", 1512 | " \n", 1513 | " \n", 1514 | " \n", 1515 | " \n", 1516 | " \n", 1517 | " \n", 1518 | " \n", 1519 | " \n", 1520 | " \n", 1521 | " \n", 1522 | " \n", 1523 | " \n", 1524 | " \n", 1525 | " \n", 1526 | " \n", 1527 | " \n", 1528 | " \n", 1529 | " \n", 1530 | " \n", 1531 | " \n", 1532 | " \n", 1533 | " \n", 1534 | " \n", 1535 | " \n", 1536 | " \n", 1537 | "
Volume
SymbolDate
AAPL2016-10-0321701800
2016-10-0429736800
2016-10-0521453100
CSCO2016-10-0314070500
2016-10-0418460400
2016-10-0511808600
MSFT2016-10-0319189500
2016-10-0420085900
2016-10-0516726400
\n", 1538 | "
" 1539 | ], 1540 | "text/plain": [ 1541 | " Volume\n", 1542 | "Symbol Date \n", 1543 | "AAPL 2016-10-03 21701800\n", 1544 | " 2016-10-04 29736800\n", 1545 | " 2016-10-05 21453100\n", 1546 | "CSCO 2016-10-03 14070500\n", 1547 | " 2016-10-04 18460400\n", 1548 | " 2016-10-05 11808600\n", 1549 | "MSFT 2016-10-03 19189500\n", 1550 | " 2016-10-04 20085900\n", 1551 | " 2016-10-05 16726400" 1552 | ] 1553 | }, 1554 | "execution_count": 29, 1555 | "metadata": {}, 1556 | "output_type": "execute_result" 1557 | } 1558 | ], 1559 | "source": [ 1560 | "volume = pd.read_csv('data/stocks.csv', usecols=[0, 2, 3], index_col=['Symbol', 'Date']).sort_index()\n", 1561 | "volume" 1562 | ] 1563 | }, 1564 | { 1565 | "cell_type": "code", 1566 | "execution_count": 30, 1567 | "metadata": {}, 1568 | "outputs": [ 1569 | { 1570 | "data": { 1571 | "text/html": [ 1572 | "
\n", 1573 | "\n", 1586 | "\n", 1587 | " \n", 1588 | " \n", 1589 | " \n", 1590 | " \n", 1591 | " \n", 1592 | " \n", 1593 | " \n", 1594 | " \n", 1595 | " \n", 1596 | " \n", 1597 | " \n", 1598 | " \n", 1599 | " \n", 1600 | " \n", 1601 | " \n", 1602 | " \n", 1603 | " \n", 1604 | " \n", 1605 | " \n", 1606 | " \n", 1607 | " \n", 1608 | " \n", 1609 | " \n", 1610 | " \n", 1611 | " \n", 1612 | " \n", 1613 | " \n", 1614 | " \n", 1615 | " \n", 1616 | " \n", 1617 | " \n", 1618 | " \n", 1619 | " \n", 1620 | " \n", 1621 | " \n", 1622 | " \n", 1623 | " \n", 1624 | " \n", 1625 | " \n", 1626 | " \n", 1627 | " \n", 1628 | " \n", 1629 | " \n", 1630 | " \n", 1631 | " \n", 1632 | " \n", 1633 | " \n", 1634 | " \n", 1635 | " \n", 1636 | " \n", 1637 | " \n", 1638 | " \n", 1639 | " \n", 1640 | " \n", 1641 | " \n", 1642 | " \n", 1643 | " \n", 1644 | " \n", 1645 | " \n", 1646 | " \n", 1647 | " \n", 1648 | " \n", 1649 | " \n", 1650 | " \n", 1651 | "
CloseVolume
SymbolDate
AAPL2016-10-03112.5221701800
2016-10-04113.0029736800
2016-10-05113.0521453100
CSCO2016-10-0331.5014070500
2016-10-0431.3518460400
2016-10-0531.5911808600
MSFT2016-10-0357.4219189500
2016-10-0457.2420085900
2016-10-0557.6416726400
\n", 1652 | "
" 1653 | ], 1654 | "text/plain": [ 1655 | " Close Volume\n", 1656 | "Symbol Date \n", 1657 | "AAPL 2016-10-03 112.52 21701800\n", 1658 | " 2016-10-04 113.00 29736800\n", 1659 | " 2016-10-05 113.05 21453100\n", 1660 | "CSCO 2016-10-03 31.50 14070500\n", 1661 | " 2016-10-04 31.35 18460400\n", 1662 | " 2016-10-05 31.59 11808600\n", 1663 | "MSFT 2016-10-03 57.42 19189500\n", 1664 | " 2016-10-04 57.24 20085900\n", 1665 | " 2016-10-05 57.64 16726400" 1666 | ] 1667 | }, 1668 | "execution_count": 30, 1669 | "metadata": {}, 1670 | "output_type": "execute_result" 1671 | } 1672 | ], 1673 | "source": [ 1674 | "both = pd.merge(close, volume, left_index=True, right_index=True)\n", 1675 | "both" 1676 | ] 1677 | }, 1678 | { 1679 | "cell_type": "code", 1680 | "execution_count": 31, 1681 | "metadata": {}, 1682 | "outputs": [ 1683 | { 1684 | "data": { 1685 | "text/html": [ 1686 | "
\n", 1687 | "\n", 1700 | "\n", 1701 | " \n", 1702 | " \n", 1703 | " \n", 1704 | " \n", 1705 | " \n", 1706 | " \n", 1707 | " \n", 1708 | " \n", 1709 | " \n", 1710 | " \n", 1711 | " \n", 1712 | " \n", 1713 | " \n", 1714 | " \n", 1715 | " \n", 1716 | " \n", 1717 | " \n", 1718 | " \n", 1719 | " \n", 1720 | " \n", 1721 | " \n", 1722 | " \n", 1723 | " \n", 1724 | " \n", 1725 | " \n", 1726 | " \n", 1727 | " \n", 1728 | " \n", 1729 | " \n", 1730 | " \n", 1731 | " \n", 1732 | " \n", 1733 | " \n", 1734 | " \n", 1735 | " \n", 1736 | " \n", 1737 | " \n", 1738 | " \n", 1739 | " \n", 1740 | " \n", 1741 | " \n", 1742 | " \n", 1743 | " \n", 1744 | " \n", 1745 | " \n", 1746 | " \n", 1747 | " \n", 1748 | " \n", 1749 | " \n", 1750 | " \n", 1751 | " \n", 1752 | " \n", 1753 | " \n", 1754 | " \n", 1755 | " \n", 1756 | " \n", 1757 | " \n", 1758 | " \n", 1759 | " \n", 1760 | " \n", 1761 | " \n", 1762 | " \n", 1763 | " \n", 1764 | " \n", 1765 | " \n", 1766 | " \n", 1767 | " \n", 1768 | " \n", 1769 | " \n", 1770 | " \n", 1771 | " \n", 1772 | " \n", 1773 | " \n", 1774 | " \n", 1775 | "
SymbolDateCloseVolume
0AAPL2016-10-03112.5221701800
1AAPL2016-10-04113.0029736800
2AAPL2016-10-05113.0521453100
3CSCO2016-10-0331.5014070500
4CSCO2016-10-0431.3518460400
5CSCO2016-10-0531.5911808600
6MSFT2016-10-0357.4219189500
7MSFT2016-10-0457.2420085900
8MSFT2016-10-0557.6416726400
\n", 1776 | "
" 1777 | ], 1778 | "text/plain": [ 1779 | " Symbol Date Close Volume\n", 1780 | "0 AAPL 2016-10-03 112.52 21701800\n", 1781 | "1 AAPL 2016-10-04 113.00 29736800\n", 1782 | "2 AAPL 2016-10-05 113.05 21453100\n", 1783 | "3 CSCO 2016-10-03 31.50 14070500\n", 1784 | "4 CSCO 2016-10-04 31.35 18460400\n", 1785 | "5 CSCO 2016-10-05 31.59 11808600\n", 1786 | "6 MSFT 2016-10-03 57.42 19189500\n", 1787 | "7 MSFT 2016-10-04 57.24 20085900\n", 1788 | "8 MSFT 2016-10-05 57.64 16726400" 1789 | ] 1790 | }, 1791 | "execution_count": 31, 1792 | "metadata": {}, 1793 | "output_type": "execute_result" 1794 | } 1795 | ], 1796 | "source": [ 1797 | "both.reset_index()" 1798 | ] 1799 | } 1800 | ], 1801 | "metadata": { 1802 | "kernelspec": { 1803 | "display_name": "Python 3", 1804 | "language": "python", 1805 | "name": "python3" 1806 | }, 1807 | "language_info": { 1808 | "codemirror_mode": { 1809 | "name": "ipython", 1810 | "version": 3 1811 | }, 1812 | "file_extension": ".py", 1813 | "mimetype": "text/x-python", 1814 | "name": "python", 1815 | "nbconvert_exporter": "python", 1816 | "pygments_lexer": "ipython3", 1817 | "version": "3.7.2" 1818 | } 1819 | }, 1820 | "nbformat": 4, 1821 | "nbformat_minor": 2 1822 | } 1823 | --------------------------------------------------------------------------------