"
88 | ],
89 | "text/plain": [
90 | " TIME CITIES INDIC_UR Value \\\n",
91 | "0 2008 Germany Population on the 1st of January, total 82217837 \n",
92 | "1 2008 Germany Population on the 1st of January, male 40274292 \n",
93 | "2 2008 Germany Population on the 1st of January, female 41943545 \n",
94 | "3 2008 Germany Population on the 1st of January, 0-4 years, t... 3469044 \n",
95 | "4 2008 Germany Population on the 1st of January, 0-4 years, male 1780414 \n",
96 | "\n",
97 | " Flag and Footnotes \n",
98 | "0 NaN \n",
99 | "1 NaN \n",
100 | "2 NaN \n",
101 | "3 NaN \n",
102 | "4 NaN "
103 | ]
104 | },
105 | "execution_count": 1,
106 | "metadata": {},
107 | "output_type": "execute_result"
108 | }
109 | ],
110 | "source": [
111 | "import pandas as pd\n",
112 | "\n",
113 | "data = pd.read_csv('urb_cpop1_1_Data.csv')\n",
114 | "print(pd.__version__) # I ran with 0.23.4\n",
115 | "print(len(data))\n",
116 | "data.head()"
117 | ]
118 | },
119 | {
120 | "cell_type": "code",
121 | "execution_count": 2,
122 | "metadata": {},
123 | "outputs": [
124 | {
125 | "data": {
126 | "text/html": [
127 | "
\n",
128 | "\n",
141 | "
\n",
142 | " \n",
143 | "
\n",
144 | "
TIME
\n",
145 | "
2008
\n",
146 | "
2009
\n",
147 | "
2010
\n",
148 | "
2011
\n",
149 | "
2012
\n",
150 | "
2013
\n",
151 | "
2014
\n",
152 | "
2015
\n",
153 | "
2016
\n",
154 | "
2017
\n",
155 | "
\n",
156 | "
\n",
157 | "
key
\n",
158 | "
\n",
159 | "
\n",
160 | "
\n",
161 | "
\n",
162 | "
\n",
163 | "
\n",
164 | "
\n",
165 | "
\n",
166 | "
\n",
167 | "
\n",
168 | "
\n",
169 | " \n",
170 | " \n",
171 | "
\n",
172 | "
's-Hertogenbosch:Population on the 1st of January, 0-4 years, male
\n",
173 | "
NaN
\n",
174 | "
NaN
\n",
175 | "
4121.0
\n",
176 | "
4131.0
\n",
177 | "
4181.0
\n",
178 | "
4255.0
\n",
179 | "
4295.0
\n",
180 | "
NaN
\n",
181 | "
NaN
\n",
182 | "
NaN
\n",
183 | "
\n",
184 | "
\n",
185 | "
's-Hertogenbosch:Population on the 1st of January, 0-4 years, total
\n",
186 | "
7885.0
\n",
187 | "
7915.0
\n",
188 | "
8127.0
\n",
189 | "
8211.0
\n",
190 | "
8292.0
\n",
191 | "
8313.0
\n",
192 | "
8290.0
\n",
193 | "
NaN
\n",
194 | "
NaN
\n",
195 | "
NaN
\n",
196 | "
\n",
197 | "
\n",
198 | "
's-Hertogenbosch:Population on the 1st of January, female
\n",
199 | "
69434.0
\n",
200 | "
70060.0
\n",
201 | "
70979.0
\n",
202 | "
71586.0
\n",
203 | "
72085.0
\n",
204 | "
72541.0
\n",
205 | "
72877.0
\n",
206 | "
NaN
\n",
207 | "
NaN
\n",
208 | "
NaN
\n",
209 | "
\n",
210 | "
\n",
211 | "
's-Hertogenbosch:Population on the 1st of January, male
\n",
212 | "
67047.0
\n",
213 | "
67715.0
\n",
214 | "
68628.0
\n",
215 | "
69200.0
\n",
216 | "
69808.0
\n",
217 | "
70276.0
\n",
218 | "
70856.0
\n",
219 | "
NaN
\n",
220 | "
NaN
\n",
221 | "
NaN
\n",
222 | "
\n",
223 | "
\n",
224 | "
's-Hertogenbosch:Population on the 1st of January, total
\n",
225 | "
136481.0
\n",
226 | "
137775.0
\n",
227 | "
139607.0
\n",
228 | "
140786.0
\n",
229 | "
141893.0
\n",
230 | "
142817.0
\n",
231 | "
143733.0
\n",
232 | "
NaN
\n",
233 | "
NaN
\n",
234 | "
NaN
\n",
235 | "
\n",
236 | " \n",
237 | "
\n",
238 | "
"
239 | ],
240 | "text/plain": [
241 | "TIME 2008 2009 \\\n",
242 | "key \n",
243 | "'s-Hertogenbosch:Population on the 1st of Janua... NaN NaN \n",
244 | "'s-Hertogenbosch:Population on the 1st of Janua... 7885.0 7915.0 \n",
245 | "'s-Hertogenbosch:Population on the 1st of Janua... 69434.0 70060.0 \n",
246 | "'s-Hertogenbosch:Population on the 1st of Janua... 67047.0 67715.0 \n",
247 | "'s-Hertogenbosch:Population on the 1st of Janua... 136481.0 137775.0 \n",
248 | "\n",
249 | "TIME 2010 2011 \\\n",
250 | "key \n",
251 | "'s-Hertogenbosch:Population on the 1st of Janua... 4121.0 4131.0 \n",
252 | "'s-Hertogenbosch:Population on the 1st of Janua... 8127.0 8211.0 \n",
253 | "'s-Hertogenbosch:Population on the 1st of Janua... 70979.0 71586.0 \n",
254 | "'s-Hertogenbosch:Population on the 1st of Janua... 68628.0 69200.0 \n",
255 | "'s-Hertogenbosch:Population on the 1st of Janua... 139607.0 140786.0 \n",
256 | "\n",
257 | "TIME 2012 2013 \\\n",
258 | "key \n",
259 | "'s-Hertogenbosch:Population on the 1st of Janua... 4181.0 4255.0 \n",
260 | "'s-Hertogenbosch:Population on the 1st of Janua... 8292.0 8313.0 \n",
261 | "'s-Hertogenbosch:Population on the 1st of Janua... 72085.0 72541.0 \n",
262 | "'s-Hertogenbosch:Population on the 1st of Janua... 69808.0 70276.0 \n",
263 | "'s-Hertogenbosch:Population on the 1st of Janua... 141893.0 142817.0 \n",
264 | "\n",
265 | "TIME 2014 2015 2016 2017 \n",
266 | "key \n",
267 | "'s-Hertogenbosch:Population on the 1st of Janua... 4295.0 NaN NaN NaN \n",
268 | "'s-Hertogenbosch:Population on the 1st of Janua... 8290.0 NaN NaN NaN \n",
269 | "'s-Hertogenbosch:Population on the 1st of Janua... 72877.0 NaN NaN NaN \n",
270 | "'s-Hertogenbosch:Population on the 1st of Janua... 70856.0 NaN NaN NaN \n",
271 | "'s-Hertogenbosch:Population on the 1st of Janua... 143733.0 NaN NaN NaN "
272 | ]
273 | },
274 | "execution_count": 2,
275 | "metadata": {},
276 | "output_type": "execute_result"
277 | }
278 | ],
279 | "source": [
280 | "# Remove rows that have no value\n",
281 | "filtered = data.drop(data[pd.isna(data.Value) | (data.Value == \":\")].index)\n",
282 | "filtered['key'] = filtered['CITIES'] + ':' + filtered['INDIC_UR']\n",
283 | "filtered['Value'] = pd.to_numeric(filtered['Value'])\n",
284 | "\n",
285 | "# pivot(...) will fail because there are some cities (e.g. Barcelona, Bilbao) that have multiple entries\n",
286 | "# for the same year\n",
287 | "cities = filtered.pivot_table(index='key', columns='TIME', values='Value', aggfunc=\"mean\")\n",
288 | "cities.head()"
289 | ]
290 | },
291 | {
292 | "cell_type": "code",
293 | "execution_count": 3,
294 | "metadata": {},
295 | "outputs": [
296 | {
297 | "data": {
298 | "text/html": [
299 | "
\n",
300 | "\n",
313 | "
\n",
314 | " \n",
315 | "
\n",
316 | "
TIME
\n",
317 | "
2008
\n",
318 | "
2009
\n",
319 | "
2010
\n",
320 | "
2011
\n",
321 | "
2012
\n",
322 | "
2013
\n",
323 | "
2014
\n",
324 | "
2015
\n",
325 | "
2016
\n",
326 | "
2017
\n",
327 | "
\n",
328 | "
\n",
329 | "
key
\n",
330 | "
\n",
331 | "
\n",
332 | "
\n",
333 | "
\n",
334 | "
\n",
335 | "
\n",
336 | "
\n",
337 | "
\n",
338 | "
\n",
339 | "
\n",
340 | "
\n",
341 | " \n",
342 | " \n",
343 | "
\n",
344 | "
United Kingdom:Population on the 1st of January, total
\n",
345 | "
NaN
\n",
346 | "
NaN
\n",
347 | "
NaN
\n",
348 | "
NaN
\n",
349 | "
NaN
\n",
350 | "
NaN
\n",
351 | "
NaN
\n",
352 | "
64853393.0
\n",
353 | "
65379044.0
\n",
354 | "
65844142.0
\n",
355 | "
\n",
356 | "
\n",
357 | "
Portugal:Population on the 1st of January, total
\n",
358 | "
10627250.0
\n",
359 | "
10637713.0
\n",
360 | "
10573479.0
\n",
361 | "
10572721.0
\n",
362 | "
10542398.0
\n",
363 | "
10487289.0
\n",
364 | "
10427301.0
\n",
365 | "
10374822.0
\n",
366 | "
10341330.0
\n",
367 | "
10309573.0
\n",
368 | "
\n",
369 | "
\n",
370 | "
London (greater city):Population on the 1st of January, total
\n",
371 | "
7668300.0
\n",
372 | "
7753600.0
\n",
373 | "
8002000.0
\n",
374 | "
8173941.0
\n",
375 | "
8256400.0
\n",
376 | "
8362500.0
\n",
377 | "
8477600.0
\n",
378 | "
8606201.0
\n",
379 | "
8730803.0
\n",
380 | "
8797330.0
\n",
381 | "
\n",
382 | "
\n",
383 | "
Slovakia:Population on the 1st of January, total
\n",
384 | "
5412254.0
\n",
385 | "
5424925.0
\n",
386 | "
5435273.0
\n",
387 | "
5397036.0
\n",
388 | "
5404322.0
\n",
389 | "
5410836.0
\n",
390 | "
5415949.0
\n",
391 | "
5421349.0
\n",
392 | "
5426252.0
\n",
393 | "
5435343.0
\n",
394 | "
\n",
395 | "
\n",
396 | "
Greater Manchester:Population on the 1st of January, total
\n",
397 | "
NaN
\n",
398 | "
NaN
\n",
399 | "
2650800.0
\n",
400 | "
2682528.0
\n",
401 | "
2693800.0
\n",
402 | "
2708600.0
\n",
403 | "
2723900.0
\n",
404 | "
2744508.0
\n",
405 | "
2769152.0
\n",
406 | "
2789822.0
\n",
407 | "
\n",
408 | "
\n",
409 | "
West Midlands urban area:Population on the 1st of January, total
\n",
410 | "
NaN
\n",
411 | "
NaN
\n",
412 | "
2390000.0
\n",
413 | "
2419500.0
\n",
414 | "
2431200.0
\n",
415 | "
2446600.0
\n",
416 | "
2462300.0
\n",
417 | "
2479550.0
\n",
418 | "
2500093.0
\n",
419 | "
2527245.0
\n",
420 | "
\n",
421 | "
\n",
422 | "
Latvia:Population on the 1st of January, total
\n",
423 | "
2270894.0
\n",
424 | "
2261294.0
\n",
425 | "
NaN
\n",
426 | "
2070371.0
\n",
427 | "
2044813.0
\n",
428 | "
2023825.0
\n",
429 | "
2001468.0
\n",
430 | "
1986096.0
\n",
431 | "
1968957.0
\n",
432 | "
1950116.0
\n",
433 | "
\n",
434 | "
\n",
435 | "
Lisboa (greater city):Population on the 1st of January, total
\n",
436 | "
1790389.0
\n",
437 | "
1784236.0
\n",
438 | "
1857112.0
\n",
439 | "
1863069.0
\n",
440 | "
1860256.0
\n",
441 | "
1849472.0
\n",
442 | "
1837852.0
\n",
443 | "
1835785.0
\n",
444 | "
1835894.0
\n",
445 | "
1842352.0
\n",
446 | "
\n",
447 | "
\n",
448 | "
Birmingham:Population on the 1st of January, total
\n",
449 | "
1019200.0
\n",
450 | "
1028700.0
\n",
451 | "
1055600.0
\n",
452 | "
1073045.0
\n",
453 | "
1079900.0
\n",
454 | "
1088900.0
\n",
455 | "
1096800.0
\n",
456 | "
1106334.0
\n",
457 | "
1117938.0
\n",
458 | "
1132600.0
\n",
459 | "
\n",
460 | "
\n",
461 | "
Greater Glasgow:Population on the 1st of January, total
\n",
462 | "
NaN
\n",
463 | "
NaN
\n",
464 | "
NaN
\n",
465 | "
NaN
\n",
466 | "
NaN
\n",
467 | "
NaN
\n",
468 | "
NaN
\n",
469 | "
NaN
\n",
470 | "
986575.0
\n",
471 | "
996545.0
\n",
472 | "
\n",
473 | " \n",
474 | "
\n",
475 | "
"
476 | ],
477 | "text/plain": [
478 | "TIME 2008 2009 \\\n",
479 | "key \n",
480 | "United Kingdom:Population on the 1st of January... NaN NaN \n",
481 | "Portugal:Population on the 1st of January, total 10627250.0 10637713.0 \n",
482 | "London (greater city):Population on the 1st of ... 7668300.0 7753600.0 \n",
483 | "Slovakia:Population on the 1st of January, total 5412254.0 5424925.0 \n",
484 | "Greater Manchester:Population on the 1st of Jan... NaN NaN \n",
485 | "West Midlands urban area:Population on the 1st ... NaN NaN \n",
486 | "Latvia:Population on the 1st of January, total 2270894.0 2261294.0 \n",
487 | "Lisboa (greater city):Population on the 1st of ... 1790389.0 1784236.0 \n",
488 | "Birmingham:Population on the 1st of January, total 1019200.0 1028700.0 \n",
489 | "Greater Glasgow:Population on the 1st of Januar... NaN NaN \n",
490 | "\n",
491 | "TIME 2010 2011 \\\n",
492 | "key \n",
493 | "United Kingdom:Population on the 1st of January... NaN NaN \n",
494 | "Portugal:Population on the 1st of January, total 10573479.0 10572721.0 \n",
495 | "London (greater city):Population on the 1st of ... 8002000.0 8173941.0 \n",
496 | "Slovakia:Population on the 1st of January, total 5435273.0 5397036.0 \n",
497 | "Greater Manchester:Population on the 1st of Jan... 2650800.0 2682528.0 \n",
498 | "West Midlands urban area:Population on the 1st ... 2390000.0 2419500.0 \n",
499 | "Latvia:Population on the 1st of January, total NaN 2070371.0 \n",
500 | "Lisboa (greater city):Population on the 1st of ... 1857112.0 1863069.0 \n",
501 | "Birmingham:Population on the 1st of January, total 1055600.0 1073045.0 \n",
502 | "Greater Glasgow:Population on the 1st of Januar... NaN NaN \n",
503 | "\n",
504 | "TIME 2012 2013 \\\n",
505 | "key \n",
506 | "United Kingdom:Population on the 1st of January... NaN NaN \n",
507 | "Portugal:Population on the 1st of January, total 10542398.0 10487289.0 \n",
508 | "London (greater city):Population on the 1st of ... 8256400.0 8362500.0 \n",
509 | "Slovakia:Population on the 1st of January, total 5404322.0 5410836.0 \n",
510 | "Greater Manchester:Population on the 1st of Jan... 2693800.0 2708600.0 \n",
511 | "West Midlands urban area:Population on the 1st ... 2431200.0 2446600.0 \n",
512 | "Latvia:Population on the 1st of January, total 2044813.0 2023825.0 \n",
513 | "Lisboa (greater city):Population on the 1st of ... 1860256.0 1849472.0 \n",
514 | "Birmingham:Population on the 1st of January, total 1079900.0 1088900.0 \n",
515 | "Greater Glasgow:Population on the 1st of Januar... NaN NaN \n",
516 | "\n",
517 | "TIME 2014 2015 \\\n",
518 | "key \n",
519 | "United Kingdom:Population on the 1st of January... NaN 64853393.0 \n",
520 | "Portugal:Population on the 1st of January, total 10427301.0 10374822.0 \n",
521 | "London (greater city):Population on the 1st of ... 8477600.0 8606201.0 \n",
522 | "Slovakia:Population on the 1st of January, total 5415949.0 5421349.0 \n",
523 | "Greater Manchester:Population on the 1st of Jan... 2723900.0 2744508.0 \n",
524 | "West Midlands urban area:Population on the 1st ... 2462300.0 2479550.0 \n",
525 | "Latvia:Population on the 1st of January, total 2001468.0 1986096.0 \n",
526 | "Lisboa (greater city):Population on the 1st of ... 1837852.0 1835785.0 \n",
527 | "Birmingham:Population on the 1st of January, total 1096800.0 1106334.0 \n",
528 | "Greater Glasgow:Population on the 1st of Januar... NaN NaN \n",
529 | "\n",
530 | "TIME 2016 2017 \n",
531 | "key \n",
532 | "United Kingdom:Population on the 1st of January... 65379044.0 65844142.0 \n",
533 | "Portugal:Population on the 1st of January, total 10341330.0 10309573.0 \n",
534 | "London (greater city):Population on the 1st of ... 8730803.0 8797330.0 \n",
535 | "Slovakia:Population on the 1st of January, total 5426252.0 5435343.0 \n",
536 | "Greater Manchester:Population on the 1st of Jan... 2769152.0 2789822.0 \n",
537 | "West Midlands urban area:Population on the 1st ... 2500093.0 2527245.0 \n",
538 | "Latvia:Population on the 1st of January, total 1968957.0 1950116.0 \n",
539 | "Lisboa (greater city):Population on the 1st of ... 1835894.0 1842352.0 \n",
540 | "Birmingham:Population on the 1st of January, total 1117938.0 1132600.0 \n",
541 | "Greater Glasgow:Population on the 1st of Januar... 986575.0 996545.0 "
542 | ]
543 | },
544 | "execution_count": 3,
545 | "metadata": {},
546 | "output_type": "execute_result"
547 | }
548 | ],
549 | "source": [
550 | "# Show biggest cities in 2017\n",
551 | "\n",
552 | "# just 'total' would also give us \"Population on the 1st of January, 0-4 years, total\" items\n",
553 | "key_filter = 'January, total'\n",
554 | "\n",
555 | "cities.filter(like=key_filter,axis=0).sort_values(by=[2017], ascending=False).head(10)"
556 | ]
557 | },
558 | {
559 | "cell_type": "code",
560 | "execution_count": 4,
561 | "metadata": {},
562 | "outputs": [
563 | {
564 | "data": {
565 | "text/html": [
566 | "
\n",
567 | "\n",
580 | "
\n",
581 | " \n",
582 | "
\n",
583 | "
TIME
\n",
584 | "
2008
\n",
585 | "
2009
\n",
586 | "
2010
\n",
587 | "
2011
\n",
588 | "
2012
\n",
589 | "
2013
\n",
590 | "
2014
\n",
591 | "
2015
\n",
592 | "
2016
\n",
593 | "
2017
\n",
594 | "
growth
\n",
595 | "
\n",
596 | "
\n",
597 | "
key
\n",
598 | "
\n",
599 | "
\n",
600 | "
\n",
601 | "
\n",
602 | "
\n",
603 | "
\n",
604 | "
\n",
605 | "
\n",
606 | "
\n",
607 | "
\n",
608 | "
\n",
609 | "
\n",
610 | " \n",
611 | " \n",
612 | "
\n",
613 | "
Bournemouth:Population on the 1st of January, total
\n",
614 | "
164600.0
\n",
615 | "
NaN
\n",
616 | "
176800.0
\n",
617 | "
183491.0
\n",
618 | "
185100.0
\n",
619 | "
187700.0
\n",
620 | "
190100.0
\n",
621 | "
268124.5
\n",
622 | "
271606.0
\n",
623 | "
269698.0
\n",
624 | "
53.623303
\n",
625 | "
\n",
626 | "
\n",
627 | "
Oulu:Population on the 1st of January, total
\n",
628 | "
131585.0
\n",
629 | "
133550.0
\n",
630 | "
139133.0
\n",
631 | "
141671.0
\n",
632 | "
143909.0
\n",
633 | "
190847.0
\n",
634 | "
193798.0
\n",
635 | "
196291.0
\n",
636 | "
198525.0
\n",
637 | "
NaN
\n",
638 | "
42.687213
\n",
639 | "
\n",
640 | "
\n",
641 | "
Derry & Strabane Local Government District:Population on the 1st of January, total
\n",
642 | "
109100.0
\n",
643 | "
109600.0
\n",
644 | "
108400.0
\n",
645 | "
107877.0
\n",
646 | "
108400.0
\n",
647 | "
108600.0
\n",
648 | "
108900.0
\n",
649 | "
149336.0
\n",
650 | "
149808.0
\n",
651 | "
150320.0
\n",
652 | "
38.199262
\n",
653 | "
\n",
654 | "
\n",
655 | "
Southampton:Population on the 1st of January, total
\n",
656 | "
234100.0
\n",
657 | "
NaN
\n",
658 | "
231600.0
\n",
659 | "
236882.0
\n",
660 | "
237600.0
\n",
661 | "
240800.0
\n",
662 | "
243700.0
\n",
663 | "
311890.0
\n",
664 | "
316571.5
\n",
665 | "
316379.0
\n",
666 | "
36.688903
\n",
667 | "
\n",
668 | "
\n",
669 | "
Blackpool:Population on the 1st of January, total
\n",
670 | "
140600.0
\n",
671 | "
NaN
\n",
672 | "
142700.0
\n",
673 | "
142065.0
\n",
674 | "
142000.0
\n",
675 | "
141700.0
\n",
676 | "
141000.0
\n",
677 | "
194661.5
\n",
678 | "
194388.5
\n",
679 | "
195034.0
\n",
680 | "
36.221794
\n",
681 | "
\n",
682 | "
\n",
683 | "
Valencia:Population on the 1st of January, total
\n",
684 | "
807200.0
\n",
685 | "
814208.0
\n",
686 | "
809267.0
\n",
687 | "
792054.0
\n",
688 | "
797028.0
\n",
689 | "
792303.0
\n",
690 | "
786424.0
\n",
691 | "
1085048.5
\n",
692 | "
1089284.5
\n",
693 | "
NaN
\n",
694 | "
34.601374
\n",
695 | "
\n",
696 | "
\n",
697 | "
Granada:Population on the 1st of January, total
\n",
698 | "
NaN
\n",
699 | "
NaN
\n",
700 | "
239154.0
\n",
701 | "
241003.0
\n",
702 | "
239017.0
\n",
703 | "
237818.0
\n",
704 | "
237540.0
\n",
705 | "
317253.5
\n",
706 | "
317160.0
\n",
707 | "
NaN
\n",
708 | "
32.617477
\n",
709 | "
\n",
710 | "
\n",
711 | "
Pamplona/Iru�a:Population on the 1st of January, total
\n",
712 | "
197275.0
\n",
713 | "
198491.0
\n",
714 | "
197488.0
\n",
715 | "
195943.0
\n",
716 | "
197604.0
\n",
717 | "
196955.0
\n",
718 | "
196166.0
\n",
719 | "
257629.0
\n",
720 | "
257984.0
\n",
721 | "
NaN
\n",
722 | "
30.632747
\n",
723 | "
\n",
724 | "
\n",
725 | "
Milano (greater city):Population on the 1st of January, total
\n",
726 | "
NaN
\n",
727 | "
NaN
\n",
728 | "
3154102.0
\n",
729 | "
3854555.0
\n",
730 | "
3875801.0
\n",
731 | "
3925767.0
\n",
732 | "
4038864.0
\n",
733 | "
4061382.0
\n",
734 | "
4074585.0
\n",
735 | "
NaN
\n",
736 | "
29.183679
\n",
737 | "
\n",
738 | "
\n",
739 | "
Stoke-on-trent:Population on the 1st of January, total
\n",
740 | "
239300.0
\n",
741 | "
238900.0
\n",
742 | "
246600.0
\n",
743 | "
249008.0
\n",
744 | "
249300.0
\n",
745 | "
250100.0
\n",
746 | "
250600.0
\n",
747 | "
314612.0
\n",
748 | "
316315.0
\n",
749 | "
318791.0
\n",
750 | "
28.270479
\n",
751 | "
\n",
752 | " \n",
753 | "
\n",
754 | "
"
755 | ],
756 | "text/plain": [
757 | "TIME 2008 2009 \\\n",
758 | "key \n",
759 | "Bournemouth:Population on the 1st of January, t... 164600.0 NaN \n",
760 | "Oulu:Population on the 1st of January, total 131585.0 133550.0 \n",
761 | "Derry & Strabane Local Government District:Popu... 109100.0 109600.0 \n",
762 | "Southampton:Population on the 1st of January, t... 234100.0 NaN \n",
763 | "Blackpool:Population on the 1st of January, total 140600.0 NaN \n",
764 | "Valencia:Population on the 1st of January, total 807200.0 814208.0 \n",
765 | "Granada:Population on the 1st of January, total NaN NaN \n",
766 | "Pamplona/Iru�a:Population on the 1st of January... 197275.0 198491.0 \n",
767 | "Milano (greater city):Population on the 1st of ... NaN NaN \n",
768 | "Stoke-on-trent:Population on the 1st of January... 239300.0 238900.0 \n",
769 | "\n",
770 | "TIME 2010 2011 \\\n",
771 | "key \n",
772 | "Bournemouth:Population on the 1st of January, t... 176800.0 183491.0 \n",
773 | "Oulu:Population on the 1st of January, total 139133.0 141671.0 \n",
774 | "Derry & Strabane Local Government District:Popu... 108400.0 107877.0 \n",
775 | "Southampton:Population on the 1st of January, t... 231600.0 236882.0 \n",
776 | "Blackpool:Population on the 1st of January, total 142700.0 142065.0 \n",
777 | "Valencia:Population on the 1st of January, total 809267.0 792054.0 \n",
778 | "Granada:Population on the 1st of January, total 239154.0 241003.0 \n",
779 | "Pamplona/Iru�a:Population on the 1st of January... 197488.0 195943.0 \n",
780 | "Milano (greater city):Population on the 1st of ... 3154102.0 3854555.0 \n",
781 | "Stoke-on-trent:Population on the 1st of January... 246600.0 249008.0 \n",
782 | "\n",
783 | "TIME 2012 2013 \\\n",
784 | "key \n",
785 | "Bournemouth:Population on the 1st of January, t... 185100.0 187700.0 \n",
786 | "Oulu:Population on the 1st of January, total 143909.0 190847.0 \n",
787 | "Derry & Strabane Local Government District:Popu... 108400.0 108600.0 \n",
788 | "Southampton:Population on the 1st of January, t... 237600.0 240800.0 \n",
789 | "Blackpool:Population on the 1st of January, total 142000.0 141700.0 \n",
790 | "Valencia:Population on the 1st of January, total 797028.0 792303.0 \n",
791 | "Granada:Population on the 1st of January, total 239017.0 237818.0 \n",
792 | "Pamplona/Iru�a:Population on the 1st of January... 197604.0 196955.0 \n",
793 | "Milano (greater city):Population on the 1st of ... 3875801.0 3925767.0 \n",
794 | "Stoke-on-trent:Population on the 1st of January... 249300.0 250100.0 \n",
795 | "\n",
796 | "TIME 2014 2015 \\\n",
797 | "key \n",
798 | "Bournemouth:Population on the 1st of January, t... 190100.0 268124.5 \n",
799 | "Oulu:Population on the 1st of January, total 193798.0 196291.0 \n",
800 | "Derry & Strabane Local Government District:Popu... 108900.0 149336.0 \n",
801 | "Southampton:Population on the 1st of January, t... 243700.0 311890.0 \n",
802 | "Blackpool:Population on the 1st of January, total 141000.0 194661.5 \n",
803 | "Valencia:Population on the 1st of January, total 786424.0 1085048.5 \n",
804 | "Granada:Population on the 1st of January, total 237540.0 317253.5 \n",
805 | "Pamplona/Iru�a:Population on the 1st of January... 196166.0 257629.0 \n",
806 | "Milano (greater city):Population on the 1st of ... 4038864.0 4061382.0 \n",
807 | "Stoke-on-trent:Population on the 1st of January... 250600.0 314612.0 \n",
808 | "\n",
809 | "TIME 2016 2017 \\\n",
810 | "key \n",
811 | "Bournemouth:Population on the 1st of January, t... 271606.0 269698.0 \n",
812 | "Oulu:Population on the 1st of January, total 198525.0 NaN \n",
813 | "Derry & Strabane Local Government District:Popu... 149808.0 150320.0 \n",
814 | "Southampton:Population on the 1st of January, t... 316571.5 316379.0 \n",
815 | "Blackpool:Population on the 1st of January, total 194388.5 195034.0 \n",
816 | "Valencia:Population on the 1st of January, total 1089284.5 NaN \n",
817 | "Granada:Population on the 1st of January, total 317160.0 NaN \n",
818 | "Pamplona/Iru�a:Population on the 1st of January... 257984.0 NaN \n",
819 | "Milano (greater city):Population on the 1st of ... 4074585.0 NaN \n",
820 | "Stoke-on-trent:Population on the 1st of January... 316315.0 318791.0 \n",
821 | "\n",
822 | "TIME growth \n",
823 | "key \n",
824 | "Bournemouth:Population on the 1st of January, t... 53.623303 \n",
825 | "Oulu:Population on the 1st of January, total 42.687213 \n",
826 | "Derry & Strabane Local Government District:Popu... 38.199262 \n",
827 | "Southampton:Population on the 1st of January, t... 36.688903 \n",
828 | "Blackpool:Population on the 1st of January, total 36.221794 \n",
829 | "Valencia:Population on the 1st of January, total 34.601374 \n",
830 | "Granada:Population on the 1st of January, total 32.617477 \n",
831 | "Pamplona/Iru�a:Population on the 1st of January... 30.632747 \n",
832 | "Milano (greater city):Population on the 1st of ... 29.183679 \n",
833 | "Stoke-on-trent:Population on the 1st of January... 28.270479 "
834 | ]
835 | },
836 | "execution_count": 4,
837 | "metadata": {},
838 | "output_type": "execute_result"
839 | }
840 | ],
841 | "source": [
842 | "# add a growth % 2010 - 2016 column \n",
843 | "cities[\"growth\"] = (cities[2016] / cities[2010] - 1) * 100\n",
844 | "cities.filter(like=key_filter,axis=0).sort_values(by=[\"growth\"], ascending=False).head(10)"
845 | ]
846 | },
847 | {
848 | "cell_type": "code",
849 | "execution_count": null,
850 | "metadata": {},
851 | "outputs": [],
852 | "source": []
853 | }
854 | ],
855 | "metadata": {
856 | "kernelspec": {
857 | "display_name": "Python 3",
858 | "language": "python",
859 | "name": "python3"
860 | },
861 | "language_info": {
862 | "codemirror_mode": {
863 | "name": "ipython",
864 | "version": 3
865 | },
866 | "file_extension": ".py",
867 | "mimetype": "text/x-python",
868 | "name": "python",
869 | "nbconvert_exporter": "python",
870 | "pygments_lexer": "ipython3",
871 | "version": "3.6.6"
872 | }
873 | },
874 | "nbformat": 4,
875 | "nbformat_minor": 2
876 | }
877 |
--------------------------------------------------------------------------------
/src/main/java/test_dataframes/CheckResult.java:
--------------------------------------------------------------------------------
1 | package test_dataframes;
2 |
3 | import java.util.List;
4 |
5 | import com.google.common.base.Verify;
6 |
7 | public class CheckResult {
8 | public static void checkResult(List> highestGrowth) {
9 | String[] expected = { "Bournemouth", "Oulu", "Derry & Strabane", "Southampton", "Blackpool",
10 | "Valencia", "Granada" };
11 | Verify.verify(highestGrowth.size() >= expected.length, "Provide at least %s items, got %s",
12 | expected.length, highestGrowth.size());
13 |
14 | for (int i = 0; i < expected.length; i++) {
15 | Verify.verify(((String) highestGrowth.get(i)).startsWith(expected[i]),
16 | "Expected item %s to start with %s, but was %s", i, expected[i],
17 | highestGrowth.get(i));
18 | }
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/src/main/java/test_dataframes/TestDFLib.java:
--------------------------------------------------------------------------------
1 | package test_dataframes;
2 |
3 | import com.google.common.base.Stopwatch;
4 | import org.apache.commons.csv.CSVFormat;
5 | import org.dflib.DataFrame;
6 | import org.dflib.Printers;
7 | import org.dflib.ValueMapper;
8 | import org.dflib.csv.Csv;
9 | import org.dflib.print.Printer;
10 |
11 | import static org.dflib.Exp.*;
12 |
13 | /**
14 | * Test the API of tablesaw to do some basic dataframe manipulations.
15 | *
16 | * https://github.com/dflib/dflib
17 | *
18 | * See https://medium.com/@thijser/doing-cool-data-science-in-java-how-3-dataframe-libraries-stack-up-5e6ccb7b437
19 | * for more information.
20 | */
21 | public class TestDFLib {
22 | public static void main(String[] args) {
23 |
24 | Printer printer = Printers.tabular(10, 100);
25 |
26 | DataFrame data = Csv.loader()
27 | .format(CSVFormat.DEFAULT.builder().setNullString(":").build())
28 | .col("Value", ValueMapper.stringToInt())
29 | .load("urb_cpop1_1_Data.csv");
30 | System.out.println(printer.toString(data));
31 |
32 | Stopwatch watch = Stopwatch.createStarted();
33 | DataFrame filtered = data.rows($col("Value").isNotNull()).select();
34 |
35 | DataFrame cities = filtered.group("CITIES", "INDIC_UR", "TIME")
36 | .cols("CITIES", "INDIC_UR", "TIME", "Mean [Value]")
37 | .agg($col("CITIES"), $col("INDIC_UR"), $col("TIME"), $int("Value").avg().castAsInt());
38 |
39 | System.out.println(printer.toString(cities));
40 |
41 | // Need to transpose/pivot now too
42 | DataFrame finalTable = cities
43 | .cols("key").merge(concat($str("CITIES"), ":", $str("INDIC_UR")))
44 | .pivot().rows("key").cols("TIME").vals("Mean [Value]");
45 |
46 | // sortDescendingOn puts N/A values first unfortunately, so let's remove them
47 | // before determining and printing.
48 | DataFrame existing2017 = finalTable
49 | .rowsExcept($int("2017").isNull()).select()
50 | .rows($str("key").endsWith("January, total")).select()
51 | .sort($int("2017").desc());
52 | System.out.println(printer.toString(existing2017));
53 |
54 | // Add growth column
55 |
56 | DataFrame finalTable1 = finalTable
57 | .cols("growth").merge($int("2016").castAsDouble().div($int("2010")).sub(1).mul(100));
58 |
59 | DataFrame highestGrowthTable = finalTable1
60 | .rows($str("key").endsWith("January, total")).select()
61 | .rowsExcept($col("growth").isNull()).select()
62 | .sort($double("growth").desc());
63 |
64 | System.out.println(printer.toString(highestGrowthTable));
65 | CheckResult.checkResult(highestGrowthTable.getColumn("key").toList());
66 |
67 | System.out.println("Total time: " + watch);
68 | }
69 | }
70 |
--------------------------------------------------------------------------------
/src/main/java/test_dataframes/TestDatavec.java:
--------------------------------------------------------------------------------
1 | package test_dataframes;
2 |
3 | import static org.datavec.api.transform.condition.ConditionOp.Equal;
4 |
5 | import java.io.File;
6 | import java.util.ArrayList;
7 | import java.util.List;
8 |
9 | import org.datavec.api.records.reader.RecordReader;
10 | import org.datavec.api.records.reader.impl.csv.CSVRecordReader;
11 | import org.datavec.api.split.FileSplit;
12 | import org.datavec.api.transform.ReduceOp;
13 | import org.datavec.api.transform.TransformProcess;
14 | import org.datavec.api.transform.condition.column.StringColumnCondition;
15 | import org.datavec.api.transform.reduce.Reducer;
16 | import org.datavec.api.transform.schema.Schema;
17 | import org.datavec.api.writable.IntWritable;
18 | import org.datavec.api.writable.Writable;
19 | import org.datavec.local.transforms.LocalTransformExecutor;
20 |
21 | /**
22 | * Test the API of DataVec to do some basic dataframe manipulations (unfinished).
23 | *
24 | * https://deeplearning4j.org/docs/latest/datavec-overview
25 | */
26 | public class TestDatavec {
27 | public static void main(String[] args) throws Exception {
28 | int numLinesToSkip = 1;
29 | char delimiter = ',';
30 | RecordReader recordReader = new CSVRecordReader(numLinesToSkip,delimiter);
31 | recordReader.initialize(new FileSplit(new File("urb_cpop1_1_Data.csv")));
32 |
33 | // It seems we need to know in advance what the fields and their order
34 | // are here...
35 | Schema csvSchema = new Schema.Builder()
36 | .addColumnInteger("TIME")
37 | .addColumnsString("CITIES", "INDIC_UR","Value","Flag and Footnotes")
38 | .build();
39 |
40 | TransformProcess tp = new TransformProcess.Builder(csvSchema)
41 | .conditionalReplaceValueTransform("Value", new IntWritable(0), new StringColumnCondition("Value", Equal, ":"))
42 | .convertToInteger("Value")
43 | .reduce(new Reducer.Builder(ReduceOp.TakeLast)
44 | .keyColumns("CITIES", "INDIC_UR", "TIME")
45 | .meanColumns("Value")
46 | .build())
47 | // Here we also need to know in advance the range of items
48 | .integerToOneHot("TIME", 2008, 2017)
49 |
50 | // Now we have one-hot encoded countries, with the Value column separately.
51 | // We would have to either do a conditionalCopyValueTransform Value -> year column
52 | // for every year separately, or we probably have to modify integerToOneHot
53 | // to copy our Value column instead of 1-hot to make the proper pivot.
54 | // IntegerToOneHotTransform is > 200 lines, so it's not trivial to create
55 | // such a custom transform.
56 |
57 | .build();
58 |
59 |
60 | List> csvData = new ArrayList<>();
61 | while(recordReader.hasNext()) {
62 | csvData.add(recordReader.next());
63 | }
64 | printHead(csvData, csvSchema);
65 |
66 | List> transformedData = LocalTransformExecutor.execute(csvData, tp);
67 |
68 | printHead(transformedData, tp.getFinalSchema());
69 | }
70 |
71 | private static void printHead(List> data, Schema schema) {
72 | for (int j = 0; j < schema.getColumnNames().size(); j++) {
73 | System.out.printf("%20s", schema.getColumnNames().get(j));
74 | }
75 | System.out.println();
76 | for (int i = 0; i < Math.min(10, data.size()); i++) {
77 | List row = data.get(i);
78 | for (int j = 0; j < row.size(); j++) {
79 | System.out.printf("%20s", row.get(j).toString());
80 | }
81 | System.out.println();
82 | }
83 | }
84 | }
85 |
--------------------------------------------------------------------------------
/src/main/java/test_dataframes/TestDuckDb.kt:
--------------------------------------------------------------------------------
1 | package test_dataframes
2 |
3 | import com.google.common.base.Stopwatch
4 | import tech.tablesaw.api.Table
5 | import java.sql.DriverManager
6 |
7 |
/**
 * Test duckdb to do some basic dataframe manipulations.
 *
 * Queries the population CSV directly through DuckDB's JDBC driver, pivots the years
 * into columns, adds a growth column and checks the resulting ranking.
 *
 * See https://medium.com/@thijser/doing-cool-data-science-in-java-how-3-dataframe-libraries-stack-up-5e6ccb7b437
 * for more information.
 */
fun main() {
    // `use` closes the connection, the statement and every result set, also on failure.
    DriverManager.getConnection("jdbc:duckdb:").use { conn ->
        conn.createStatement().use { stmt ->
            stmt.executeQuery("SELECT * FROM 'urb_cpop1_1_Data.csv'").use { rs ->
                println(Table.read().db(rs).print())
            }

            // Printing the raw data above is deliberately left outside the timed section.
            val watch = Stopwatch.createStarted()
            stmt.execute(
                """
                CREATE TEMP TABLE t1 AS (
                    WITH cities AS (
                        SELECT CITIES || ':' || INDIC_UR as key,
                            CAST(Value AS INTEGER) as Value,
                            * EXCLUDE (CITIES, INDIC_UR, Value)
                        FROM 'urb_cpop1_1_Data.csv' WHERE Value != ':'),
                    pivot_table AS (
                        PIVOT cities
                        ON TIME
                        USING AVG(Value)
                        GROUP BY key
                    )
                    SELECT *, ("2016"::REAL / "2010"::REAL - 1.0 ) * 100.0 as growth
                    FROM pivot_table
                    WHERE suffix(key, 'January, total')
                    ORDER BY growth DESC
                )
                """
            )

            stmt.executeQuery("SELECT * FROM t1").use { rs ->
                println(Table.read().db(rs).print())
            }

            // Collect the keys in ranking order for the shared result check.
            val result = stmt.executeQuery("SELECT key FROM t1").use { r ->
                mutableListOf<String>().apply {
                    while (r.next()) {
                        this += r.getString("key")
                    }
                }
            }
            CheckResult.checkResult(result)
            println("Total time: $watch")
        }
    }
}
--------------------------------------------------------------------------------
/src/main/java/test_dataframes/TestJoinery.java:
--------------------------------------------------------------------------------
1 | package test_dataframes;
2 |
3 | import java.util.Collections;
4 | import java.util.List;
5 | import java.util.function.Consumer;
6 |
7 | import joinery.DataFrame;
8 | import joinery.DataFrame.KeyFunction;
9 | import joinery.DataFrame.RowFunction;
10 | import joinery.impl.Aggregation.Mean;
11 |
12 | import com.google.common.base.Stopwatch;
13 | import com.google.common.collect.Iterables;
14 | import com.google.common.collect.Lists;
15 |
16 | /**
17 | * Test the API of joinery to do some basic dataframe manipulations.
18 | *
19 | * https://github.com/cardillo/joinery
20 | *
21 | * See https://medium.com/@thijser/doing-cool-data-science-in-java-how-3-dataframe-libraries-stack-up-5e6ccb7b437
22 | * for more information.
23 | */
24 | public class TestJoinery {
25 | public static void main(String[] args) throws Exception {
26 | DataFrame