├── .ipynb_checkpoints └── Data Cleaning Tutorial - Real Python-checkpoint.ipynb ├── Data Cleaning Tutorial - Real Python.ipynb └── Datasets ├── BL-Flickr-Images-Book.csv ├── census.csv ├── olympics.csv └── university_towns.txt /.ipynb_checkpoints/Data Cleaning Tutorial - Real Python-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 154, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np\n", 11 | "from functools import reduce" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 155, 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "data": { 21 | "text/html": [ 22 | "
\n", 23 | "\n", 36 | "\n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | "
IdentifierEdition StatementPlace of PublicationDate of PublicationPublisherTitleAuthorContributorsCorporate AuthorCorporate ContributorsFormer ownerEngraverIssuance typeFlickr URLShelfmarks
0206NaNLondon1879 [1878]S. Tinsley & Co.Walter Forbes. [A novel.] By A. AA. A.FORBES, Walter.NaNNaNNaNNaNmonographichttp://www.flickr.com/photos/britishlibrary/ta...British Library HMNTS 12641.b.30.
1216NaNLondon; Virtue & Yorston1868Virtue & Co.All for Greed. [A novel. The dedication signed...A., A. A.BLAZE DE BURY, Marie Pauline Rose - BaronessNaNNaNNaNNaNmonographichttp://www.flickr.com/photos/britishlibrary/ta...British Library HMNTS 12626.cc.2.
2218NaNLondon1869Bradbury, Evans & Co.Love the Avenger. By the author of “All for Gr...A., A. A.BLAZE DE BURY, Marie Pauline Rose - BaronessNaNNaNNaNNaNmonographichttp://www.flickr.com/photos/britishlibrary/ta...British Library HMNTS 12625.dd.1.
3472NaNLondon1851James DarlingWelsh Sketches, chiefly ecclesiastical, to the...A., E. S.Appleyard, Ernest Silvanus.NaNNaNNaNNaNmonographichttp://www.flickr.com/photos/britishlibrary/ta...British Library HMNTS 10369.bbb.15.
4480A new edition, revised, etc.London1857Wertheim & Macintosh[The World in which I live, and my place in it...A., E. S.BROOME, John Henry.NaNNaNNaNNaNmonographichttp://www.flickr.com/photos/britishlibrary/ta...British Library HMNTS 9007.d.28.
\n", 150 | "
" 151 | ], 152 | "text/plain": [ 153 | " Identifier Edition Statement Place of Publication \\\n", 154 | "0 206 NaN London \n", 155 | "1 216 NaN London; Virtue & Yorston \n", 156 | "2 218 NaN London \n", 157 | "3 472 NaN London \n", 158 | "4 480 A new edition, revised, etc. London \n", 159 | "\n", 160 | " Date of Publication Publisher \\\n", 161 | "0 1879 [1878] S. Tinsley & Co. \n", 162 | "1 1868 Virtue & Co. \n", 163 | "2 1869 Bradbury, Evans & Co. \n", 164 | "3 1851 James Darling \n", 165 | "4 1857 Wertheim & Macintosh \n", 166 | "\n", 167 | " Title Author \\\n", 168 | "0 Walter Forbes. [A novel.] By A. A A. A. \n", 169 | "1 All for Greed. [A novel. The dedication signed... A., A. A. \n", 170 | "2 Love the Avenger. By the author of “All for Gr... A., A. A. \n", 171 | "3 Welsh Sketches, chiefly ecclesiastical, to the... A., E. S. \n", 172 | "4 [The World in which I live, and my place in it... A., E. S. \n", 173 | "\n", 174 | " Contributors Corporate Author \\\n", 175 | "0 FORBES, Walter. NaN \n", 176 | "1 BLAZE DE BURY, Marie Pauline Rose - Baroness NaN \n", 177 | "2 BLAZE DE BURY, Marie Pauline Rose - Baroness NaN \n", 178 | "3 Appleyard, Ernest Silvanus. NaN \n", 179 | "4 BROOME, John Henry. NaN \n", 180 | "\n", 181 | " Corporate Contributors Former owner Engraver Issuance type \\\n", 182 | "0 NaN NaN NaN monographic \n", 183 | "1 NaN NaN NaN monographic \n", 184 | "2 NaN NaN NaN monographic \n", 185 | "3 NaN NaN NaN monographic \n", 186 | "4 NaN NaN NaN monographic \n", 187 | "\n", 188 | " Flickr URL \\\n", 189 | "0 http://www.flickr.com/photos/britishlibrary/ta... \n", 190 | "1 http://www.flickr.com/photos/britishlibrary/ta... \n", 191 | "2 http://www.flickr.com/photos/britishlibrary/ta... \n", 192 | "3 http://www.flickr.com/photos/britishlibrary/ta... \n", 193 | "4 http://www.flickr.com/photos/britishlibrary/ta... \n", 194 | "\n", 195 | " Shelfmarks \n", 196 | "0 British Library HMNTS 12641.b.30. \n", 197 | "1 British Library HMNTS 12626.cc.2. 
# Load the book metadata CSV into the notebook-wide frame `df`.
# A forward slash is used as the separator: the previous backslash form
# ('Datasets\BL-...') contained the invalid escape sequence '\B' and only
# resolved to the intended file on Windows.
df = pd.read_csv('Datasets/BL-Flickr-Images-Book.csv')

# Preview the first rows (notebook display cell).
df.head()
\n", 222 | "\n", 235 | "\n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | "
Place of PublicationDate of PublicationPublisherTitleAuthorFlickr URL
Identifier
206London1879 [1878]S. Tinsley & Co.Walter Forbes. [A novel.] By A. AA. A.http://www.flickr.com/photos/britishlibrary/ta...
216London; Virtue & Yorston1868Virtue & Co.All for Greed. [A novel. The dedication signed...A., A. A.http://www.flickr.com/photos/britishlibrary/ta...
218London1869Bradbury, Evans & Co.Love the Avenger. By the author of “All for Gr...A., A. A.http://www.flickr.com/photos/britishlibrary/ta...
472London1851James DarlingWelsh Sketches, chiefly ecclesiastical, to the...A., E. S.http://www.flickr.com/photos/britishlibrary/ta...
480London1857Wertheim & Macintosh[The World in which I live, and my place in it...A., E. S.http://www.flickr.com/photos/britishlibrary/ta...
\n", 304 | "
" 305 | ], 306 | "text/plain": [ 307 | " Place of Publication Date of Publication \\\n", 308 | "Identifier \n", 309 | "206 London 1879 [1878] \n", 310 | "216 London; Virtue & Yorston 1868 \n", 311 | "218 London 1869 \n", 312 | "472 London 1851 \n", 313 | "480 London 1857 \n", 314 | "\n", 315 | " Publisher \\\n", 316 | "Identifier \n", 317 | "206 S. Tinsley & Co. \n", 318 | "216 Virtue & Co. \n", 319 | "218 Bradbury, Evans & Co. \n", 320 | "472 James Darling \n", 321 | "480 Wertheim & Macintosh \n", 322 | "\n", 323 | " Title Author \\\n", 324 | "Identifier \n", 325 | "206 Walter Forbes. [A novel.] By A. A A. A. \n", 326 | "216 All for Greed. [A novel. The dedication signed... A., A. A. \n", 327 | "218 Love the Avenger. By the author of “All for Gr... A., A. A. \n", 328 | "472 Welsh Sketches, chiefly ecclesiastical, to the... A., E. S. \n", 329 | "480 [The World in which I live, and my place in it... A., E. S. \n", 330 | "\n", 331 | " Flickr URL \n", 332 | "Identifier \n", 333 | "206 http://www.flickr.com/photos/britishlibrary/ta... \n", 334 | "216 http://www.flickr.com/photos/britishlibrary/ta... \n", 335 | "218 http://www.flickr.com/photos/britishlibrary/ta... \n", 336 | "472 http://www.flickr.com/photos/britishlibrary/ta... \n", 337 | "480 http://www.flickr.com/photos/britishlibrary/ta... 
" 338 | ] 339 | }, 340 | "execution_count": 156, 341 | "metadata": {}, 342 | "output_type": "execute_result" 343 | } 344 | ], 345 | "source": [ 346 | "to_drop = ['Edition Statement',\n", 347 | " 'Corporate Author',\n", 348 | " 'Corporate Contributors',\n", 349 | " 'Former owner',\n", 350 | " 'Engraver',\n", 351 | " 'Contributors',\n", 352 | " 'Issuance type',\n", 353 | " 'Shelfmarks']\n", 354 | "\n", 355 | "df.drop(to_drop, inplace = True, axis = 1)\n", 356 | "df = df.set_index('Identifier')\n", 357 | "df.head()" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": 157, 363 | "metadata": {}, 364 | "outputs": [], 365 | "source": [ 366 | "unwanted_characters = ['[', ',', '-']\n", 367 | "\n", 368 | "def clean_dates(item):\n", 369 | " dop= str(item.loc['Date of Publication'])\n", 370 | " \n", 371 | " if dop == 'nan' or dop[0] == '[':\n", 372 | " return np.NaN\n", 373 | " \n", 374 | " for character in unwanted_characters:\n", 375 | " if character in dop:\n", 376 | " character_index = dop.find(character)\n", 377 | " dop = dop[:character_index]\n", 378 | " \n", 379 | " return dop\n", 380 | "\n", 381 | "df['Date of Publication'] = df.apply(clean_dates, axis = 1)" 382 | ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": 158, 387 | "metadata": {}, 388 | "outputs": [ 389 | { 390 | "data": { 391 | "text/html": [ 392 | "
\n", 393 | "\n", 406 | "\n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | "
Place of PublicationDate of PublicationPublisherTitleAuthorFlickr URL
Identifier
206London1879S. Tinsley & Co.Walter Forbes. [A novel.] By A. AA. A.http://www.flickr.com/photos/britishlibrary/ta...
216London; Virtue & Yorston1868Virtue & Co.All for Greed. [A novel. The dedication signed...A., A. A.http://www.flickr.com/photos/britishlibrary/ta...
218London1869Bradbury, Evans & Co.Love the Avenger. By the author of “All for Gr...A., A. A.http://www.flickr.com/photos/britishlibrary/ta...
472London1851James DarlingWelsh Sketches, chiefly ecclesiastical, to the...A., E. S.http://www.flickr.com/photos/britishlibrary/ta...
480London1857Wertheim & Macintosh[The World in which I live, and my place in it...A., E. S.http://www.flickr.com/photos/britishlibrary/ta...
\n", 475 | "
" 476 | ], 477 | "text/plain": [ 478 | " Place of Publication Date of Publication \\\n", 479 | "Identifier \n", 480 | "206 London 1879 \n", 481 | "216 London; Virtue & Yorston 1868 \n", 482 | "218 London 1869 \n", 483 | "472 London 1851 \n", 484 | "480 London 1857 \n", 485 | "\n", 486 | " Publisher \\\n", 487 | "Identifier \n", 488 | "206 S. Tinsley & Co. \n", 489 | "216 Virtue & Co. \n", 490 | "218 Bradbury, Evans & Co. \n", 491 | "472 James Darling \n", 492 | "480 Wertheim & Macintosh \n", 493 | "\n", 494 | " Title Author \\\n", 495 | "Identifier \n", 496 | "206 Walter Forbes. [A novel.] By A. A A. A. \n", 497 | "216 All for Greed. [A novel. The dedication signed... A., A. A. \n", 498 | "218 Love the Avenger. By the author of “All for Gr... A., A. A. \n", 499 | "472 Welsh Sketches, chiefly ecclesiastical, to the... A., E. S. \n", 500 | "480 [The World in which I live, and my place in it... A., E. S. \n", 501 | "\n", 502 | " Flickr URL \n", 503 | "Identifier \n", 504 | "206 http://www.flickr.com/photos/britishlibrary/ta... \n", 505 | "216 http://www.flickr.com/photos/britishlibrary/ta... \n", 506 | "218 http://www.flickr.com/photos/britishlibrary/ta... \n", 507 | "472 http://www.flickr.com/photos/britishlibrary/ta... \n", 508 | "480 http://www.flickr.com/photos/britishlibrary/ta... 
" 509 | ] 510 | }, 511 | "execution_count": 158, 512 | "metadata": {}, 513 | "output_type": "execute_result" 514 | } 515 | ], 516 | "source": [ 517 | "df.head()" 518 | ] 519 | }, 520 | { 521 | "cell_type": "code", 522 | "execution_count": 159, 523 | "metadata": {}, 524 | "outputs": [], 525 | "source": [ 526 | "def clean_author_names(item):\n", 527 | " \n", 528 | " author = str(item.loc['Author'])\n", 529 | " \n", 530 | " if author == 'nan':\n", 531 | " return np.NaN\n", 532 | " \n", 533 | " author = author.split(',')\n", 534 | "\n", 535 | " if len(author) == 1:\n", 536 | " name = filter(lambda x: x.isalpha(), author[0])\n", 537 | " return reduce(lambda x, y: x + y, name)\n", 538 | " \n", 539 | " last_name, first_name = author[0], author[1]\n", 540 | "\n", 541 | " first_name = first_name[:first_name.find('-')] if '-' in first_name else first_name\n", 542 | " \n", 543 | " if first_name.endswith(('.', '.|')):\n", 544 | " parts = first_name.split('.')\n", 545 | " \n", 546 | " if len(parts) > 1:\n", 547 | " first_occurence = first_name.find('.')\n", 548 | " final_occurence = first_name.find('.', first_occurence + 1)\n", 549 | " first_name = first_name[:final_occurence]\n", 550 | " else:\n", 551 | " first_name = first_name[:first_name.find('.')]\n", 552 | " \n", 553 | " last_name = last_name.capitalize()\n", 554 | " \n", 555 | " return f'{first_name} {last_name}'\n", 556 | "\n", 557 | "\n", 558 | "df['Author'] = df.apply(clean_author_names, axis = 1)" 559 | ] 560 | }, 561 | { 562 | "cell_type": "code", 563 | "execution_count": 160, 564 | "metadata": {}, 565 | "outputs": [ 566 | { 567 | "data": { 568 | "text/html": [ 569 | "
\n", 570 | "\n", 583 | "\n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | "
Place of PublicationDate of PublicationPublisherTitleAuthorFlickr URL
Identifier
206London1879S. Tinsley & Co.Walter Forbes. [A novel.] By A. AAAhttp://www.flickr.com/photos/britishlibrary/ta...
216London; Virtue & Yorston1868Virtue & Co.All for Greed. [A novel. The dedication signed...A. A A.http://www.flickr.com/photos/britishlibrary/ta...
218London1869Bradbury, Evans & Co.Love the Avenger. By the author of “All for Gr...A. A A.http://www.flickr.com/photos/britishlibrary/ta...
472London1851James DarlingWelsh Sketches, chiefly ecclesiastical, to the...E. S A.http://www.flickr.com/photos/britishlibrary/ta...
480London1857Wertheim & Macintosh[The World in which I live, and my place in it...E. S A.http://www.flickr.com/photos/britishlibrary/ta...
\n", 652 | "
" 653 | ], 654 | "text/plain": [ 655 | " Place of Publication Date of Publication \\\n", 656 | "Identifier \n", 657 | "206 London 1879 \n", 658 | "216 London; Virtue & Yorston 1868 \n", 659 | "218 London 1869 \n", 660 | "472 London 1851 \n", 661 | "480 London 1857 \n", 662 | "\n", 663 | " Publisher \\\n", 664 | "Identifier \n", 665 | "206 S. Tinsley & Co. \n", 666 | "216 Virtue & Co. \n", 667 | "218 Bradbury, Evans & Co. \n", 668 | "472 James Darling \n", 669 | "480 Wertheim & Macintosh \n", 670 | "\n", 671 | " Title Author \\\n", 672 | "Identifier \n", 673 | "206 Walter Forbes. [A novel.] By A. A AA \n", 674 | "216 All for Greed. [A novel. The dedication signed... A. A A. \n", 675 | "218 Love the Avenger. By the author of “All for Gr... A. A A. \n", 676 | "472 Welsh Sketches, chiefly ecclesiastical, to the... E. S A. \n", 677 | "480 [The World in which I live, and my place in it... E. S A. \n", 678 | "\n", 679 | " Flickr URL \n", 680 | "Identifier \n", 681 | "206 http://www.flickr.com/photos/britishlibrary/ta... \n", 682 | "216 http://www.flickr.com/photos/britishlibrary/ta... \n", 683 | "218 http://www.flickr.com/photos/britishlibrary/ta... \n", 684 | "472 http://www.flickr.com/photos/britishlibrary/ta... \n", 685 | "480 http://www.flickr.com/photos/britishlibrary/ta... 
" 686 | ] 687 | }, 688 | "execution_count": 160, 689 | "metadata": {}, 690 | "output_type": "execute_result" 691 | } 692 | ], 693 | "source": [ 694 | "df.head()" 695 | ] 696 | }, 697 | { 698 | "cell_type": "code", 699 | "execution_count": 161, 700 | "metadata": {}, 701 | "outputs": [], 702 | "source": [ 703 | "def clean_publication_place(item):\n", 704 | " place = str(item['Place of Publication'])\n", 705 | " \n", 706 | " if 'London' in place:\n", 707 | " return 'London'\n", 708 | " if 'Oxford' in place:\n", 709 | " return 'Oxford'\n", 710 | " if place == 'Newcastle upon Tyne':\n", 711 | " return 'Newcastle-upon-Tyne'\n", 712 | " \n", 713 | " return place\n", 714 | " \n", 715 | "df['Place of Publication'] = df.apply(clean_publication_place, axis = 1)" 716 | ] 717 | }, 718 | { 719 | "cell_type": "code", 720 | "execution_count": 162, 721 | "metadata": {}, 722 | "outputs": [ 723 | { 724 | "data": { 725 | "text/html": [ 726 | "
\n", 727 | "\n", 740 | "\n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | "
Place of PublicationDate of PublicationPublisherTitleAuthorFlickr URL
Identifier
206London1879S. Tinsley & Co.Walter Forbes. [A novel.] By A. AAAhttp://www.flickr.com/photos/britishlibrary/ta...
216London1868Virtue & Co.All for Greed. [A novel. The dedication signed...A. A A.http://www.flickr.com/photos/britishlibrary/ta...
218London1869Bradbury, Evans & Co.Love the Avenger. By the author of “All for Gr...A. A A.http://www.flickr.com/photos/britishlibrary/ta...
472London1851James DarlingWelsh Sketches, chiefly ecclesiastical, to the...E. S A.http://www.flickr.com/photos/britishlibrary/ta...
480London1857Wertheim & Macintosh[The World in which I live, and my place in it...E. S A.http://www.flickr.com/photos/britishlibrary/ta...
\n", 809 | "
" 810 | ], 811 | "text/plain": [ 812 | " Place of Publication Date of Publication Publisher \\\n", 813 | "Identifier \n", 814 | "206 London 1879 S. Tinsley & Co. \n", 815 | "216 London 1868 Virtue & Co. \n", 816 | "218 London 1869 Bradbury, Evans & Co. \n", 817 | "472 London 1851 James Darling \n", 818 | "480 London 1857 Wertheim & Macintosh \n", 819 | "\n", 820 | " Title Author \\\n", 821 | "Identifier \n", 822 | "206 Walter Forbes. [A novel.] By A. A AA \n", 823 | "216 All for Greed. [A novel. The dedication signed... A. A A. \n", 824 | "218 Love the Avenger. By the author of “All for Gr... A. A A. \n", 825 | "472 Welsh Sketches, chiefly ecclesiastical, to the... E. S A. \n", 826 | "480 [The World in which I live, and my place in it... E. S A. \n", 827 | "\n", 828 | " Flickr URL \n", 829 | "Identifier \n", 830 | "206 http://www.flickr.com/photos/britishlibrary/ta... \n", 831 | "216 http://www.flickr.com/photos/britishlibrary/ta... \n", 832 | "218 http://www.flickr.com/photos/britishlibrary/ta... \n", 833 | "472 http://www.flickr.com/photos/britishlibrary/ta... \n", 834 | "480 http://www.flickr.com/photos/britishlibrary/ta... 
" 835 | ] 836 | }, 837 | "execution_count": 162, 838 | "metadata": {}, 839 | "output_type": "execute_result" 840 | } 841 | ], 842 | "source": [ 843 | "df.head()" 844 | ] 845 | }, 846 | { 847 | "cell_type": "code", 848 | "execution_count": 163, 849 | "metadata": {}, 850 | "outputs": [], 851 | "source": [ 852 | "def clean_title(item):\n", 853 | " title = str(item['Title'])\n", 854 | " \n", 855 | " if title == 'nan':\n", 856 | " return np.NaN\n", 857 | " \n", 858 | " if title[0] == '[':\n", 859 | " title = title[1: title.find(']')]\n", 860 | " \n", 861 | " if 'by' in title:\n", 862 | " title = title[:title.find('by')]\n", 863 | " elif 'By' in title:\n", 864 | " title = title[:title.find('By')]\n", 865 | " \n", 866 | " if '[' in title:\n", 867 | " title = title[:title.find('[')]\n", 868 | "\n", 869 | " title = title[:-2]\n", 870 | " \n", 871 | " title = list(map(str.capitalize, title.split()))\n", 872 | " return ' '.join(title)\n", 873 | " \n", 874 | "df['Title'] = df.apply(clean_title, axis = 1)" 875 | ] 876 | }, 877 | { 878 | "cell_type": "code", 879 | "execution_count": 164, 880 | "metadata": {}, 881 | "outputs": [ 882 | { 883 | "data": { 884 | "text/html": [ 885 | "
\n", 886 | "\n", 899 | "\n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " 
\n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | " \n", 1092 | " \n", 1093 | " \n", 1094 | " \n", 1095 | " \n", 1096 | " \n", 1097 | " \n", 1098 | " \n", 1099 | " \n", 1100 | " \n", 1101 | " \n", 1102 | " \n", 1103 | " \n", 1104 | " \n", 1105 | " \n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1109 | " \n", 1110 | " \n", 1111 | " \n", 1112 | " \n", 1113 | " \n", 1114 | " \n", 1115 | " \n", 1116 | " \n", 1117 | " \n", 1118 | " \n", 1119 | " \n", 1120 | " \n", 1121 | " \n", 1122 | " \n", 1123 | " \n", 1124 | " \n", 1125 | " \n", 1126 | " \n", 1127 | " \n", 1128 | " \n", 1129 | " \n", 1130 | " \n", 1131 | " \n", 1132 | " \n", 1133 | " \n", 1134 | " \n", 1135 | " \n", 1136 | " \n", 1137 | " \n", 1138 | " \n", 1139 | " \n", 1140 | " \n", 1141 | " \n", 1142 | " \n", 1143 | " \n", 1144 | " \n", 1145 | " \n", 1146 | " \n", 1147 | " \n", 1148 | " \n", 1149 | " \n", 1150 | " \n", 1151 | " \n", 1152 | " \n", 1153 | " \n", 1154 | " \n", 1155 | " \n", 1156 | " \n", 1157 | " \n", 1158 | " \n", 1159 | " \n", 1160 | " \n", 1161 | " \n", 1162 | " \n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | " \n", 1186 | " \n", 1187 | " \n", 1188 | " \n", 1189 | " \n", 1190 | 
" \n", 1191 | " \n", 1192 | " \n", 1193 | " \n", 1194 | " \n", 1195 | " \n", 1196 | " \n", 1197 | " \n", 1198 | " \n", 1199 | " \n", 1200 | " \n", 1201 | " \n", 1202 | " \n", 1203 | " \n", 1204 | " \n", 1205 | " \n", 1206 | " \n", 1207 | " \n", 1208 | " \n", 1209 | " \n", 1210 | " \n", 1211 | " \n", 1212 | " \n", 1213 | " \n", 1214 | " \n", 1215 | " \n", 1216 | " \n", 1217 | " \n", 1218 | " \n", 1219 | " \n", 1220 | " \n", 1221 | " \n", 1222 | " \n", 1223 | " \n", 1224 | " \n", 1225 | " \n", 1226 | " \n", 1227 | " \n", 1228 | " \n", 1229 | " \n", 1230 | " \n", 1231 | " \n", 1232 | " \n", 1233 | " \n", 1234 | " \n", 1235 | " \n", 1236 | " \n", 1237 | " \n", 1238 | " \n", 1239 | " \n", 1240 | " \n", 1241 | " \n", 1242 | " \n", 1243 | " \n", 1244 | " \n", 1245 | " \n", 1246 | " \n", 1247 | " \n", 1248 | " \n", 1249 | " \n", 1250 | " \n", 1251 | " \n", 1252 | " \n", 1253 | " \n", 1254 | " \n", 1255 | " \n", 1256 | " \n", 1257 | " \n", 1258 | " \n", 1259 | " \n", 1260 | " \n", 1261 | " \n", 1262 | " \n", 1263 | " \n", 1264 | " \n", 1265 | " \n", 1266 | " \n", 1267 | " \n", 1268 | " \n", 1269 | " \n", 1270 | " \n", 1271 | " \n", 1272 | " \n", 1273 | " \n", 1274 | " \n", 1275 | " \n", 1276 | " \n", 1277 | " \n", 1278 | " \n", 1279 | " \n", 1280 | " \n", 1281 | " \n", 1282 | " \n", 1283 | " \n", 1284 | " \n", 1285 | " \n", 1286 | " \n", 1287 | " \n", 1288 | " \n", 1289 | " \n", 1290 | " \n", 1291 | " \n", 1292 | " \n", 1293 | " \n", 1294 | " \n", 1295 | " \n", 1296 | " \n", 1297 | " \n", 1298 | " \n", 1299 | " \n", 1300 | " \n", 1301 | " \n", 1302 | " \n", 1303 | " \n", 1304 | " \n", 1305 | " \n", 1306 | " \n", 1307 | " \n", 1308 | " \n", 1309 | " \n", 1310 | " \n", 1311 | " \n", 1312 | " \n", 1313 | " \n", 1314 | " \n", 1315 | " \n", 1316 | " \n", 1317 | " \n", 1318 | " \n", 1319 | " \n", 1320 | " \n", 1321 | " \n", 1322 | " \n", 1323 | " \n", 1324 | " \n", 1325 | " \n", 1326 | " \n", 1327 | " \n", 1328 | " \n", 1329 | " \n", 1330 | " \n", 1331 | " \n", 1332 | " \n", 1333 
| " \n", 1334 | " \n", 1335 | " \n", 1336 | " \n", 1337 | " \n", 1338 | " \n", 1339 | " \n", 1340 | " \n", 1341 | " \n", 1342 | " \n", 1343 | " \n", 1344 | " \n", 1345 | " \n", 1346 | " \n", 1347 | " \n", 1348 | " \n", 1349 | " \n", 1350 | " \n", 1351 | " \n", 1352 | " \n", 1353 | " \n", 1354 | " \n", 1355 | " \n", 1356 | " \n", 1357 | " \n", 1358 | " \n", 1359 | " \n", 1360 | " \n", 1361 | " \n", 1362 | " \n", 1363 | " \n", 1364 | " \n", 1365 | " \n", 1366 | " \n", 1367 | " \n", 1368 | " \n", 1369 | " \n", 1370 | " \n", 1371 | " \n", 1372 | " \n", 1373 | " \n", 1374 | " \n", 1375 | " \n", 1376 | " \n", 1377 | " \n", 1378 | " \n", 1379 | " \n", 1380 | " \n", 1381 | " \n", 1382 | " \n", 1383 | " \n", 1384 | " \n", 1385 | " \n", 1386 | " \n", 1387 | " \n", 1388 | " \n", 1389 | " \n", 1390 | " \n", 1391 | " \n", 1392 | " \n", 1393 | " \n", 1394 | " \n", 1395 | " \n", 1396 | " \n", 1397 | " \n", 1398 | " \n", 1399 | " \n", 1400 | " \n", 1401 | " \n", 1402 | " \n", 1403 | " \n", 1404 | " \n", 1405 | " \n", 1406 | " \n", 1407 | " \n", 1408 | " \n", 1409 | " \n", 1410 | " \n", 1411 | " \n", 1412 | " \n", 1413 | " \n", 1414 | " \n", 1415 | " \n", 1416 | " \n", 1417 | " \n", 1418 | " \n", 1419 | " \n", 1420 | " \n", 1421 | " \n", 1422 | " \n", 1423 | " \n", 1424 | " \n", 1425 | " \n", 1426 | " \n", 1427 | " \n", 1428 | " \n", 1429 | " \n", 1430 | " \n", 1431 | " \n", 1432 | " \n", 1433 | " \n", 1434 | " \n", 1435 | " \n", 1436 | " \n", 1437 | " \n", 1438 | " \n", 1439 | " \n", 1440 | " \n", 1441 | " \n", 1442 | " \n", 1443 | " \n", 1444 | " \n", 1445 | " \n", 1446 | " \n", 1447 | " \n", 1448 | " \n", 1449 | " \n", 1450 | " \n", 1451 | " \n", 1452 | " \n", 1453 | " \n", 1454 | " \n", 1455 | " \n", 1456 | " \n", 1457 | " \n", 1458 | " \n", 1459 | " \n", 1460 | " \n", 1461 | " \n", 1462 | " \n", 1463 | " \n", 1464 | " \n", 1465 | " \n", 1466 | " \n", 1467 | " \n", 1468 | " \n", 1469 | " \n", 1470 | " \n", 1471 | "
Place of PublicationDate of PublicationPublisherTitleAuthorFlickr URL
Identifier
206London1879S. Tinsley & Co.Walter ForbesAAhttp://www.flickr.com/photos/britishlibrary/ta...
216London1868Virtue & Co.All For GreedA. A A.http://www.flickr.com/photos/britishlibrary/ta...
218London1869Bradbury, Evans & Co.Love The AvengerA. A A.http://www.flickr.com/photos/britishlibrary/ta...
472London1851James DarlingWelsh Sketches, Chiefly Ecclesiastical, To The...E. S A.http://www.flickr.com/photos/britishlibrary/ta...
480London1857Wertheim & MacintoshThe World In Which I Live, And My Place In ItE. S A.http://www.flickr.com/photos/britishlibrary/ta...
481London1875William MacintoshThe World In Which I Live, And My Place In ItE. S A.http://www.flickr.com/photos/britishlibrary/ta...
519London1872The AuthorLagonellsF. E A.http://www.flickr.com/photos/britishlibrary/ta...
667OxfordNaNNaNThe Coming Of Spring, And Other PoemsJ.|A A.http://www.flickr.com/photos/britishlibrary/ta...
874London1676NaNA Warning To The Inhabitants Of England, And L...Remaʿhttp://www.flickr.com/photos/britishlibrary/ta...
1143London1679NaNA Satyr Against Vertue. (a Poem: Supposed To B...T A.http://www.flickr.com/photos/britishlibrary/ta...
1280Coventry1802Printed by J. TurnerAn Account Of The Many And Great Loans, Benefa...NaNhttp://www.flickr.com/photos/britishlibrary/ta...
1808Christiania1859NaNErindringer Som Bidrag Til Norges Historie Fra...Jacob Aallhttp://www.flickr.com/photos/britishlibrary/ta...
1905Firenze1888NaNGli Studi Storici In Terra D'otranto ... Framm...Ermanno Aarhttp://www.flickr.com/photos/britishlibrary/ta...
1929Amsterdam1839NaNDe Aardbol. Magazijn Van Hedendaagsche Land- E...NaNhttp://www.flickr.com/photos/britishlibrary/ta...
2836Savona1897NaNCronache Savonesi Dal 1500 Al 1570 ... Accresc...Giovanni Agostino Abatehttp://www.flickr.com/photos/britishlibrary/ta...
2854London1865E. Moxon & Co.See-saw; A Novel ... EditeFrancesco Abatihttp://www.flickr.com/photos/britishlibrary/ta...
2956Paris1860NaNGéodésie D'une Partie De La Haute Éthiopie,...Antoine Thompson d' Abbadiehttp://www.flickr.com/photos/britishlibrary/ta...
2957Paris1873NaNWith Eleven MapAntoine Thompson d' Abbadiehttp://www.flickr.com/photos/britishlibrary/ta...
3017Puerto-Rico1866NaNHistoria Geográfica, Civil Y Politica De La I...Agustín Íñigo Abbad y lasierrahttp://www.flickr.com/photos/britishlibrary/ta...
3131New York1899W. AbbattThe Crisis Of The Revolution, Being The Story ...William Abbatthttp://www.flickr.com/photos/britishlibrary/ta...
4598Hull1814The AuthorPeace: A Lyric PoemThomas Eastoe Abbotthttp://www.flickr.com/photos/britishlibrary/ta...
4884London1820J. Hatchard & SonAbdallah; Or, The Arabian Martyr: A Christian ...NaNhttp://www.flickr.com/photos/britishlibrary/ta...
4976Oxonii1800J. Cooke, etc.Abdollatiphi Historiæ Ægypti CompendiumNaNhttp://www.flickr.com/photos/britishlibrary/ta...
5382London1847Punch OfficeThe Comic History Of England ... With ... Colo...Gilbert Abbott A'becketthttp://www.flickr.com/photos/britishlibrary/ta...
5385LondonNaNBradbury, Agnew & Co.The Comic History Of England ... With Twenty C...Gilbert Abbott A'becketthttp://www.flickr.com/photos/britishlibrary/ta...
5389LondonNaNBradbury, Agnew & Co.The Comic History Of Rome ... IllustrateGilbert Abbott A'becketthttp://www.flickr.com/photos/britishlibrary/ta...
5432Milano1893NaNSigna: Opera In Tre AttGilbert Arthur A'becketthttp://www.flickr.com/photos/britishlibrary/ta...
6036London1805C. & R. BaldwinThe Venetian Outlaw, A Drama In Three Acts ......NaNhttp://www.flickr.com/photos/britishlibrary/ta...
6821Aberdeen1837J. Davidson & Co.Description Of The Coast Between Aberdeen And ...NaNhttp://www.flickr.com/photos/britishlibrary/ta...
7521Wien1896NaNAus Kaukasischen Ländern. Reisebriefe Von H. ...Wilhelm Hermann Abichhttp://www.flickr.com/photos/britishlibrary/ta...
.....................
4053464LondonNaNNaNA Friendly EpistlNaNhttp://www.flickr.com/photos/britishlibrary/ta...
4063671London1896Richard Bentley & SonHonor OrmthwaiteHenrietta Keddiehttp://www.flickr.com/photos/britishlibrary/ta...
4072044LondonNaNNaNThe Scale: Or, Woman Weighed With Man. A Poem ..John Moncreiffhttp://www.flickr.com/photos/britishlibrary/ta...
4077038LondonNaNNaNThe Noble Souldier. Or, A Contract Broken, Jus...Samuel Rowleyhttp://www.flickr.com/photos/britishlibrary/ta...
4079258Glasgow1750printed for John Ross, bookseller in EdinburghYarico To InklNaNhttp://www.flickr.com/photos/britishlibrary/ta...
4079262London1879NaNThe Canadian Farmer. A Missionary IncidentWilliam Joseph H Yateshttp://www.flickr.com/photos/britishlibrary/ta...
4112297LondonNaNNaNJaphet In Search Of A Father ... With An Intro...Frederick Marryathttp://www.flickr.com/photos/britishlibrary/ta...
4112525Pontoise1889NaNHistoire Populaire De Pontoise ... Précédée...Joseph Depoinhttp://www.flickr.com/photos/britishlibrary/ta...
4112839LondonNaNWilloughby & Co.The Vicar Of Wakefield ... And A Prefatory MemoiOliver Goldsmithhttp://www.flickr.com/photos/britishlibrary/ta...
4113012Philadelphia1876S. W. Burley1776 American Enterprise. 1876. Burley's Unite...Charles Holland.|Pope Kidderhttp://www.flickr.com/photos/britishlibrary/ta...
4113816LondonNaNNaNThe Lacemakers: Sketches Of Irish Characters W...Susanna.|WIBERG Meredithhttp://www.flickr.com/photos/britishlibrary/ta...
4114334LondonNaNNaNThe Sword Of Peace; Or, A Voyage Of Love; A ComedNaNhttp://www.flickr.com/photos/britishlibrary/ta...
4114390London1708NaNThe Unfortunate Dutchess Of Malfy, Or, The Unn...John Websterhttp://www.flickr.com/photos/britishlibrary/ta...
4114889LondonNaNNaNThe 5 Alls. A Collection Of Stories, Charades,...NaNhttp://www.flickr.com/photos/britishlibrary/ta...
4114986London1777J. WenmanBarbarossa. A Tragedy. As It Is Performed At T...John Brownhttp://www.flickr.com/photos/britishlibrary/ta...
4115138LondonNaNNaNThe Revenger's Tagaedie. As It Hath Beene Sund...NaNhttp://www.flickr.com/photos/britishlibrary/ta...
4116063London1866[John Dicks]Edith Heron; Or, The Earl And The CountessEdith Heronhttp://www.flickr.com/photos/britishlibrary/ta...
4117526Leipzig1862Charles Fr. FleischerAbridgment Of The History Of England ... Adapt...Oliver Goldsmithhttp://www.flickr.com/photos/britishlibrary/ta...
4117583London1894T. Fisher UnwinThe Wish. A NoveHermann Sudermannhttp://www.flickr.com/photos/britishlibrary/ta...
4117749New York1868John Wiley & SonA System Of Mineralogy. Descriptive Mineralogy...James Dwight Danahttp://www.flickr.com/photos/britishlibrary/ta...
4117751New York1882John Wiley & SonsThird Appendix To The Fifth Edition Of Dana's ...James Dwight Danahttp://www.flickr.com/photos/britishlibrary/ta...
4117752London1883Trübner & Co.A System Of Mineralogy ... Fifth Edition, Rewr...James Dwight Danahttp://www.flickr.com/photos/britishlibrary/ta...
4156359New York1898G. P. Putnam's SonsHistory Of The People Of The Netherlands ... T...Pieter Johan Blokhttp://www.flickr.com/photos/britishlibrary/ta...
4157746London1811F. C. and J. RivingtonThe New Chronicles Of England And France, In T...Robert Fabyanhttp://www.flickr.com/photos/britishlibrary/ta...
4157862Newcastle-upon-Tyne1867T. FordyceLocal Records; Or, Historical Register Of Rema...T. Fordycehttp://www.flickr.com/photos/britishlibrary/ta...
4158088London1838NaNThe Parochial History Of Cornwall, Founded Onafterwards GILBERT Giddyhttp://www.flickr.com/photos/britishlibrary/ta...
4158128Derby1831M. Mozley & SonThe History And Gazetteer Of The County Of DStephen Gloverhttp://www.flickr.com/photos/britishlibrary/ta...
4159563LondonNaNT. Cadell and W. DaviesMagna Britannia; Being A Concise Topographical...Daniel Lysonshttp://www.flickr.com/photos/britishlibrary/ta...
4159587Newcastle-upon-Tyne1834Mackenzie & DentAn Historical, Topographical And Descriptive V...E. (Eneas) Mackenziehttp://www.flickr.com/photos/britishlibrary/ta...
4160339London1834NaNCollectanea Topographica Et GenealogicaNaNhttp://www.flickr.com/photos/britishlibrary/ta...
\n", 1472 | "

8287 rows × 6 columns

\n", 1473 | "
" 1474 | ], 1475 | "text/plain": [ 1476 | " Place of Publication Date of Publication \\\n", 1477 | "Identifier \n", 1478 | "206 London 1879 \n", 1479 | "216 London 1868 \n", 1480 | "218 London 1869 \n", 1481 | "472 London 1851 \n", 1482 | "480 London 1857 \n", 1483 | "481 London 1875 \n", 1484 | "519 London 1872 \n", 1485 | "667 Oxford NaN \n", 1486 | "874 London 1676 \n", 1487 | "1143 London 1679 \n", 1488 | "1280 Coventry 1802 \n", 1489 | "1808 Christiania 1859 \n", 1490 | "1905 Firenze 1888 \n", 1491 | "1929 Amsterdam 1839 \n", 1492 | "2836 Savona 1897 \n", 1493 | "2854 London 1865 \n", 1494 | "2956 Paris 1860 \n", 1495 | "2957 Paris 1873 \n", 1496 | "3017 Puerto-Rico 1866 \n", 1497 | "3131 New York 1899 \n", 1498 | "4598 Hull 1814 \n", 1499 | "4884 London 1820 \n", 1500 | "4976 Oxonii 1800 \n", 1501 | "5382 London 1847 \n", 1502 | "5385 London NaN \n", 1503 | "5389 London NaN \n", 1504 | "5432 Milano 1893 \n", 1505 | "6036 London 1805 \n", 1506 | "6821 Aberdeen 1837 \n", 1507 | "7521 Wien 1896 \n", 1508 | "... ... ... 
\n", 1509 | "4053464 London NaN \n", 1510 | "4063671 London 1896 \n", 1511 | "4072044 London NaN \n", 1512 | "4077038 London NaN \n", 1513 | "4079258 Glasgow 1750 \n", 1514 | "4079262 London 1879 \n", 1515 | "4112297 London NaN \n", 1516 | "4112525 Pontoise 1889 \n", 1517 | "4112839 London NaN \n", 1518 | "4113012 Philadelphia 1876 \n", 1519 | "4113816 London NaN \n", 1520 | "4114334 London NaN \n", 1521 | "4114390 London 1708 \n", 1522 | "4114889 London NaN \n", 1523 | "4114986 London 1777 \n", 1524 | "4115138 London NaN \n", 1525 | "4116063 London 1866 \n", 1526 | "4117526 Leipzig 1862 \n", 1527 | "4117583 London 1894 \n", 1528 | "4117749 New York 1868 \n", 1529 | "4117751 New York 1882 \n", 1530 | "4117752 London 1883 \n", 1531 | "4156359 New York 1898 \n", 1532 | "4157746 London 1811 \n", 1533 | "4157862 Newcastle-upon-Tyne 1867 \n", 1534 | "4158088 London 1838 \n", 1535 | "4158128 Derby 1831 \n", 1536 | "4159563 London NaN \n", 1537 | "4159587 Newcastle-upon-Tyne 1834 \n", 1538 | "4160339 London 1834 \n", 1539 | "\n", 1540 | " Publisher \\\n", 1541 | "Identifier \n", 1542 | "206 S. Tinsley & Co. \n", 1543 | "216 Virtue & Co. \n", 1544 | "218 Bradbury, Evans & Co. \n", 1545 | "472 James Darling \n", 1546 | "480 Wertheim & Macintosh \n", 1547 | "481 William Macintosh \n", 1548 | "519 The Author \n", 1549 | "667 NaN \n", 1550 | "874 NaN \n", 1551 | "1143 NaN \n", 1552 | "1280 Printed by J. Turner \n", 1553 | "1808 NaN \n", 1554 | "1905 NaN \n", 1555 | "1929 NaN \n", 1556 | "2836 NaN \n", 1557 | "2854 E. Moxon & Co. \n", 1558 | "2956 NaN \n", 1559 | "2957 NaN \n", 1560 | "3017 NaN \n", 1561 | "3131 W. Abbatt \n", 1562 | "4598 The Author \n", 1563 | "4884 J. Hatchard & Son \n", 1564 | "4976 J. Cooke, etc. \n", 1565 | "5382 Punch Office \n", 1566 | "5385 Bradbury, Agnew & Co. \n", 1567 | "5389 Bradbury, Agnew & Co. \n", 1568 | "5432 NaN \n", 1569 | "6036 C. & R. Baldwin \n", 1570 | "6821 J. Davidson & Co. \n", 1571 | "7521 NaN \n", 1572 | "... ... 
\n", 1573 | "4053464 NaN \n", 1574 | "4063671 Richard Bentley & Son \n", 1575 | "4072044 NaN \n", 1576 | "4077038 NaN \n", 1577 | "4079258 printed for John Ross, bookseller in Edinburgh \n", 1578 | "4079262 NaN \n", 1579 | "4112297 NaN \n", 1580 | "4112525 NaN \n", 1581 | "4112839 Willoughby & Co. \n", 1582 | "4113012 S. W. Burley \n", 1583 | "4113816 NaN \n", 1584 | "4114334 NaN \n", 1585 | "4114390 NaN \n", 1586 | "4114889 NaN \n", 1587 | "4114986 J. Wenman \n", 1588 | "4115138 NaN \n", 1589 | "4116063 [John Dicks] \n", 1590 | "4117526 Charles Fr. Fleischer \n", 1591 | "4117583 T. Fisher Unwin \n", 1592 | "4117749 John Wiley & Son \n", 1593 | "4117751 John Wiley & Sons \n", 1594 | "4117752 Trübner & Co. \n", 1595 | "4156359 G. P. Putnam's Sons \n", 1596 | "4157746 F. C. and J. Rivington \n", 1597 | "4157862 T. Fordyce \n", 1598 | "4158088 NaN \n", 1599 | "4158128 M. Mozley & Son \n", 1600 | "4159563 T. Cadell and W. Davies \n", 1601 | "4159587 Mackenzie & Dent \n", 1602 | "4160339 NaN \n", 1603 | "\n", 1604 | " Title \\\n", 1605 | "Identifier \n", 1606 | "206 Walter Forbes \n", 1607 | "216 All For Greed \n", 1608 | "218 Love The Avenger \n", 1609 | "472 Welsh Sketches, Chiefly Ecclesiastical, To The... \n", 1610 | "480 The World In Which I Live, And My Place In It \n", 1611 | "481 The World In Which I Live, And My Place In It \n", 1612 | "519 Lagonells \n", 1613 | "667 The Coming Of Spring, And Other Poems \n", 1614 | "874 A Warning To The Inhabitants Of England, And L... \n", 1615 | "1143 A Satyr Against Vertue. (a Poem: Supposed To B... \n", 1616 | "1280 An Account Of The Many And Great Loans, Benefa... \n", 1617 | "1808 Erindringer Som Bidrag Til Norges Historie Fra... \n", 1618 | "1905 Gli Studi Storici In Terra D'otranto ... Framm... \n", 1619 | "1929 De Aardbol. Magazijn Van Hedendaagsche Land- E... \n", 1620 | "2836 Cronache Savonesi Dal 1500 Al 1570 ... Accresc... \n", 1621 | "2854 See-saw; A Novel ... 
Edite \n", 1622 | "2956 Géodésie D'une Partie De La Haute Éthiopie,... \n", 1623 | "2957 With Eleven Map \n", 1624 | "3017 Historia Geográfica, Civil Y Politica De La I... \n", 1625 | "3131 The Crisis Of The Revolution, Being The Story ... \n", 1626 | "4598 Peace: A Lyric Poem \n", 1627 | "4884 Abdallah; Or, The Arabian Martyr: A Christian ... \n", 1628 | "4976 Abdollatiphi Historiæ Ægypti Compendium \n", 1629 | "5382 The Comic History Of England ... With ... Colo... \n", 1630 | "5385 The Comic History Of England ... With Twenty C... \n", 1631 | "5389 The Comic History Of Rome ... Illustrate \n", 1632 | "5432 Signa: Opera In Tre Att \n", 1633 | "6036 The Venetian Outlaw, A Drama In Three Acts ...... \n", 1634 | "6821 Description Of The Coast Between Aberdeen And ... \n", 1635 | "7521 Aus Kaukasischen Ländern. Reisebriefe Von H. ... \n", 1636 | "... ... \n", 1637 | "4053464 A Friendly Epistl \n", 1638 | "4063671 Honor Ormthwaite \n", 1639 | "4072044 The Scale: Or, Woman Weighed With Man. A Poem .. \n", 1640 | "4077038 The Noble Souldier. Or, A Contract Broken, Jus... \n", 1641 | "4079258 Yarico To Inkl \n", 1642 | "4079262 The Canadian Farmer. A Missionary Incident \n", 1643 | "4112297 Japhet In Search Of A Father ... With An Intro... \n", 1644 | "4112525 Histoire Populaire De Pontoise ... Précédée... \n", 1645 | "4112839 The Vicar Of Wakefield ... And A Prefatory Memoi \n", 1646 | "4113012 1776 American Enterprise. 1876. Burley's Unite... \n", 1647 | "4113816 The Lacemakers: Sketches Of Irish Characters W... \n", 1648 | "4114334 The Sword Of Peace; Or, A Voyage Of Love; A Comed \n", 1649 | "4114390 The Unfortunate Dutchess Of Malfy, Or, The Unn... \n", 1650 | "4114889 The 5 Alls. A Collection Of Stories, Charades,... \n", 1651 | "4114986 Barbarossa. A Tragedy. As It Is Performed At T... \n", 1652 | "4115138 The Revenger's Tagaedie. As It Hath Beene Sund... 
\n", 1653 | "4116063 Edith Heron; Or, The Earl And The Countess \n", 1654 | "4117526 Abridgment Of The History Of England ... Adapt... \n", 1655 | "4117583 The Wish. A Nove \n", 1656 | "4117749 A System Of Mineralogy. Descriptive Mineralogy... \n", 1657 | "4117751 Third Appendix To The Fifth Edition Of Dana's ... \n", 1658 | "4117752 A System Of Mineralogy ... Fifth Edition, Rewr... \n", 1659 | "4156359 History Of The People Of The Netherlands ... T... \n", 1660 | "4157746 The New Chronicles Of England And France, In T... \n", 1661 | "4157862 Local Records; Or, Historical Register Of Rema... \n", 1662 | "4158088 The Parochial History Of Cornwall, Founded On \n", 1663 | "4158128 The History And Gazetteer Of The County Of D \n", 1664 | "4159563 Magna Britannia; Being A Concise Topographical... \n", 1665 | "4159587 An Historical, Topographical And Descriptive V... \n", 1666 | "4160339 Collectanea Topographica Et Genealogica \n", 1667 | "\n", 1668 | " Author \\\n", 1669 | "Identifier \n", 1670 | "206 AA \n", 1671 | "216 A. A A. \n", 1672 | "218 A. A A. \n", 1673 | "472 E. S A. \n", 1674 | "480 E. S A. \n", 1675 | "481 E. S A. \n", 1676 | "519 F. E A. \n", 1677 | "667 J.|A A. \n", 1678 | "874 Remaʿ \n", 1679 | "1143 T A. \n", 1680 | "1280 NaN \n", 1681 | "1808 Jacob Aall \n", 1682 | "1905 Ermanno Aar \n", 1683 | "1929 NaN \n", 1684 | "2836 Giovanni Agostino Abate \n", 1685 | "2854 Francesco Abati \n", 1686 | "2956 Antoine Thompson d' Abbadie \n", 1687 | "2957 Antoine Thompson d' Abbadie \n", 1688 | "3017 Agustín Íñigo Abbad y lasierra \n", 1689 | "3131 William Abbatt \n", 1690 | "4598 Thomas Eastoe Abbott \n", 1691 | "4884 NaN \n", 1692 | "4976 NaN \n", 1693 | "5382 Gilbert Abbott A'beckett \n", 1694 | "5385 Gilbert Abbott A'beckett \n", 1695 | "5389 Gilbert Abbott A'beckett \n", 1696 | "5432 Gilbert Arthur A'beckett \n", 1697 | "6036 NaN \n", 1698 | "6821 NaN \n", 1699 | "7521 Wilhelm Hermann Abich \n", 1700 | "... ... 
\n", 1701 | "4053464 NaN \n", 1702 | "4063671 Henrietta Keddie \n", 1703 | "4072044 John Moncreiff \n", 1704 | "4077038 Samuel Rowley \n", 1705 | "4079258 NaN \n", 1706 | "4079262 William Joseph H Yates \n", 1707 | "4112297 Frederick Marryat \n", 1708 | "4112525 Joseph Depoin \n", 1709 | "4112839 Oliver Goldsmith \n", 1710 | "4113012 Charles Holland.|Pope Kidder \n", 1711 | "4113816 Susanna.|WIBERG Meredith \n", 1712 | "4114334 NaN \n", 1713 | "4114390 John Webster \n", 1714 | "4114889 NaN \n", 1715 | "4114986 John Brown \n", 1716 | "4115138 NaN \n", 1717 | "4116063 Edith Heron \n", 1718 | "4117526 Oliver Goldsmith \n", 1719 | "4117583 Hermann Sudermann \n", 1720 | "4117749 James Dwight Dana \n", 1721 | "4117751 James Dwight Dana \n", 1722 | "4117752 James Dwight Dana \n", 1723 | "4156359 Pieter Johan Blok \n", 1724 | "4157746 Robert Fabyan \n", 1725 | "4157862 T. Fordyce \n", 1726 | "4158088 afterwards GILBERT Giddy \n", 1727 | "4158128 Stephen Glover \n", 1728 | "4159563 Daniel Lysons \n", 1729 | "4159587 E. (Eneas) Mackenzie \n", 1730 | "4160339 NaN \n", 1731 | "\n", 1732 | " Flickr URL \n", 1733 | "Identifier \n", 1734 | "206 http://www.flickr.com/photos/britishlibrary/ta... \n", 1735 | "216 http://www.flickr.com/photos/britishlibrary/ta... \n", 1736 | "218 http://www.flickr.com/photos/britishlibrary/ta... \n", 1737 | "472 http://www.flickr.com/photos/britishlibrary/ta... \n", 1738 | "480 http://www.flickr.com/photos/britishlibrary/ta... \n", 1739 | "481 http://www.flickr.com/photos/britishlibrary/ta... \n", 1740 | "519 http://www.flickr.com/photos/britishlibrary/ta... \n", 1741 | "667 http://www.flickr.com/photos/britishlibrary/ta... \n", 1742 | "874 http://www.flickr.com/photos/britishlibrary/ta... \n", 1743 | "1143 http://www.flickr.com/photos/britishlibrary/ta... \n", 1744 | "1280 http://www.flickr.com/photos/britishlibrary/ta... \n", 1745 | "1808 http://www.flickr.com/photos/britishlibrary/ta... 
\n", 1746 | "1905 http://www.flickr.com/photos/britishlibrary/ta... \n", 1747 | "1929 http://www.flickr.com/photos/britishlibrary/ta... \n", 1748 | "2836 http://www.flickr.com/photos/britishlibrary/ta... \n", 1749 | "2854 http://www.flickr.com/photos/britishlibrary/ta... \n", 1750 | "2956 http://www.flickr.com/photos/britishlibrary/ta... \n", 1751 | "2957 http://www.flickr.com/photos/britishlibrary/ta... \n", 1752 | "3017 http://www.flickr.com/photos/britishlibrary/ta... \n", 1753 | "3131 http://www.flickr.com/photos/britishlibrary/ta... \n", 1754 | "4598 http://www.flickr.com/photos/britishlibrary/ta... \n", 1755 | "4884 http://www.flickr.com/photos/britishlibrary/ta... \n", 1756 | "4976 http://www.flickr.com/photos/britishlibrary/ta... \n", 1757 | "5382 http://www.flickr.com/photos/britishlibrary/ta... \n", 1758 | "5385 http://www.flickr.com/photos/britishlibrary/ta... \n", 1759 | "5389 http://www.flickr.com/photos/britishlibrary/ta... \n", 1760 | "5432 http://www.flickr.com/photos/britishlibrary/ta... \n", 1761 | "6036 http://www.flickr.com/photos/britishlibrary/ta... \n", 1762 | "6821 http://www.flickr.com/photos/britishlibrary/ta... \n", 1763 | "7521 http://www.flickr.com/photos/britishlibrary/ta... \n", 1764 | "... ... \n", 1765 | "4053464 http://www.flickr.com/photos/britishlibrary/ta... \n", 1766 | "4063671 http://www.flickr.com/photos/britishlibrary/ta... \n", 1767 | "4072044 http://www.flickr.com/photos/britishlibrary/ta... \n", 1768 | "4077038 http://www.flickr.com/photos/britishlibrary/ta... \n", 1769 | "4079258 http://www.flickr.com/photos/britishlibrary/ta... \n", 1770 | "4079262 http://www.flickr.com/photos/britishlibrary/ta... \n", 1771 | "4112297 http://www.flickr.com/photos/britishlibrary/ta... \n", 1772 | "4112525 http://www.flickr.com/photos/britishlibrary/ta... \n", 1773 | "4112839 http://www.flickr.com/photos/britishlibrary/ta... \n", 1774 | "4113012 http://www.flickr.com/photos/britishlibrary/ta... 
\n", 1775 | "4113816 http://www.flickr.com/photos/britishlibrary/ta... \n", 1776 | "4114334 http://www.flickr.com/photos/britishlibrary/ta... \n", 1777 | "4114390 http://www.flickr.com/photos/britishlibrary/ta... \n", 1778 | "4114889 http://www.flickr.com/photos/britishlibrary/ta... \n", 1779 | "4114986 http://www.flickr.com/photos/britishlibrary/ta... \n", 1780 | "4115138 http://www.flickr.com/photos/britishlibrary/ta... \n", 1781 | "4116063 http://www.flickr.com/photos/britishlibrary/ta... \n", 1782 | "4117526 http://www.flickr.com/photos/britishlibrary/ta... \n", 1783 | "4117583 http://www.flickr.com/photos/britishlibrary/ta... \n", 1784 | "4117749 http://www.flickr.com/photos/britishlibrary/ta... \n", 1785 | "4117751 http://www.flickr.com/photos/britishlibrary/ta... \n", 1786 | "4117752 http://www.flickr.com/photos/britishlibrary/ta... \n", 1787 | "4156359 http://www.flickr.com/photos/britishlibrary/ta... \n", 1788 | "4157746 http://www.flickr.com/photos/britishlibrary/ta... \n", 1789 | "4157862 http://www.flickr.com/photos/britishlibrary/ta... \n", 1790 | "4158088 http://www.flickr.com/photos/britishlibrary/ta... \n", 1791 | "4158128 http://www.flickr.com/photos/britishlibrary/ta... \n", 1792 | "4159563 http://www.flickr.com/photos/britishlibrary/ta... \n", 1793 | "4159587 http://www.flickr.com/photos/britishlibrary/ta... \n", 1794 | "4160339 http://www.flickr.com/photos/britishlibrary/ta... 
\n", 1795 | "\n", 1796 | "[8287 rows x 6 columns]" 1797 | ] 1798 | }, 1799 | "execution_count": 164, 1800 | "metadata": {}, 1801 | "output_type": "execute_result" 1802 | } 1803 | ], 1804 | "source": [ 1805 | "df" 1806 | ] 1807 | }, 1808 | { 1809 | "cell_type": "code", 1810 | "execution_count": null, 1811 | "metadata": {}, 1812 | "outputs": [], 1813 | "source": [ 1814 | "def clean_up(item):\n", 1815 | " pass\n", 1816 | "\n", 1817 | "with open('Datasets\\university_towns.txt', 'r') as file:\n", 1818 | " items = file.readlines()\n", 1819 | " " 1820 | ] 1821 | } 1822 | ], 1823 | "metadata": { 1824 | "kernelspec": { 1825 | "display_name": "Python 3", 1826 | "language": "python", 1827 | "name": "python3" 1828 | }, 1829 | "language_info": { 1830 | "codemirror_mode": { 1831 | "name": "ipython", 1832 | "version": 3 1833 | }, 1834 | "file_extension": ".py", 1835 | "mimetype": "text/x-python", 1836 | "name": "python", 1837 | "nbconvert_exporter": "python", 1838 | "pygments_lexer": "ipython3", 1839 | "version": "3.6.4" 1840 | } 1841 | }, 1842 | "nbformat": 4, 1843 | "nbformat_minor": 2 1844 | } 1845 | -------------------------------------------------------------------------------- /Data Cleaning Tutorial - Real Python.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 154, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np\n", 11 | "from functools import reduce" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "## Cleaning specific columns" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 155, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "data": { 28 | "text/html": [ 29 | "
\n", 30 | "\n", 43 | "\n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | "
IdentifierEdition StatementPlace of PublicationDate of PublicationPublisherTitleAuthorContributorsCorporate AuthorCorporate ContributorsFormer ownerEngraverIssuance typeFlickr URLShelfmarks
0206NaNLondon1879 [1878]S. Tinsley & Co.Walter Forbes. [A novel.] By A. AA. A.FORBES, Walter.NaNNaNNaNNaNmonographichttp://www.flickr.com/photos/britishlibrary/ta...British Library HMNTS 12641.b.30.
1216NaNLondon; Virtue & Yorston1868Virtue & Co.All for Greed. [A novel. The dedication signed...A., A. A.BLAZE DE BURY, Marie Pauline Rose - BaronessNaNNaNNaNNaNmonographichttp://www.flickr.com/photos/britishlibrary/ta...British Library HMNTS 12626.cc.2.
2218NaNLondon1869Bradbury, Evans & Co.Love the Avenger. By the author of “All for Gr...A., A. A.BLAZE DE BURY, Marie Pauline Rose - BaronessNaNNaNNaNNaNmonographichttp://www.flickr.com/photos/britishlibrary/ta...British Library HMNTS 12625.dd.1.
3472NaNLondon1851James DarlingWelsh Sketches, chiefly ecclesiastical, to the...A., E. S.Appleyard, Ernest Silvanus.NaNNaNNaNNaNmonographichttp://www.flickr.com/photos/britishlibrary/ta...British Library HMNTS 10369.bbb.15.
4480A new edition, revised, etc.London1857Wertheim & Macintosh[The World in which I live, and my place in it...A., E. S.BROOME, John Henry.NaNNaNNaNNaNmonographichttp://www.flickr.com/photos/britishlibrary/ta...British Library HMNTS 9007.d.28.
\n", 157 | "
" 158 | ], 159 | "text/plain": [ 160 | " Identifier Edition Statement Place of Publication \\\n", 161 | "0 206 NaN London \n", 162 | "1 216 NaN London; Virtue & Yorston \n", 163 | "2 218 NaN London \n", 164 | "3 472 NaN London \n", 165 | "4 480 A new edition, revised, etc. London \n", 166 | "\n", 167 | " Date of Publication Publisher \\\n", 168 | "0 1879 [1878] S. Tinsley & Co. \n", 169 | "1 1868 Virtue & Co. \n", 170 | "2 1869 Bradbury, Evans & Co. \n", 171 | "3 1851 James Darling \n", 172 | "4 1857 Wertheim & Macintosh \n", 173 | "\n", 174 | " Title Author \\\n", 175 | "0 Walter Forbes. [A novel.] By A. A A. A. \n", 176 | "1 All for Greed. [A novel. The dedication signed... A., A. A. \n", 177 | "2 Love the Avenger. By the author of “All for Gr... A., A. A. \n", 178 | "3 Welsh Sketches, chiefly ecclesiastical, to the... A., E. S. \n", 179 | "4 [The World in which I live, and my place in it... A., E. S. \n", 180 | "\n", 181 | " Contributors Corporate Author \\\n", 182 | "0 FORBES, Walter. NaN \n", 183 | "1 BLAZE DE BURY, Marie Pauline Rose - Baroness NaN \n", 184 | "2 BLAZE DE BURY, Marie Pauline Rose - Baroness NaN \n", 185 | "3 Appleyard, Ernest Silvanus. NaN \n", 186 | "4 BROOME, John Henry. NaN \n", 187 | "\n", 188 | " Corporate Contributors Former owner Engraver Issuance type \\\n", 189 | "0 NaN NaN NaN monographic \n", 190 | "1 NaN NaN NaN monographic \n", 191 | "2 NaN NaN NaN monographic \n", 192 | "3 NaN NaN NaN monographic \n", 193 | "4 NaN NaN NaN monographic \n", 194 | "\n", 195 | " Flickr URL \\\n", 196 | "0 http://www.flickr.com/photos/britishlibrary/ta... \n", 197 | "1 http://www.flickr.com/photos/britishlibrary/ta... \n", 198 | "2 http://www.flickr.com/photos/britishlibrary/ta... \n", 199 | "3 http://www.flickr.com/photos/britishlibrary/ta... \n", 200 | "4 http://www.flickr.com/photos/britishlibrary/ta... \n", 201 | "\n", 202 | " Shelfmarks \n", 203 | "0 British Library HMNTS 12641.b.30. \n", 204 | "1 British Library HMNTS 12626.cc.2. 
\n", 205 | "2 British Library HMNTS 12625.dd.1. \n", 206 | "3 British Library HMNTS 10369.bbb.15. \n", 207 | "4 British Library HMNTS 9007.d.28. " 208 | ] 209 | }, 210 | "execution_count": 155, 211 | "metadata": {}, 212 | "output_type": "execute_result" 213 | } 214 | ], 215 | "source": [ 216 | "df = pd.read_csv('Datasets\\BL-Flickr-Images-Book.csv')\n", 217 | "df.head()" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": 156, 223 | "metadata": {}, 224 | "outputs": [ 225 | { 226 | "data": { 227 | "text/html": [ 228 | "
\n", 229 | "\n", 242 | "\n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | "
Place of PublicationDate of PublicationPublisherTitleAuthorFlickr URL
Identifier
206London1879 [1878]S. Tinsley & Co.Walter Forbes. [A novel.] By A. AA. A.http://www.flickr.com/photos/britishlibrary/ta...
216London; Virtue & Yorston1868Virtue & Co.All for Greed. [A novel. The dedication signed...A., A. A.http://www.flickr.com/photos/britishlibrary/ta...
218London1869Bradbury, Evans & Co.Love the Avenger. By the author of “All for Gr...A., A. A.http://www.flickr.com/photos/britishlibrary/ta...
472London1851James DarlingWelsh Sketches, chiefly ecclesiastical, to the...A., E. S.http://www.flickr.com/photos/britishlibrary/ta...
480London1857Wertheim & Macintosh[The World in which I live, and my place in it...A., E. S.http://www.flickr.com/photos/britishlibrary/ta...
\n", 311 | "
" 312 | ], 313 | "text/plain": [ 314 | " Place of Publication Date of Publication \\\n", 315 | "Identifier \n", 316 | "206 London 1879 [1878] \n", 317 | "216 London; Virtue & Yorston 1868 \n", 318 | "218 London 1869 \n", 319 | "472 London 1851 \n", 320 | "480 London 1857 \n", 321 | "\n", 322 | " Publisher \\\n", 323 | "Identifier \n", 324 | "206 S. Tinsley & Co. \n", 325 | "216 Virtue & Co. \n", 326 | "218 Bradbury, Evans & Co. \n", 327 | "472 James Darling \n", 328 | "480 Wertheim & Macintosh \n", 329 | "\n", 330 | " Title Author \\\n", 331 | "Identifier \n", 332 | "206 Walter Forbes. [A novel.] By A. A A. A. \n", 333 | "216 All for Greed. [A novel. The dedication signed... A., A. A. \n", 334 | "218 Love the Avenger. By the author of “All for Gr... A., A. A. \n", 335 | "472 Welsh Sketches, chiefly ecclesiastical, to the... A., E. S. \n", 336 | "480 [The World in which I live, and my place in it... A., E. S. \n", 337 | "\n", 338 | " Flickr URL \n", 339 | "Identifier \n", 340 | "206 http://www.flickr.com/photos/britishlibrary/ta... \n", 341 | "216 http://www.flickr.com/photos/britishlibrary/ta... \n", 342 | "218 http://www.flickr.com/photos/britishlibrary/ta... \n", 343 | "472 http://www.flickr.com/photos/britishlibrary/ta... \n", 344 | "480 http://www.flickr.com/photos/britishlibrary/ta... 
" 345 | ] 346 | }, 347 | "execution_count": 156, 348 | "metadata": {}, 349 | "output_type": "execute_result" 350 | } 351 | ], 352 | "source": [ 353 | "to_drop = ['Edition Statement',\n", 354 | " 'Corporate Author',\n", 355 | " 'Corporate Contributors',\n", 356 | " 'Former owner',\n", 357 | " 'Engraver',\n", 358 | " 'Contributors',\n", 359 | " 'Issuance type',\n", 360 | " 'Shelfmarks']\n", 361 | "\n", 362 | "df.drop(to_drop, inplace = True, axis = 1)\n", 363 | "df = df.set_index('Identifier')\n", 364 | "df.head()" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": 157, 370 | "metadata": {}, 371 | "outputs": [], 372 | "source": [ 373 | "unwanted_characters = ['[', ',', '-']\n", 374 | "\n", 375 | "def clean_dates(item):\n", 376 | " dop= str(item.loc['Date of Publication'])\n", 377 | " \n", 378 | " if dop == 'nan' or dop[0] == '[':\n", 379 | " return np.NaN\n", 380 | " \n", 381 | " for character in unwanted_characters:\n", 382 | " if character in dop:\n", 383 | " character_index = dop.find(character)\n", 384 | " dop = dop[:character_index]\n", 385 | " \n", 386 | " return dop\n", 387 | "\n", 388 | "df['Date of Publication'] = df.apply(clean_dates, axis = 1)" 389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "execution_count": 158, 394 | "metadata": {}, 395 | "outputs": [ 396 | { 397 | "data": { 398 | "text/html": [ 399 | "
\n", 400 | "\n", 413 | "\n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | "
Place of PublicationDate of PublicationPublisherTitleAuthorFlickr URL
Identifier
206London1879S. Tinsley & Co.Walter Forbes. [A novel.] By A. AA. A.http://www.flickr.com/photos/britishlibrary/ta...
216London; Virtue & Yorston1868Virtue & Co.All for Greed. [A novel. The dedication signed...A., A. A.http://www.flickr.com/photos/britishlibrary/ta...
218London1869Bradbury, Evans & Co.Love the Avenger. By the author of “All for Gr...A., A. A.http://www.flickr.com/photos/britishlibrary/ta...
472London1851James DarlingWelsh Sketches, chiefly ecclesiastical, to the...A., E. S.http://www.flickr.com/photos/britishlibrary/ta...
480London1857Wertheim & Macintosh[The World in which I live, and my place in it...A., E. S.http://www.flickr.com/photos/britishlibrary/ta...
\n", 482 | "
" 483 | ], 484 | "text/plain": [ 485 | " Place of Publication Date of Publication \\\n", 486 | "Identifier \n", 487 | "206 London 1879 \n", 488 | "216 London; Virtue & Yorston 1868 \n", 489 | "218 London 1869 \n", 490 | "472 London 1851 \n", 491 | "480 London 1857 \n", 492 | "\n", 493 | " Publisher \\\n", 494 | "Identifier \n", 495 | "206 S. Tinsley & Co. \n", 496 | "216 Virtue & Co. \n", 497 | "218 Bradbury, Evans & Co. \n", 498 | "472 James Darling \n", 499 | "480 Wertheim & Macintosh \n", 500 | "\n", 501 | " Title Author \\\n", 502 | "Identifier \n", 503 | "206 Walter Forbes. [A novel.] By A. A A. A. \n", 504 | "216 All for Greed. [A novel. The dedication signed... A., A. A. \n", 505 | "218 Love the Avenger. By the author of “All for Gr... A., A. A. \n", 506 | "472 Welsh Sketches, chiefly ecclesiastical, to the... A., E. S. \n", 507 | "480 [The World in which I live, and my place in it... A., E. S. \n", 508 | "\n", 509 | " Flickr URL \n", 510 | "Identifier \n", 511 | "206 http://www.flickr.com/photos/britishlibrary/ta... \n", 512 | "216 http://www.flickr.com/photos/britishlibrary/ta... \n", 513 | "218 http://www.flickr.com/photos/britishlibrary/ta... \n", 514 | "472 http://www.flickr.com/photos/britishlibrary/ta... \n", 515 | "480 http://www.flickr.com/photos/britishlibrary/ta... 
" 516 | ] 517 | }, 518 | "execution_count": 158, 519 | "metadata": {}, 520 | "output_type": "execute_result" 521 | } 522 | ], 523 | "source": [ 524 | "df.head()" 525 | ] 526 | }, 527 | { 528 | "cell_type": "code", 529 | "execution_count": 159, 530 | "metadata": {}, 531 | "outputs": [], 532 | "source": [ 533 | "def clean_author_names(item):\n", 534 | " \n", 535 | " author = str(item.loc['Author'])\n", 536 | " \n", 537 | " if author == 'nan':\n", 538 | " return np.NaN\n", 539 | " \n", 540 | " author = author.split(',')\n", 541 | "\n", 542 | " if len(author) == 1:\n", 543 | " name = filter(lambda x: x.isalpha(), author[0])\n", 544 | " return reduce(lambda x, y: x + y, name)\n", 545 | " \n", 546 | " last_name, first_name = author[0], author[1]\n", 547 | "\n", 548 | " first_name = first_name[:first_name.find('-')] if '-' in first_name else first_name\n", 549 | " \n", 550 | " if first_name.endswith(('.', '.|')):\n", 551 | " parts = first_name.split('.')\n", 552 | " \n", 553 | " if len(parts) > 1:\n", 554 | " first_occurence = first_name.find('.')\n", 555 | " final_occurence = first_name.find('.', first_occurence + 1)\n", 556 | " first_name = first_name[:final_occurence]\n", 557 | " else:\n", 558 | " first_name = first_name[:first_name.find('.')]\n", 559 | " \n", 560 | " last_name = last_name.capitalize()\n", 561 | " \n", 562 | " return f'{first_name} {last_name}'\n", 563 | "\n", 564 | "\n", 565 | "df['Author'] = df.apply(clean_author_names, axis = 1)" 566 | ] 567 | }, 568 | { 569 | "cell_type": "code", 570 | "execution_count": 160, 571 | "metadata": {}, 572 | "outputs": [ 573 | { 574 | "data": { 575 | "text/html": [ 576 | "
\n", 577 | "\n", 590 | "\n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | "
Place of PublicationDate of PublicationPublisherTitleAuthorFlickr URL
Identifier
206London1879S. Tinsley & Co.Walter Forbes. [A novel.] By A. AAAhttp://www.flickr.com/photos/britishlibrary/ta...
216London; Virtue & Yorston1868Virtue & Co.All for Greed. [A novel. The dedication signed...A. A A.http://www.flickr.com/photos/britishlibrary/ta...
218London1869Bradbury, Evans & Co.Love the Avenger. By the author of “All for Gr...A. A A.http://www.flickr.com/photos/britishlibrary/ta...
472London1851James DarlingWelsh Sketches, chiefly ecclesiastical, to the...E. S A.http://www.flickr.com/photos/britishlibrary/ta...
480London1857Wertheim & Macintosh[The World in which I live, and my place in it...E. S A.http://www.flickr.com/photos/britishlibrary/ta...
\n", 659 | "
" 660 | ], 661 | "text/plain": [ 662 | " Place of Publication Date of Publication \\\n", 663 | "Identifier \n", 664 | "206 London 1879 \n", 665 | "216 London; Virtue & Yorston 1868 \n", 666 | "218 London 1869 \n", 667 | "472 London 1851 \n", 668 | "480 London 1857 \n", 669 | "\n", 670 | " Publisher \\\n", 671 | "Identifier \n", 672 | "206 S. Tinsley & Co. \n", 673 | "216 Virtue & Co. \n", 674 | "218 Bradbury, Evans & Co. \n", 675 | "472 James Darling \n", 676 | "480 Wertheim & Macintosh \n", 677 | "\n", 678 | " Title Author \\\n", 679 | "Identifier \n", 680 | "206 Walter Forbes. [A novel.] By A. A AA \n", 681 | "216 All for Greed. [A novel. The dedication signed... A. A A. \n", 682 | "218 Love the Avenger. By the author of “All for Gr... A. A A. \n", 683 | "472 Welsh Sketches, chiefly ecclesiastical, to the... E. S A. \n", 684 | "480 [The World in which I live, and my place in it... E. S A. \n", 685 | "\n", 686 | " Flickr URL \n", 687 | "Identifier \n", 688 | "206 http://www.flickr.com/photos/britishlibrary/ta... \n", 689 | "216 http://www.flickr.com/photos/britishlibrary/ta... \n", 690 | "218 http://www.flickr.com/photos/britishlibrary/ta... \n", 691 | "472 http://www.flickr.com/photos/britishlibrary/ta... \n", 692 | "480 http://www.flickr.com/photos/britishlibrary/ta... 
" 693 | ] 694 | }, 695 | "execution_count": 160, 696 | "metadata": {}, 697 | "output_type": "execute_result" 698 | } 699 | ], 700 | "source": [ 701 | "df.head()" 702 | ] 703 | }, 704 | { 705 | "cell_type": "code", 706 | "execution_count": 161, 707 | "metadata": {}, 708 | "outputs": [], 709 | "source": [ 710 | "def clean_publication_place(item):\n", 711 | " place = str(item['Place of Publication'])\n", 712 | " \n", 713 | " if 'London' in place:\n", 714 | " return 'London'\n", 715 | " if 'Oxford' in place:\n", 716 | " return 'Oxford'\n", 717 | " if place == 'Newcastle upon Tyne':\n", 718 | " return 'Newcastle-upon-Tyne'\n", 719 | " \n", 720 | " return place\n", 721 | " \n", 722 | "df['Place of Publication'] = df.apply(clean_publication_place, axis = 1)" 723 | ] 724 | }, 725 | { 726 | "cell_type": "code", 727 | "execution_count": 162, 728 | "metadata": {}, 729 | "outputs": [ 730 | { 731 | "data": { 732 | "text/html": [ 733 | "
\n", 734 | "\n", 747 | "\n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | "
Place of PublicationDate of PublicationPublisherTitleAuthorFlickr URL
Identifier
206London1879S. Tinsley & Co.Walter Forbes. [A novel.] By A. AAAhttp://www.flickr.com/photos/britishlibrary/ta...
216London1868Virtue & Co.All for Greed. [A novel. The dedication signed...A. A A.http://www.flickr.com/photos/britishlibrary/ta...
218London1869Bradbury, Evans & Co.Love the Avenger. By the author of “All for Gr...A. A A.http://www.flickr.com/photos/britishlibrary/ta...
472London1851James DarlingWelsh Sketches, chiefly ecclesiastical, to the...E. S A.http://www.flickr.com/photos/britishlibrary/ta...
480London1857Wertheim & Macintosh[The World in which I live, and my place in it...E. S A.http://www.flickr.com/photos/britishlibrary/ta...
\n", 816 | "
" 817 | ], 818 | "text/plain": [ 819 | " Place of Publication Date of Publication Publisher \\\n", 820 | "Identifier \n", 821 | "206 London 1879 S. Tinsley & Co. \n", 822 | "216 London 1868 Virtue & Co. \n", 823 | "218 London 1869 Bradbury, Evans & Co. \n", 824 | "472 London 1851 James Darling \n", 825 | "480 London 1857 Wertheim & Macintosh \n", 826 | "\n", 827 | " Title Author \\\n", 828 | "Identifier \n", 829 | "206 Walter Forbes. [A novel.] By A. A AA \n", 830 | "216 All for Greed. [A novel. The dedication signed... A. A A. \n", 831 | "218 Love the Avenger. By the author of “All for Gr... A. A A. \n", 832 | "472 Welsh Sketches, chiefly ecclesiastical, to the... E. S A. \n", 833 | "480 [The World in which I live, and my place in it... E. S A. \n", 834 | "\n", 835 | " Flickr URL \n", 836 | "Identifier \n", 837 | "206 http://www.flickr.com/photos/britishlibrary/ta... \n", 838 | "216 http://www.flickr.com/photos/britishlibrary/ta... \n", 839 | "218 http://www.flickr.com/photos/britishlibrary/ta... \n", 840 | "472 http://www.flickr.com/photos/britishlibrary/ta... \n", 841 | "480 http://www.flickr.com/photos/britishlibrary/ta... 
" 842 | ] 843 | }, 844 | "execution_count": 162, 845 | "metadata": {}, 846 | "output_type": "execute_result" 847 | } 848 | ], 849 | "source": [ 850 | "df.head()" 851 | ] 852 | }, 853 | { 854 | "cell_type": "code", 855 | "execution_count": 163, 856 | "metadata": {}, 857 | "outputs": [], 858 | "source": [ 859 | "def clean_title(item):\n", 860 | " title = str(item['Title'])\n", 861 | " \n", 862 | " if title == 'nan':\n", 863 | " return np.NaN\n", 864 | " \n", 865 | " if title[0] == '[':\n", 866 | " title = title[1: title.find(']')]\n", 867 | " \n", 868 | " if 'by' in title:\n", 869 | " title = title[:title.find('by')]\n", 870 | " elif 'By' in title:\n", 871 | " title = title[:title.find('By')]\n", 872 | " \n", 873 | " if '[' in title:\n", 874 | " title = title[:title.find('[')]\n", 875 | "\n", 876 | " title = title[:-2]\n", 877 | " \n", 878 | " title = list(map(str.capitalize, title.split()))\n", 879 | " return ' '.join(title)\n", 880 | " \n", 881 | "df['Title'] = df.apply(clean_title, axis = 1)" 882 | ] 883 | }, 884 | { 885 | "cell_type": "code", 886 | "execution_count": 180, 887 | "metadata": {}, 888 | "outputs": [ 889 | { 890 | "data": { 891 | "text/html": [ 892 | "
\n", 893 | "\n", 906 | "\n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | "
Place of PublicationDate of PublicationPublisherTitleAuthorFlickr URL
Identifier
206London1879S. Tinsley & Co.Walter ForbesAAhttp://www.flickr.com/photos/britishlibrary/ta...
216London1868Virtue & Co.All For GreedA. A A.http://www.flickr.com/photos/britishlibrary/ta...
218London1869Bradbury, Evans & Co.Love The AvengerA. A A.http://www.flickr.com/photos/britishlibrary/ta...
472London1851James DarlingWelsh Sketches, Chiefly Ecclesiastical, To The...E. S A.http://www.flickr.com/photos/britishlibrary/ta...
480London1857Wertheim & MacintoshThe World In Which I Live, And My Place In ItE. S A.http://www.flickr.com/photos/britishlibrary/ta...
\n", 975 | "
" 976 | ], 977 | "text/plain": [ 978 | " Place of Publication Date of Publication Publisher \\\n", 979 | "Identifier \n", 980 | "206 London 1879 S. Tinsley & Co. \n", 981 | "216 London 1868 Virtue & Co. \n", 982 | "218 London 1869 Bradbury, Evans & Co. \n", 983 | "472 London 1851 James Darling \n", 984 | "480 London 1857 Wertheim & Macintosh \n", 985 | "\n", 986 | " Title Author \\\n", 987 | "Identifier \n", 988 | "206 Walter Forbes AA \n", 989 | "216 All For Greed A. A A. \n", 990 | "218 Love The Avenger A. A A. \n", 991 | "472 Welsh Sketches, Chiefly Ecclesiastical, To The... E. S A. \n", 992 | "480 The World In Which I Live, And My Place In It E. S A. \n", 993 | "\n", 994 | " Flickr URL \n", 995 | "Identifier \n", 996 | "206 http://www.flickr.com/photos/britishlibrary/ta... \n", 997 | "216 http://www.flickr.com/photos/britishlibrary/ta... \n", 998 | "218 http://www.flickr.com/photos/britishlibrary/ta... \n", 999 | "472 http://www.flickr.com/photos/britishlibrary/ta... \n", 1000 | "480 http://www.flickr.com/photos/britishlibrary/ta... " 1001 | ] 1002 | }, 1003 | "execution_count": 180, 1004 | "metadata": {}, 1005 | "output_type": "execute_result" 1006 | } 1007 | ], 1008 | "source": [ 1009 | "df.head()" 1010 | ] 1011 | }, 1012 | { 1013 | "cell_type": "markdown", 1014 | "metadata": {}, 1015 | "source": [ 1016 | "## Cleaning entire dataset" 1017 | ] 1018 | }, 1019 | { 1020 | "cell_type": "code", 1021 | "execution_count": 168, 1022 | "metadata": {}, 1023 | "outputs": [ 1024 | { 1025 | "data": { 1026 | "text/html": [ 1027 | "
\n", 1028 | "\n", 1041 | "\n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | "
StateRegionName
0Alabama[edit]\\nAuburn (Auburn University)[1]\\n
1Alabama[edit]\\nFlorence (University of North Alabama)\\n
2Alabama[edit]\\nJacksonville (Jacksonville State University)[2]\\n
3Alabama[edit]\\nLivingston (University of West Alabama)[2]\\n
4Alabama[edit]\\nMontevallo (University of Montevallo)[2]\\n
\n", 1077 | "
" 1078 | ], 1079 | "text/plain": [ 1080 | " State RegionName\n", 1081 | "0 Alabama[edit]\\n Auburn (Auburn University)[1]\\n\n", 1082 | "1 Alabama[edit]\\n Florence (University of North Alabama)\\n\n", 1083 | "2 Alabama[edit]\\n Jacksonville (Jacksonville State University)[2]\\n\n", 1084 | "3 Alabama[edit]\\n Livingston (University of West Alabama)[2]\\n\n", 1085 | "4 Alabama[edit]\\n Montevallo (University of Montevallo)[2]\\n" 1086 | ] 1087 | }, 1088 | "execution_count": 168, 1089 | "metadata": {}, 1090 | "output_type": "execute_result" 1091 | } 1092 | ], 1093 | "source": [ 1094 | "university_towns = []\n", 1095 | "\n", 1096 | "with open('Datasets\\\\university_towns.txt', 'r') as file:\n", 1097 | " items = file.readlines()\n", 1098 | " states = list(filter(lambda x: '[edit]' in x, items))\n", 1099 | " \n", 1100 | " for index, state in enumerate(states):\n", 1101 | " start = items.index(state) + 1\n", 1102 | " if index == 49: #since 50 states\n", 1103 | " end = len(items)\n", 1104 | " else:\n", 1105 | " end = items.index(states[index + 1])\n", 1106 | " \n", 1107 | " pairs = map(lambda x: [state, x], items[start:end])\n", 1108 | " university_towns.extend(pairs)\n", 1109 | " \n", 1110 | "towns_df = pd.DataFrame(university_towns, columns = ['State', 'RegionName'])\n", 1111 | "towns_df.head()" 1112 | ] 1113 | }, 1114 | { 1115 | "cell_type": "code", 1116 | "execution_count": 169, 1117 | "metadata": {}, 1118 | "outputs": [ 1119 | { 1120 | "data": { 1121 | "text/html": [ 1122 | "
\n", 1123 | "\n", 1136 | "\n", 1137 | " \n", 1138 | " \n", 1139 | " \n", 1140 | " \n", 1141 | " \n", 1142 | " \n", 1143 | " \n", 1144 | " \n", 1145 | " \n", 1146 | " \n", 1147 | " \n", 1148 | " \n", 1149 | " \n", 1150 | " \n", 1151 | " \n", 1152 | " \n", 1153 | " \n", 1154 | " \n", 1155 | " \n", 1156 | " \n", 1157 | " \n", 1158 | " \n", 1159 | " \n", 1160 | " \n", 1161 | " \n", 1162 | " \n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | "
StateRegionName
0AlabamaAuburn
1AlabamaFlorence
2AlabamaJacksonville
3AlabamaLivingston
4AlabamaMontevallo
\n", 1172 | "
" 1173 | ], 1174 | "text/plain": [ 1175 | " State RegionName\n", 1176 | "0 Alabama Auburn\n", 1177 | "1 Alabama Florence\n", 1178 | "2 Alabama Jacksonville\n", 1179 | "3 Alabama Livingston\n", 1180 | "4 Alabama Montevallo" 1181 | ] 1182 | }, 1183 | "execution_count": 169, 1184 | "metadata": {}, 1185 | "output_type": "execute_result" 1186 | } 1187 | ], 1188 | "source": [ 1189 | "def clean_up(item):\n", 1190 | " if '(' in item:\n", 1191 | " return item[:item.find('(') - 1]\n", 1192 | " \n", 1193 | " if '[' in item:\n", 1194 | " return item[:item.find('[')]\n", 1195 | " \n", 1196 | "\n", 1197 | "towns_df = towns_df.applymap(clean_up)\n", 1198 | "towns_df.head()" 1199 | ] 1200 | }, 1201 | { 1202 | "cell_type": "markdown", 1203 | "metadata": {}, 1204 | "source": [ 1205 | "## Renaming columns and skipping rows" 1206 | ] 1207 | }, 1208 | { 1209 | "cell_type": "code", 1210 | "execution_count": 178, 1211 | "metadata": {}, 1212 | "outputs": [ 1213 | { 1214 | "data": { 1215 | "text/html": [ 1216 | "
\n", 1217 | "\n", 1230 | "\n", 1231 | " \n", 1232 | " \n", 1233 | " \n", 1234 | " \n", 1235 | " \n", 1236 | " \n", 1237 | " \n", 1238 | " \n", 1239 | " \n", 1240 | " \n", 1241 | " \n", 1242 | " \n", 1243 | " \n", 1244 | " \n", 1245 | " \n", 1246 | " \n", 1247 | " \n", 1248 | " \n", 1249 | " \n", 1250 | " \n", 1251 | " \n", 1252 | " \n", 1253 | " \n", 1254 | " \n", 1255 | " \n", 1256 | " \n", 1257 | " \n", 1258 | " \n", 1259 | " \n", 1260 | " \n", 1261 | " \n", 1262 | " \n", 1263 | " \n", 1264 | " \n", 1265 | " \n", 1266 | " \n", 1267 | " \n", 1268 | " \n", 1269 | " \n", 1270 | " \n", 1271 | " \n", 1272 | " \n", 1273 | " \n", 1274 | " \n", 1275 | " \n", 1276 | " \n", 1277 | " \n", 1278 | " \n", 1279 | " \n", 1280 | " \n", 1281 | " \n", 1282 | " \n", 1283 | " \n", 1284 | " \n", 1285 | " \n", 1286 | " \n", 1287 | " \n", 1288 | " \n", 1289 | " \n", 1290 | " \n", 1291 | " \n", 1292 | " \n", 1293 | " \n", 1294 | " \n", 1295 | " \n", 1296 | " \n", 1297 | " \n", 1298 | " \n", 1299 | " \n", 1300 | " \n", 1301 | " \n", 1302 | " \n", 1303 | " \n", 1304 | " \n", 1305 | " \n", 1306 | " \n", 1307 | " \n", 1308 | " \n", 1309 | " \n", 1310 | " \n", 1311 | " \n", 1312 | " \n", 1313 | " \n", 1314 | " \n", 1315 | " \n", 1316 | " \n", 1317 | " \n", 1318 | " \n", 1319 | " \n", 1320 | " \n", 1321 | " \n", 1322 | " \n", 1323 | " \n", 1324 | " \n", 1325 | " \n", 1326 | " \n", 1327 | " \n", 1328 | " \n", 1329 | " \n", 1330 | " \n", 1331 | " \n", 1332 | " \n", 1333 | " \n", 1334 | " \n", 1335 | " \n", 1336 | " \n", 1337 | " \n", 1338 | " \n", 1339 | " \n", 1340 | " \n", 1341 | " \n", 1342 | " \n", 1343 | " \n", 1344 | " \n", 1345 | " \n", 1346 | " \n", 1347 | " \n", 1348 | " \n", 1349 | "
Unnamed: 0? Summer01 !02 !03 !Total? Winter01 !.102 !.103 !.1Total.1? Games01 !.202 !.203 !.2Combined total
0Afghanistan (AFG)13002200000130022
1Algeria (ALG)1252815300001552815
2Argentina (ARG)23182428701800004118242870
3Armenia (ARM)512912600001112912
4Australasia (ANZ) [ANZ]23451200000234512
\n", 1350 | "
" 1351 | ], 1352 | "text/plain": [ 1353 | " Unnamed: 0 ? Summer 01 ! 02 ! 03 ! Total ? Winter \\\n", 1354 | "0 Afghanistan (AFG) 13 0 0 2 2 0 \n", 1355 | "1 Algeria (ALG) 12 5 2 8 15 3 \n", 1356 | "2 Argentina (ARG) 23 18 24 28 70 18 \n", 1357 | "3 Armenia (ARM) 5 1 2 9 12 6 \n", 1358 | "4 Australasia (ANZ) [ANZ] 2 3 4 5 12 0 \n", 1359 | "\n", 1360 | " 01 !.1 02 !.1 03 !.1 Total.1 ? Games 01 !.2 02 !.2 03 !.2 \\\n", 1361 | "0 0 0 0 0 13 0 0 2 \n", 1362 | "1 0 0 0 0 15 5 2 8 \n", 1363 | "2 0 0 0 0 41 18 24 28 \n", 1364 | "3 0 0 0 0 11 1 2 9 \n", 1365 | "4 0 0 0 0 2 3 4 5 \n", 1366 | "\n", 1367 | " Combined total \n", 1368 | "0 2 \n", 1369 | "1 15 \n", 1370 | "2 70 \n", 1371 | "3 12 \n", 1372 | "4 12 " 1373 | ] 1374 | }, 1375 | "execution_count": 178, 1376 | "metadata": {}, 1377 | "output_type": "execute_result" 1378 | } 1379 | ], 1380 | "source": [ 1381 | "olympics_df = pd.read_csv('Datasets\\olympics.csv', skiprows = 1, header = 0)\n", 1382 | "olympics_df.head()" 1383 | ] 1384 | }, 1385 | { 1386 | "cell_type": "code", 1387 | "execution_count": 179, 1388 | "metadata": {}, 1389 | "outputs": [ 1390 | { 1391 | "data": { 1392 | "text/html": [ 1393 | "
\n", 1394 | "\n", 1407 | "\n", 1408 | " \n", 1409 | " \n", 1410 | " \n", 1411 | " \n", 1412 | " \n", 1413 | " \n", 1414 | " \n", 1415 | " \n", 1416 | " \n", 1417 | " \n", 1418 | " \n", 1419 | " \n", 1420 | " \n", 1421 | " \n", 1422 | " \n", 1423 | " \n", 1424 | " \n", 1425 | " \n", 1426 | " \n", 1427 | " \n", 1428 | " \n", 1429 | " \n", 1430 | " \n", 1431 | " \n", 1432 | " \n", 1433 | " \n", 1434 | " \n", 1435 | " \n", 1436 | " \n", 1437 | " \n", 1438 | " \n", 1439 | " \n", 1440 | " \n", 1441 | " \n", 1442 | " \n", 1443 | " \n", 1444 | " \n", 1445 | " \n", 1446 | " \n", 1447 | " \n", 1448 | " \n", 1449 | " \n", 1450 | " \n", 1451 | " \n", 1452 | " \n", 1453 | " \n", 1454 | " \n", 1455 | " \n", 1456 | " \n", 1457 | " \n", 1458 | " \n", 1459 | " \n", 1460 | " \n", 1461 | " \n", 1462 | " \n", 1463 | " \n", 1464 | " \n", 1465 | " \n", 1466 | " \n", 1467 | " \n", 1468 | " \n", 1469 | " \n", 1470 | " \n", 1471 | " \n", 1472 | " \n", 1473 | " \n", 1474 | " \n", 1475 | " \n", 1476 | " \n", 1477 | " \n", 1478 | " \n", 1479 | " \n", 1480 | " \n", 1481 | " \n", 1482 | " \n", 1483 | " \n", 1484 | " \n", 1485 | " \n", 1486 | " \n", 1487 | " \n", 1488 | " \n", 1489 | " \n", 1490 | " \n", 1491 | " \n", 1492 | " \n", 1493 | " \n", 1494 | " \n", 1495 | " \n", 1496 | " \n", 1497 | " \n", 1498 | " \n", 1499 | " \n", 1500 | " \n", 1501 | " \n", 1502 | " \n", 1503 | " \n", 1504 | " \n", 1505 | " \n", 1506 | " \n", 1507 | " \n", 1508 | " \n", 1509 | " \n", 1510 | " \n", 1511 | " \n", 1512 | " \n", 1513 | " \n", 1514 | " \n", 1515 | " \n", 1516 | " \n", 1517 | " \n", 1518 | " \n", 1519 | " \n", 1520 | " \n", 1521 | " \n", 1522 | " \n", 1523 | " \n", 1524 | " \n", 1525 | " \n", 1526 | "
CountrySummer OlympicsGoldSilverBronzeTotalWinter OlympicsGold.1Silver.1Bronze.1Total.1# GamesGold.2Silver.2Bronze.2Combined total
0Afghanistan (AFG)13002200000130022
1Algeria (ALG)1252815300001552815
2Argentina (ARG)23182428701800004118242870
3Armenia (ARM)512912600001112912
4Australasia (ANZ) [ANZ]23451200000234512
\n", 1527 | "
" 1528 | ], 1529 | "text/plain": [ 1530 | " Country Summer Olympics Gold Silver Bronze Total \\\n", 1531 | "0 Afghanistan (AFG) 13 0 0 2 2 \n", 1532 | "1 Algeria (ALG) 12 5 2 8 15 \n", 1533 | "2 Argentina (ARG) 23 18 24 28 70 \n", 1534 | "3 Armenia (ARM) 5 1 2 9 12 \n", 1535 | "4 Australasia (ANZ) [ANZ] 2 3 4 5 12 \n", 1536 | "\n", 1537 | " Winter Olympics Gold.1 Silver.1 Bronze.1 Total.1 # Games Gold.2 \\\n", 1538 | "0 0 0 0 0 0 13 0 \n", 1539 | "1 3 0 0 0 0 15 5 \n", 1540 | "2 18 0 0 0 0 41 18 \n", 1541 | "3 6 0 0 0 0 11 1 \n", 1542 | "4 0 0 0 0 0 2 3 \n", 1543 | "\n", 1544 | " Silver.2 Bronze.2 Combined total \n", 1545 | "0 0 2 2 \n", 1546 | "1 2 8 15 \n", 1547 | "2 24 28 70 \n", 1548 | "3 2 9 12 \n", 1549 | "4 4 5 12 " 1550 | ] 1551 | }, 1552 | "execution_count": 179, 1553 | "metadata": {}, 1554 | "output_type": "execute_result" 1555 | } 1556 | ], 1557 | "source": [ 1558 | "def get_new_column_name(column_name):\n", 1559 | " return {'Unnamed: 0': 'Country',\n", 1560 | " '? Summer': 'Summer Olympics',\n", 1561 | " '01 !': 'Gold',\n", 1562 | " '02 !': 'Silver',\n", 1563 | " '03 !': 'Bronze',\n", 1564 | " '? Winter': 'Winter Olympics',\n", 1565 | " '01 !.1': 'Gold.1',\n", 1566 | " '02 !.1': 'Silver.1',\n", 1567 | " '03 !.1': 'Bronze.1',\n", 1568 | " '? 
Games': '# Games', \n", 1569 | " '01 !.2': 'Gold.2',\n", 1570 | " '02 !.2': 'Silver.2',\n", 1571 | " '03 !.2': 'Bronze.2'}.get(column_name)\n", 1572 | "\n", 1573 | "for column in olympics_df:\n", 1574 | " new_name = get_new_column_name(column)\n", 1575 | " if new_name:\n", 1576 | " olympics_df.rename({column: new_name}, axis = 1, inplace = True)\n", 1577 | " \n", 1578 | "olympics_df.head()" 1579 | ] 1580 | }, 1581 | { 1582 | "cell_type": "code", 1583 | "execution_count": null, 1584 | "metadata": {}, 1585 | "outputs": [], 1586 | "source": [] 1587 | } 1588 | ], 1589 | "metadata": { 1590 | "kernelspec": { 1591 | "display_name": "Python 3", 1592 | "language": "python", 1593 | "name": "python3" 1594 | }, 1595 | "language_info": { 1596 | "codemirror_mode": { 1597 | "name": "ipython", 1598 | "version": 3 1599 | }, 1600 | "file_extension": ".py", 1601 | "mimetype": "text/x-python", 1602 | "name": "python", 1603 | "nbconvert_exporter": "python", 1604 | "pygments_lexer": "ipython3", 1605 | "version": "3.6.4" 1606 | } 1607 | }, 1608 | "nbformat": 4, 1609 | "nbformat_minor": 2 1610 | } 1611 | -------------------------------------------------------------------------------- /Datasets/olympics.csv: -------------------------------------------------------------------------------- 1 | 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 2 | ,? Summer,01 !,02 !,03 !,Total,? Winter,01 !,02 !,03 !,Total,? 
Games,01 !,02 !,03 !,Combined total 3 | Afghanistan (AFG),13,0,0,2,2,0,0,0,0,0,13,0,0,2,2 4 | Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15 5 | Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70 6 | Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12 7 | Australasia (ANZ) [ANZ],2,3,4,5,12,0,0,0,0,0,2,3,4,5,12 8 | Australia (AUS) [AUS] [Z],25,139,152,177,468,18,5,3,4,12,43,144,155,181,480 9 | Austria (AUT),26,18,33,35,86,22,59,78,81,218,48,77,111,116,304 10 | Azerbaijan (AZE),5,6,5,15,26,5,0,0,0,0,10,6,5,15,26 11 | Bahamas (BAH),15,5,2,5,12,0,0,0,0,0,15,5,2,5,12 12 | Bahrain (BRN),8,0,0,1,1,0,0,0,0,0,8,0,0,1,1 13 | Barbados (BAR) [BAR],11,0,0,1,1,0,0,0,0,0,11,0,0,1,1 14 | Belarus (BLR),5,12,24,39,75,6,6,4,5,15,11,18,28,44,90 15 | Belgium (BEL),25,37,52,53,142,20,1,1,3,5,45,38,53,56,147 16 | Bermuda (BER),17,0,0,1,1,7,0,0,0,0,24,0,0,1,1 17 | Bohemia (BOH) [BOH] [Z],3,0,1,3,4,0,0,0,0,0,3,0,1,3,4 18 | Botswana (BOT),9,0,1,0,1,0,0,0,0,0,9,0,1,0,1 19 | Brazil (BRA),21,23,30,55,108,7,0,0,0,0,28,23,30,55,108 20 | British West Indies (BWI) [BWI],1,0,0,2,2,0,0,0,0,0,1,0,0,2,2 21 | Bulgaria (BUL) [H],19,51,85,78,214,19,1,2,3,6,38,52,87,81,220 22 | Burundi (BDI),5,1,0,0,1,0,0,0,0,0,5,1,0,0,1 23 | Cameroon (CMR),13,3,1,1,5,1,0,0,0,0,14,3,1,1,5 24 | Canada (CAN),25,59,99,121,279,22,62,56,52,170,47,121,155,173,449 25 | Chile (CHI) [I],22,2,7,4,13,16,0,0,0,0,38,2,7,4,13 26 | China (CHN) [CHN],9,201,146,126,473,10,12,22,19,53,19,213,168,145,526 27 | Colombia (COL),18,2,6,11,19,1,0,0,0,0,19,2,6,11,19 28 | Costa Rica (CRC),14,1,1,2,4,6,0,0,0,0,20,1,1,2,4 29 | Ivory Coast (CIV) [CIV],12,0,1,0,1,0,0,0,0,0,12,0,1,0,1 30 | Croatia (CRO),6,6,7,10,23,7,4,6,1,11,13,10,13,11,34 31 | Cuba (CUB) [Z],19,72,67,70,209,0,0,0,0,0,19,72,67,70,209 32 | Cyprus (CYP),9,0,1,0,1,10,0,0,0,0,19,0,1,0,1 33 | Czech Republic (CZE) [CZE],5,14,15,15,44,6,7,9,8,24,11,21,24,23,68 34 | Czechoslovakia (TCH) [TCH],16,49,49,45,143,16,2,8,15,25,32,51,57,60,168 35 | Denmark (DEN) 
[Z],26,43,68,68,179,13,0,1,0,1,39,43,69,68,180 36 | Djibouti (DJI) [B],7,0,0,1,1,0,0,0,0,0,7,0,0,1,1 37 | Dominican Republic (DOM),13,3,2,1,6,0,0,0,0,0,13,3,2,1,6 38 | Ecuador (ECU),13,1,1,0,2,0,0,0,0,0,13,1,1,0,2 39 | Egypt (EGY) [EGY] [Z],21,7,9,10,26,1,0,0,0,0,22,7,9,10,26 40 | Eritrea (ERI),4,0,0,1,1,0,0,0,0,0,4,0,0,1,1 41 | Estonia (EST),11,9,9,15,33,9,4,2,1,7,20,13,11,16,40 42 | Ethiopia (ETH),12,21,7,17,45,2,0,0,0,0,14,21,7,17,45 43 | Finland (FIN),24,101,84,117,302,22,42,62,57,161,46,143,146,174,463 44 | France (FRA) [O] [P] [Z],27,202,223,246,671,22,31,31,47,109,49,233,254,293,780 45 | Gabon (GAB),9,0,1,0,1,0,0,0,0,0,9,0,1,0,1 46 | Georgia (GEO),5,6,5,14,25,6,0,0,0,0,11,6,5,14,25 47 | Germany (GER) [GER] [Z],15,174,182,217,573,11,78,78,53,209,26,252,260,270,782 48 | United Team of Germany (EUA) [EUA],3,28,54,36,118,3,8,6,5,19,6,36,60,41,137 49 | East Germany (GDR) [GDR],5,153,129,127,409,6,39,36,35,110,11,192,165,162,519 50 | West Germany (FRG) [FRG],5,56,67,81,204,6,11,15,13,39,11,67,82,94,243 51 | Ghana (GHA) [GHA],13,0,1,3,4,1,0,0,0,0,14,0,1,3,4 52 | Great Britain (GBR) [GBR] [Z],27,236,272,272,780,22,10,4,12,26,49,246,276,284,806 53 | Greece (GRE) [Z],27,30,42,39,111,18,0,0,0,0,45,30,42,39,111 54 | Grenada (GRN),8,1,0,0,1,0,0,0,0,0,8,1,0,0,1 55 | Guatemala (GUA),13,0,1,0,1,1,0,0,0,0,14,0,1,0,1 56 | Guyana (GUY) [GUY],16,0,0,1,1,0,0,0,0,0,16,0,0,1,1 57 | Haiti (HAI) [J],14,0,1,1,2,0,0,0,0,0,14,0,1,1,2 58 | Hong Kong (HKG) [HKG],15,1,1,1,3,4,0,0,0,0,19,1,1,1,3 59 | Hungary (HUN),25,167,144,165,476,22,0,2,4,6,47,167,146,169,482 60 | Iceland (ISL),19,0,2,2,4,17,0,0,0,0,36,0,2,2,4 61 | India (IND) [F],23,9,6,11,26,9,0,0,0,0,32,9,6,11,26 62 | Indonesia (INA),14,6,10,11,27,0,0,0,0,0,14,6,10,11,27 63 | Iran (IRI) [K],15,15,20,25,60,10,0,0,0,0,25,15,20,25,60 64 | Iraq (IRQ),13,0,0,1,1,0,0,0,0,0,13,0,0,1,1 65 | Ireland (IRL),20,9,8,12,29,6,0,0,0,0,26,9,8,12,29 66 | Israel (ISR),15,1,1,5,7,6,0,0,0,0,21,1,1,5,7 67 | Italy (ITA) [M] 
[S],26,198,166,185,549,22,37,34,43,114,48,235,200,228,663 68 | Jamaica (JAM) [JAM],16,17,30,20,67,7,0,0,0,0,23,17,30,20,67 69 | Japan (JPN),21,130,126,142,398,20,10,17,18,45,41,140,143,160,443 70 | Kazakhstan (KAZ),5,16,17,19,52,6,1,3,3,7,11,17,20,22,59 71 | Kenya (KEN),13,25,32,29,86,3,0,0,0,0,16,25,32,29,86 72 | North Korea (PRK),9,14,12,21,47,8,0,1,1,2,17,14,13,22,49 73 | South Korea (KOR),16,81,82,80,243,17,26,17,10,53,33,107,99,90,296 74 | Kuwait (KUW),12,0,0,2,2,0,0,0,0,0,12,0,0,2,2 75 | Kyrgyzstan (KGZ),5,0,1,2,3,6,0,0,0,0,11,0,1,2,3 76 | Latvia (LAT),10,3,11,5,19,10,0,4,3,7,20,3,15,8,26 77 | Lebanon (LIB),16,0,2,2,4,16,0,0,0,0,32,0,2,2,4 78 | Liechtenstein (LIE),16,0,0,0,0,18,2,2,5,9,34,2,2,5,9 79 | Lithuania (LTU),8,6,5,10,21,8,0,0,0,0,16,6,5,10,21 80 | Luxembourg (LUX) [O],22,1,1,0,2,8,0,2,0,2,30,1,3,0,4 81 | Macedonia (MKD),5,0,0,1,1,5,0,0,0,0,10,0,0,1,1 82 | Malaysia (MAS) [MAS],12,0,3,3,6,0,0,0,0,0,12,0,3,3,6 83 | Mauritius (MRI),8,0,0,1,1,0,0,0,0,0,8,0,0,1,1 84 | Mexico (MEX),22,13,21,28,62,8,0,0,0,0,30,13,21,28,62 85 | Moldova (MDA),5,0,2,5,7,6,0,0,0,0,11,0,2,5,7 86 | Mongolia (MGL),12,2,9,13,24,13,0,0,0,0,25,2,9,13,24 87 | Montenegro (MNE),2,0,1,0,1,2,0,0,0,0,4,0,1,0,1 88 | Morocco (MAR),13,6,5,11,22,6,0,0,0,0,19,6,5,11,22 89 | Mozambique (MOZ),9,1,0,1,2,0,0,0,0,0,9,1,0,1,2 90 | Namibia (NAM),6,0,4,0,4,0,0,0,0,0,6,0,4,0,4 91 | Netherlands (NED) [Z],25,77,85,104,266,20,37,38,35,110,45,114,123,139,376 92 | Netherlands Antilles (AHO) [AHO] [I],13,0,1,0,1,2,0,0,0,0,15,0,1,0,1 93 | New Zealand (NZL) [NZL],22,42,18,39,99,15,0,1,0,1,37,42,19,39,100 94 | Niger (NIG),11,0,0,1,1,0,0,0,0,0,11,0,0,1,1 95 | Nigeria (NGR),15,3,8,12,23,0,0,0,0,0,15,3,8,12,23 96 | Norway (NOR) [Q],24,56,49,43,148,22,118,111,100,329,46,174,160,143,477 97 | Pakistan (PAK),16,3,3,4,10,2,0,0,0,0,18,3,3,4,10 98 | Panama (PAN),16,1,0,2,3,0,0,0,0,0,16,1,0,2,3 99 | Paraguay (PAR),11,0,1,0,1,1,0,0,0,0,12,0,1,0,1 100 | Peru (PER) [L],17,1,3,0,4,2,0,0,0,0,19,1,3,0,4 101 | Philippines 
(PHI),20,0,2,7,9,4,0,0,0,0,24,0,2,7,9 102 | Poland (POL),20,64,82,125,271,22,6,7,7,20,42,70,89,132,291 103 | Portugal (POR),23,4,8,11,23,7,0,0,0,0,30,4,8,11,23 104 | Puerto Rico (PUR),17,0,2,6,8,6,0,0,0,0,23,0,2,6,8 105 | Qatar (QAT),8,0,0,4,4,0,0,0,0,0,8,0,0,4,4 106 | Romania (ROU),20,88,94,119,301,20,0,0,1,1,40,88,94,120,302 107 | Russia (RUS) [RUS],5,132,121,142,395,6,49,40,35,124,11,181,161,177,519 108 | Russian Empire (RU1) [RU1],3,1,4,3,8,0,0,0,0,0,3,1,4,3,8 109 | Soviet Union (URS) [URS],9,395,319,296,1010,9,78,57,59,194,18,473,376,355,1204 110 | Unified Team (EUN) [EUN],1,45,38,29,112,1,9,6,8,23,2,54,44,37,135 111 | Saudi Arabia (KSA),10,0,1,2,3,0,0,0,0,0,10,0,1,2,3 112 | Senegal (SEN),13,0,1,0,1,5,0,0,0,0,18,0,1,0,1 113 | Serbia (SRB) [SRB],3,1,2,4,7,2,0,0,0,0,5,1,2,4,7 114 | Serbia and Montenegro (SCG) [SCG],3,2,4,3,9,3,0,0,0,0,6,2,4,3,9 115 | Singapore (SIN),15,0,2,2,4,0,0,0,0,0,15,0,2,2,4 116 | Slovakia (SVK) [SVK],5,7,9,8,24,6,2,2,1,5,11,9,11,9,29 117 | Slovenia (SLO),6,4,6,9,19,7,2,4,9,15,13,6,10,18,34 118 | South Africa (RSA),18,23,26,27,76,6,0,0,0,0,24,23,26,27,76 119 | Spain (ESP) [Z],22,37,59,35,131,19,1,0,1,2,41,38,59,36,133 120 | Sri Lanka (SRI) [SRI],16,0,2,0,2,0,0,0,0,0,16,0,2,0,2 121 | Sudan (SUD),11,0,1,0,1,0,0,0,0,0,11,0,1,0,1 122 | Suriname (SUR) [E],11,1,0,1,2,0,0,0,0,0,11,1,0,1,2 123 | Sweden (SWE) [Z],26,143,164,176,483,22,50,40,54,144,48,193,204,230,627 124 | Switzerland (SUI),27,47,73,65,185,22,50,40,48,138,49,97,113,113,323 125 | Syria (SYR),12,1,1,1,3,0,0,0,0,0,12,1,1,1,3 126 | Chinese Taipei (TPE) [TPE] [TPE2],13,2,7,12,21,11,0,0,0,0,24,2,7,12,21 127 | Tajikistan (TJK),5,0,1,2,3,4,0,0,0,0,9,0,1,2,3 128 | Tanzania (TAN) [TAN],12,0,2,0,2,0,0,0,0,0,12,0,2,0,2 129 | Thailand (THA),15,7,6,11,24,3,0,0,0,0,18,7,6,11,24 130 | Togo (TOG),9,0,0,1,1,1,0,0,0,0,10,0,0,1,1 131 | Tonga (TGA),8,0,1,0,1,1,0,0,0,0,9,0,1,0,1 132 | Trinidad and Tobago (TRI) [TRI],16,2,5,11,18,3,0,0,0,0,19,2,5,11,18 133 | Tunisia (TUN),13,3,3,4,10,0,0,0,0,0,13,3,3,4,10 
134 | Turkey (TUR),21,39,25,24,88,16,0,0,0,0,37,39,25,24,88 135 | Uganda (UGA),14,2,3,2,7,0,0,0,0,0,14,2,3,2,7 136 | Ukraine (UKR),5,33,27,55,115,6,2,1,4,7,11,35,28,59,122 137 | United Arab Emirates (UAE),8,1,0,0,1,0,0,0,0,0,8,1,0,0,1 138 | United States (USA) [P] [Q] [R] [Z],26,976,757,666,2399,22,96,102,84,282,48,1072,859,750,2681 139 | Uruguay (URU),20,2,2,6,10,1,0,0,0,0,21,2,2,6,10 140 | Uzbekistan (UZB),5,5,5,10,20,6,1,0,0,1,11,6,5,10,21 141 | Venezuela (VEN),17,2,2,8,12,4,0,0,0,0,21,2,2,8,12 142 | Vietnam (VIE),14,0,2,0,2,0,0,0,0,0,14,0,2,0,2 143 | Virgin Islands (ISV),11,0,1,0,1,7,0,0,0,0,18,0,1,0,1 144 | Yugoslavia (YUG) [YUG],16,26,29,28,83,14,0,3,1,4,30,26,32,29,87 145 | Independent Olympic Participants (IOP) [IOP],1,0,1,2,3,0,0,0,0,0,1,0,1,2,3 146 | Zambia (ZAM) [ZAM],12,0,1,1,2,0,0,0,0,0,12,0,1,1,2 147 | Zimbabwe (ZIM) [ZIM],12,3,4,1,8,1,0,0,0,0,13,3,4,1,8 148 | Mixed team (ZZX) [ZZX],3,8,5,4,17,0,0,0,0,0,3,8,5,4,17 149 | Totals,27,4809,4775,5130,14714,22,959,958,948,2865,49,5768,5733,6078,17579 -------------------------------------------------------------------------------- /Datasets/university_towns.txt: -------------------------------------------------------------------------------- 1 | Alabama[edit] 2 | Auburn (Auburn University)[1] 3 | Florence (University of North Alabama) 4 | Jacksonville (Jacksonville State University)[2] 5 | Livingston (University of West Alabama)[2] 6 | Montevallo (University of Montevallo)[2] 7 | Troy (Troy University)[2] 8 | Tuscaloosa (University of Alabama, Stillman College, Shelton State)[3][4] 9 | Tuskegee (Tuskegee University)[5] 10 | Alaska[edit] 11 | Fairbanks (University of Alaska Fairbanks)[2] 12 | Arizona[edit] 13 | Flagstaff (Northern Arizona University)[6] 14 | Tempe (Arizona State University) 15 | Tucson (University of Arizona) 16 | Arkansas[edit] 17 | Arkadelphia (Henderson State University, Ouachita Baptist University)[2] 18 | Conway (Central Baptist College, Hendrix College, University of Central Arkansas)[2] 
19 | Fayetteville (University of Arkansas)[7] 20 | Jonesboro (Arkansas State University)[8] 21 | Magnolia (Southern Arkansas University)[2] 22 | Monticello (University of Arkansas at Monticello)[2] 23 | Russellville (Arkansas Tech University)[2] 24 | Searcy (Harding University)[5] 25 | California[edit] 26 | Angwin (Pacific Union College)[2] 27 | Arcata (Humboldt State University)[5] 28 | Berkeley (University of California, Berkeley)[5] 29 | Chico (California State University, Chico)[2] 30 | Claremont (Claremont McKenna College, Pomona College, Harvey Mudd College, Scripps College, Pitzer College, Keck Graduate Institute, Claremont Graduate University)[5] 31 | Cotati (California State University, Sonoma)[2] 32 | Davis (University of California, Davis)[1] 33 | Irvine (University of California, Irvine) 34 | Isla Vista (University of California, Santa Barbara)[2] 35 | University Park, Los Angeles (University of Southern California) 36 | Merced (University of California, Merced) 37 | Orange (Chapman University) 38 | Palo Alto (Stanford University) 39 | Pomona (Cal Poly Pomona, WesternU)[9][10][11] and formerly Pomona College 40 | Redlands (University of Redlands) 41 | Riverside (University of California, Riverside, California Baptist University, La Sierra University) 42 | Sacramento (California State University, Sacramento) 43 | University District, San Bernardino (California State University, San Bernardino, American Sports University) 44 | San Diego (University of California, San Diego, San Diego State University) 45 | San Luis Obispo (California Polytechnic State University)[2] 46 | Santa Barbara (Fielding Graduate University, Santa Barbara City College, University of California, Santa Barbara, Westmont College)[2] 47 | Santa Cruz (University of California, Santa Cruz)[2] 48 | Turlock (California State University, Stanislaus) 49 | Westwood, Los Angeles (University of California, Los Angeles)[2] 50 | Whittier (Whittier CollegeRio Hondo College) 51 | Colorado[edit] 52 
| Alamosa (Adams State College)[2] 53 | Boulder (University of Colorado at Boulder)[12] 54 | Durango (Fort Lewis College)[2] 55 | Fort Collins (Colorado State University)[13] 56 | Golden (Colorado School of Mines) 57 | Grand Junction (Colorado Mesa University) 58 | Greeley (University of Northern Colorado) 59 | Gunnison (Western State College)[2] 60 | Pueblo, Colorado (Colorado State University-Pueblo) 61 | Connecticut[edit] 62 | Fairfield (Fairfield University, Sacred Heart University) 63 | Middletown (Wesleyan University) 64 | New Britain (Central Connecticut State University) 65 | New Haven (Yale University, University of New Haven, Southern Connecticut State University, Albertus Magnus College, Quinnipiac University)[14] 66 | New London (Connecticut College, US Coast Guard Academy, Mitchell College)[2] 67 | Storrs (University of Connecticut)[2] 68 | Willimantic (Eastern Connecticut State University)[2] 69 | Delaware[edit] 70 | Dover (Delaware State University)[1] 71 | Newark (University of Delaware)[1] 72 | Florida[edit] 73 | Ave Maria (Ave Maria University) 74 | Boca Raton (Florida Atlantic University) 75 | Coral Gables (University of Miami) 76 | DeLand (Stetson University)[5] 77 | Estero (Florida Gulf Coast University) 78 | Gainesville (University of Florida, Santa Fe College) 79 | Orlando (University of Central Florida) 80 | Sarasota (New College of Florida, Ringling College of Art and Design, State College of Florida, Manatee-Sarasota, University of South Florida Sarasota-Manatee) 81 | St. Augustine (Flagler College) 82 | St. Leo (St. 
Leo University) 83 | Tallahassee (Florida State University, Florida A&M University) 84 | Tampa (University of South Florida) 85 | Georgia[edit] 86 | Albany (Albany State University) 87 | Athens (University of Georgia)[15] 88 | Atlanta (Georgia State University, Georgia Tech, Emory)[2] 89 | Carrollton (University of West Georgia)[2]*Dahlonega (North Georgia College & State University)[2] 90 | Demorest (Piedmont College)[2] 91 | Fort Valley (Fort Valley State University)[2] 92 | Kennesaw (Kennesaw State University) 93 | Milledgeville (Georgia College & State University)[2] 94 | Mount Vernon (Brewton-Parker College)[2] 95 | Oxford (Oxford College) 96 | Rome (Berry College, Shorter University) 97 | Savannah (Armstrong Atlantic State University, Savannah State University, Savannah College of Art and Design) 98 | Statesboro (Georgia Southern University)[5] 99 | Valdosta (Valdosta State University)[2] 100 | Waleska (Reinhardt College)[2] 101 | Young Harris (Young Harris College)[2] 102 | Hawaii[edit] 103 | Manoa (University of Hawaii at Manoa)[2] 104 | Idaho[edit] 105 | Moscow (University of Idaho)[2] 106 | Pocatello (Idaho State University)[2] 107 | Rexburg (BYU-Idaho)[2] 108 | Illinois[edit] 109 | Carbondale (Southern Illinois University Carbondale)[5] 110 | Champaign–Urbana (University of Illinois at Urbana–Champaign)[5] 111 | Charleston (Eastern Illinois University)[2] 112 | DeKalb (Northern Illinois University)[2] 113 | Edwardsville (Southern Illinois University Edwardsville)[2] 114 | Evanston (Northwestern University)[2] 115 | Lebanon (McKendree University)[2] 116 | Macomb (Western Illinois University)[2] 117 | Normal (Illinois State University)[2] 118 | Peoria (Bradley University) 119 | Indiana[edit] 120 | Bloomington (Indiana University Bloomington)[5] 121 | Crawfordsville (Wabash College) 122 | Greencastle (DePauw University)[5] 123 | Hanover (Hanover College)[2] 124 | Marion (Indiana Wesleyan University)[2] 125 | Muncie (Ball State University)[2] 126 | Oakland 
City (Oakland City University)[2] 127 | Richmond (Earlham College)[2] 128 | South Bend (Notre Dame University[2]) 129 | Terre Haute (Indiana State University, Rose-Hulman Institute of Technology)[2] 130 | Upland (Taylor University)[2] 131 | Valparaiso (Valparaiso University) 132 | West Lafayette (Purdue University)[2] 133 | Iowa[edit] 134 | Ames (Iowa State University)[2] 135 | Cedar Falls (University of Northern Iowa)[2] 136 | Cedar Rapids, Iowa (Coe College ) 137 | Decorah (Luther College)[5] 138 | Fayette (Upper Iowa University)[2] 139 | Grinnell (Grinnell College)[15] 140 | Iowa City (University of Iowa)[15] 141 | Lamoni (Graceland University)[2] 142 | Mount Vernon, (Cornell College) 143 | Orange City (Northwestern College)[2] 144 | Sioux Center (Dordt College)[2] 145 | Storm Lake (Buena Vista University)[2] 146 | Waverly (Wartburg College)[2] 147 | Kansas[edit] 148 | Baldwin City (Baker University)[5] 149 | Emporia (Emporia State University)[2] 150 | Hays (Fort Hays State University)[2] 151 | Lawrence (University of Kansas, Haskell Indian Nations University)[15] 152 | Manhattan (Kansas State University, Manhattan Christian College)[15] 153 | Pittsburg (Pittsburg State University)[2] 154 | Kentucky[edit] 155 | Bowling Green (Western Kentucky University)[2] 156 | Columbia (Lindsey Wilson College)[2] 157 | Georgetown (Georgetown College) 158 | Highland Heights (Northern Kentucky University) 159 | Lexington (University of Kentucky, Transylvania University[5] 160 | Louisville (University of Louisville) 161 | Morehead (Morehead State University)[2] 162 | Murray (Murray State University)[5] 163 | Richmond (Eastern Kentucky University)[2] 164 | Williamsburg (University of the Cumberlands)[2] 165 | Wilmore (Asbury University, Asbury Theological Seminary)[2] 166 | Louisiana[edit] 167 | Baton Rouge (Louisiana State University, Southern University) 168 | Grambling (Grambling State University)[5] 169 | Hammond (Southeastern Louisiana University)[2] 170 | Lafayette 
(University of Louisiana at Lafayette) 171 | Monroe (University of Louisiana at Monroe)[2] 172 | Natchitoches (Northwestern State University)[2] 173 | Ruston (Louisiana Tech University)[2] 174 | Thibodaux (Nicholls State University)[2] 175 | Maine[edit] 176 | Augusta (University of Maine at Augusta)[2] 177 | Bar Harbor (College of the Atlantic) 178 | Brunswick (Bowdoin College) 179 | Farmington (University of Maine at Farmington)[2] 180 | Fort Kent (University of Maine at Fort Kent) 181 | Gorham (University of Southern Maine)[2] 182 | Lewiston, Maine (Bates College) 183 | Orono (University of Maine)[2] 184 | Waterville (Thomas College, Colby College) 185 | Maryland[edit] 186 | Annapolis (United States Naval Academy, St. John's College) 187 | Chestertown (Washington College)[2] 188 | College Park (University of Maryland, College Park)[16] 189 | Cumberland (Allegany College of Maryland) 190 | Emmitsburg (Mount St. Mary's University)[2] 191 | Frostburg (Frostburg State University)[5] 192 | Princess Anne (University of Maryland Eastern Shore)[5] 193 | Towson (Towson University, Goucher College)[2] 194 | Salisbury (Salisbury University)[2] 195 | Westminster (McDaniel College) 196 | Massachusetts[edit] 197 | Boston (Boston University, Boston College, Boston Conservatory, New England Conservatory, Brandeis University, Northeastern University, UMass Boston, Emmanuel College, Bunker Hill Community College, Roxbury Community College, Suffolk University, Simmons College, among many others) 198 | Bridgewater (Bridgewater State College)[2] 199 | Cambridge (Harvard University, Massachusetts Institute of Technology)(Lesley University, Cambridge College, Longy School of Music)[15] 200 | Chestnut Hill (Boston College) 201 | The Colleges of Worcester Consortium: 202 | Dudley (Nichols College) 203 | North Grafton (Cummings School of Veterinary Medicine at Tufts University) 204 | Paxton (Anna Maria College) 205 | Worcester (Assumption, Becker, Clark University, Holy Cross, Mass. 
College of Pharmacy & Health Sciences, Quinsigamond Community College, UMass Medical School, Worcester State University, Worcester Polytechnic Institute) 206 | The Five College Region of Western Massachusetts: 207 | Amherst (Amherst College, Hampshire College, University of Massachusetts Amherst)[15] 208 | Northampton (Smith College) 209 | South Hadley (Mount Holyoke College) 210 | Fitchburg (Fitchburg State College) 211 | North Adams (Massachusetts College of Liberal Arts) 212 | Springfield (American International College), (Springfield College), and (Western New England College) 213 | Waltham (Bentley University), (Brandeis University) 214 | Williamstown (Williams College) 215 | Framingham (Framingham State University) 216 | Michigan[edit] 217 | Adrian (Adrian College, Siena Heights University) 218 | Albion (Albion College)[17] 219 | Allendale (Grand Valley State University) 220 | Alma (Alma College) 221 | Ann Arbor (University of Michigan)[1] 222 | Berrien Springs (Andrews University)[2] 223 | Big Rapids (Ferris State University)[2] 224 | East Lansing (Michigan State University)[2] 225 | Flint (Kettering University, University of Michigan-Flint) 226 | Hillsdale (Hillsdale College) 227 | Houghton (Michigan Technological University)[5] 228 | Kalamazoo (Western Michigan University, Kalamazoo College)[2] 229 | Marquette (Northern Michigan University)[2] 230 | Midland (Northwood University) 231 | Mount Pleasant (Central Michigan University)[2] 232 | Olivet (Olivet College)[2] 233 | Saginaw (Saginaw Valley State University) 234 | Sault Ste. Marie (Lake Superior State University) 235 | Spring Arbor (Spring Arbor University)[2] 236 | Ypsilanti (Eastern Michigan University)[2] 237 | Minnesota[edit] 238 | Bemidji (Bemidji State University)[2] 239 | Crookston (University of Minnesota Crookston)[2] 240 | Duluth (University of Minnesota Duluth, Lake Superior College, The College of St. 
Scholastica, University of Wisconsin–Superior, Duluth Business University 241 | Faribault, South Central College 242 | Mankato (Minnesota State University, Mankato),[2] Bethany Lutheran College 243 | Marshall (Southwest Minnesota State University)[2] 244 | Moorhead (Minnesota State University, Moorhead, Concordia College)[18] 245 | Morris (University of Minnesota Morris)[2] 246 | Northfield (Carleton College, St. Olaf College)[5] 247 | North Mankato, South Central College 248 | St. Cloud (St. Cloud State University, The College of St. Scholastica)[2] 249 | St. Joseph (College of Saint Benedict)[2] 250 | St. Peter (Gustavus Adolphus College)[2] 251 | Winona (Winona State University, St. Mary's University of Minnesota)[19] 252 | Mississippi[edit] 253 | Cleveland (Delta State University)[2] 254 | Hattiesburg (University of Southern Mississippi)[20] 255 | Itta Bena (Mississippi Valley State University)[2] 256 | Oxford (University of Mississippi)[2] 257 | Starkville (Mississippi State University)[2] 258 | Missouri[edit] 259 | Bolivar (Southwest Baptist University)[2] 260 | Cape Girardeau (Southeast Missouri State University)[2] 261 | Columbia (University of Missouri, Stephens College, Columbia College)[20] 262 | Fayette (Central Methodist University)[2] 263 | Fulton (Westminster College and William Woods University). 264 | Kirksville (Truman State University, A. T. 
Still University)[2] 265 | Maryville (Northwest Missouri State University)[2] 266 | Rolla (Missouri University of Science and Technology)[2] 267 | Warrensburg (University of Central Missouri)[5] 268 | Montana[edit] 269 | Bozeman (Montana State University)[2] 270 | Dillon (University of Montana Western)[2] 271 | Missoula (University of Montana)[5] 272 | Nebraska[edit] 273 | Chadron (Chadron State College)[5] 274 | Crete (Doane College)[2] 275 | Kearney (University of Nebraska at Kearney)[2] 276 | Lincoln (University of Nebraska at Lincoln)[5] 277 | Peru (Peru State College)[2] 278 | Seward (Concordia University)[2] 279 | Wayne (Wayne State College)[2] 280 | Nevada[edit] 281 | Las Vegas (University of Nevada, Las Vegas) 282 | Reno (University of Nevada, Reno) 283 | New Hampshire[edit] 284 | New London, New Hampshire (Colby-Sawyer College) 285 | Durham (University of New Hampshire)[2] 286 | Hanover (Dartmouth College)[5] 287 | Henniker (New England College) 288 | Keene (Keene State College)[2] 289 | Plymouth (Plymouth State University)[2] 290 | Rindge (Franklin Pierce University) 291 | New Jersey[edit] 292 | Ewing (The College of New Jersey), (Rider University) 293 | Jersey City (New Jersey City University), (Saint Peter's University) 294 | Glassboro (Rowan University)[2] 295 | Hoboken (Stevens Institute of Technology) 296 | Madison (Drew University), (Fairleigh Dickinson University), (College of Saint Elizabeth) 297 | Newark (Rutgers University), (New Jersey Institute of Technology), (UMDNJ) 298 | New Brunswick (Rutgers University)[5] 299 | Princeton (Princeton University)[5] 300 | Union (Kean University) 301 | West Long Branch (Monmouth University) 302 | New Mexico[edit] 303 | Hobbs (University of the Southwest)[2] 304 | Las Cruces (New Mexico State University)[2] 305 | Las Vegas (New Mexico Highlands University)[2] 306 | Portales (Eastern New Mexico University)[2] 307 | Silver City (Western New Mexico University)[2] 308 | New York[edit] 309 | Alfred (Alfred 
University, Alfred State College)[2] 310 | Albany (SUNY Albany, Siena College, Albany College of Pharmacy, Albany Law School, Albany Medical College, College of Saint Rose, Excelsior College, Maria College of Albany, Mildred Elley, Sage College of Albany) 311 | Aurora (Wells College)[21] 312 | Binghamton (Binghamton University)[2] 313 | Brockport (SUNY Brockport)[5] 314 | Buffalo (University at Buffalo) 315 | Canton (St. Lawrence University, SUNY Canton)[2] 316 | Clinton (Hamilton College)[2] 317 | Cobleskill (SUNY Cobleskill)[2] 318 | Delhi (SUNY Delhi)[2] 319 | Fredonia (SUNY Fredonia)[2] 320 | Geneseo (SUNY Geneseo)[2] 321 | Geneva (Hobart and William Smith Colleges) 322 | Hamilton (Colgate University)[2] 323 | Ithaca (Cornell University, Ithaca College)[1] 324 | Morningside Heights, Manhattan (Columbia University, Barnard College, Teachers College, Manhattan School of Music, Jewish Theological Seminary, Union Theological Seminary, Bank Street College of Education) 325 | New Paltz (SUNY New Paltz)[2] 326 | Oneonta (SUNY Oneonta, Hartwick College)[2] 327 | Oswego (SUNY Oswego)[2] 328 | Plattsburgh (SUNY Plattsburgh)[2] 329 | Potsdam (SUNY Potsdam, Clarkson University)[2] 330 | Poughkeepsie (Vassar College, Marist College)[2] 331 | Purchase (Purchase College, Manhattanville College)[2] 332 | Rochester (University of Rochester, Rochester Institute of Technology, Nazareth College, St. 
John Fisher College, Monroe Community College, Roberts Wesleyan College, SUNY Brockport, SUNY Empire State College)[2] 333 | Saratoga Springs (Skidmore College)[2] 334 | Seneca Falls (New York Chiropractic College) 335 | Stony Brook (Stony Brook University) 336 | Syracuse (Syracuse University, SUNY ESF, Upstate Medical University) 337 | Tivoli (Bard College) 338 | Troy (Rensselaer Polytechnic Institute, Russell Sage College, Hudson Valley Community College) 339 | West Point (United States Military Academy) 340 | North Carolina[edit] 341 | Banner Elk (Lees-McRae College) 342 | Boiling Springs (Gardner-Webb University)[2] 343 | Boone (Appalachian State University)[2] 344 | Buies Creek (Campbell University)[2] 345 | Chapel Hill (University of North Carolina at Chapel Hill)[20] 346 | Cullowhee (Western Carolina University)[2] 347 | Davidson (Davidson College)[5] 348 | Durham (Duke University, North Carolina Central University)[5] 349 | Elon (Elon University)[2] 350 | Greensboro (University of North Carolina at Greensboro, Greensboro College, Guilford College, North Carolina A & T State University, Bennett College) 351 | Greenville (East Carolina University)[2] 352 | Hickory (Lenoir-Rhyne University)[2] 353 | Mars Hill (Mars Hill College)[2] 354 | Mount Olive (Mount Olive College)[2] 355 | Pembroke (University of North Carolina at Pembroke)[2] 356 | Wilmington, North Carolina (University of North Carolina at Wilmington) 357 | Wingate (Wingate University)[2] 358 | Winston-Salem (Wake Forest University, University of North Carolina School of the Arts, Salem College, Winston-Salem State University) 359 | North Dakota[edit] 360 | Fargo (North Dakota State University)[18] 361 | Grand Forks (University of North Dakota)[5] 362 | Ohio[edit] 363 | Ada (Ohio Northern University)[2] 364 | Alliance (University of Mount Union) 365 | Ashland (Ashland University)[2] 366 | Athens (Ohio University)[2] 367 | Berea (Baldwin Wallace College) 368 | Bluffton (Bluffton University)[2] 369 | 
Bowling Green (Bowling Green State University)[2] 370 | Cedarville (Cedarville University)[2] 371 | Columbus (Ohio State University) 372 | Delaware (Ohio Wesleyan University) 373 | Fairborn (Wright State University) 374 | Findlay (University of Findlay) 375 | Gambier (Kenyon College)[2] 376 | Granville (Denison University)[2] 377 | Hiram (Hiram College)[2] 378 | Kent (Kent State University)[2] 379 | Nelsonville (Hocking College)[2] 380 | New Concord (Muskingum College)[2] 381 | Oberlin (Oberlin College)[5] 382 | Oxford (Miami University)[5] 383 | Rio Grande (University of Rio Grande)[2] 384 | Wilberforce (Wilberforce University, Central State University)[2] 385 | Oklahoma[edit] 386 | Ada (East Central University)[2] 387 | Alva (Northwestern Oklahoma State University)[2] 388 | Durant (Southeastern Oklahoma State University)[2] 389 | Edmond (University of Central Oklahoma, Oklahoma Christian University)[2] 390 | Goodwell (Oklahoma Panhandle State University)[2] 391 | Langston (Langston University)[5] 392 | Norman (University of Oklahoma)[1] 393 | Stillwater (Oklahoma State University)[5] 394 | Tahlequah (Northeastern State University)[2] 395 | Tulsa (The University of Tulsa) 396 | Weatherford (Southwestern Oklahoma State University) 397 | Oregon[edit] 398 | Ashland (Southern Oregon University)[2] 399 | Corvallis (Oregon State University)[20] 400 | Eugene (Lane Community College, Northwest Christian University, University of Oregon)[20] 401 | Forest Grove (Pacific University) 402 | Klamath Falls (Klamath Community College, Oregon Institute of Technology) 403 | La Grande (Eastern Oregon University)[2] 404 | Marylhurst (Marylhurst University) 405 | McMinnville (Linfield College) 406 | Monmouth (Western Oregon University)[2] 407 | Newberg (George Fox University) 408 | Pennsylvania[edit] 409 | Altoona (Penn State Altoona) 410 | Annville (Lebanon Valley College)[2] 411 | Bethlehem (Lehigh University, Moravian College) 412 | Bloomsburg (Bloomsburg University of 
Pennsylvania)[2] 413 | Bradford (University of Pittsburgh at Bradford) 414 | California (California University of Pennsylvania)[2] 415 | Carlisle (Dickinson College) 416 | Cecil B. Moore, Philadelphia, also known as "Templetown" (Temple University) 417 | Clarion (Clarion University of Pennsylvania)[2] 418 | Collegeville (Ursinus College) 419 | Cresson (Mount Aloysius College)[2] 420 | East Stroudsburg (East Stroudsburg University of Pennsylvania)[2] 421 | Edinboro (Edinboro University of Pennsylvania)[2] 422 | Erie (Gannon University, Mercyhurst College, Penn State Erie) 423 | Gettysburg (Gettysburg College)[2] 424 | Greensburg (Seton Hill University, University of Pittsburgh at Greensburg) 425 | Grove City (Grove City College)[2] 426 | Huntingdon (Juniata College)[2] 427 | Indiana (Indiana University of Pennsylvania)[2] 428 | Johnstown (University of Pittsburgh at Johnstown) 429 | Kutztown (Kutztown University of Pennsylvania)[2] 430 | Lancaster (Franklin & Marshall) 431 | Lewisburg (Bucknell University)[5] 432 | Lock Haven (Lock Haven University of Pennsylvania)[2] 433 | Loretto (St. 
Francis University)[2] 434 | Mansfield (Mansfield University of Pennsylvania)[2] 435 | Meadville (Allegheny College) 436 | Mont Alto (Penn State Mont Alto) 437 | Millersville (Millersville University of Pennsylvania)[2] 438 | New Wilmington (Westminster College)[2] 439 | North East (Mercyhurst North East) 440 | University City, Philadelphia (Drexel University, University of Pennsylvania, University of the Sciences in Philadelphia) 441 | Oakland, Pittsburgh (Carnegie Mellon University, University of Pittsburgh, Carlow University) 442 | Reading (Albright College, Alvernia University, Penn State Berks) 443 | Selinsgrove (Susquehanna University)[2] 444 | Shippensburg (Shippensburg University of Pennsylvania)[2] 445 | Slippery Rock (Slippery Rock University of Pennsylvania)[2] 446 | State College (Pennsylvania State University)[22] 447 | Villanova (Villanova University) 448 | Waynesburg (Waynesburg University) 449 | West Chester (West Chester University of Pennsylvania) 450 | Wilkes-Barre (King's College, Wilkes University) 451 | Williamsport (Lycoming College, Pennsylvania College of Technology)[2] 452 | Rhode Island[edit] 453 | Kingston (University of Rhode Island)[2] 454 | Providence (Brown University, (University of Rhode Island), Rhode Island School of Design, Johnson and Wales University, Providence College, Community College of Rhode Island, Rhode Island College, and Roger Williams University.) 
455 | South Carolina[edit] 456 | Central (Southern Wesleyan University)[2] 457 | Charleston (College of Charleston, The Citadel, MUSC) 458 | Clemson (Clemson University)[2] 459 | Clinton (Presbyterian College) 460 | Columbia (University of South Carolina)[14] 461 | Due West (Erskine College) 462 | Florence (Francis Marion University) 463 | Greenwood (Lander University) 464 | Orangeburg (South Carolina State University, Claflin University)[2] 465 | Rock Hill (Winthrop University) 466 | Spartanburg (Wofford College, Converse College, University of South Carolina Upstate, Spartanburg Methodist College, Edward Via College of Osteopathic Medicine, Spartanburg Community College, Virginia College, Sherman College of Chiropractic) 467 | South Dakota[edit] 468 | Brookings (South Dakota State University)[2] 469 | Madison (Dakota State University) 470 | Spearfish (Black Hills State University) 471 | Vermillion (University of South Dakota)[5] 472 | Tennessee[edit] 473 | Chattanooga (University of Tennessee at Chattanooga) 474 | Collegedale (Southern Adventist University) 475 | Cookeville (Tennessee Technological University)[2] 476 | Harrogate (Lincoln Memorial University)[2] 477 | Henderson (Freed-Hardeman University)[2] 478 | Johnson City (East Tennessee State University) 479 | Knoxville (University of Tennessee) 480 | Martin (University of Tennessee at Martin)[2] 481 | McKenzie (Bethel University)[2] 482 | Memphis (Christian Brothers University, LeMoyne-Owen College, Memphis College of Art, Memphis Theological Seminary, Rhodes College, Southern College of Optometry, Southwest Tennessee Community College, University of Memphis, University of Tennessee Health Science Center, Visible Music College) 483 | Murfreesboro (Middle Tennessee State University)[2] 484 | Nashville (Vanderbilt University, Belmont University, Tennessee State University, Lipscomb University, Fisk University, Aquinas College, Trevecca Nazarene University) 485 | Sewanee (Sewanee: the University of the 
South)[2] 486 | Texas[edit] 487 | Abilene (Abilene Christian University, Hardin-Simmons University, McMurry University) 488 | Alpine (Sul Ross State University)[2] 489 | Austin (University of Texas at Austin, St. Edwards University, Huston-Tillotson University)[2] 490 | Beaumont (Lamar University) 491 | Canyon (West Texas A&M University)[2] 492 | College Station (Texas A&M University)[5] 493 | Commerce (Texas A&M University–Commerce)[2] 494 | Dallas (Southern Methodist University) 495 | Denton (University of North Texas, Texas Woman's University)[2] 496 | Fort Worth (Texas Christian University, Texas Wesleyan University) 497 | Georgetown (Southwestern University) 498 | Huntsville (Sam Houston State University)[2] 499 | Houston (University of Houston, Texas Southern University, Rice University, Houston Baptist University) 500 | Keene (Southwestern Adventist University)[2] 501 | Kingsville (Texas A&M University–Kingsville)[2] 502 | Lubbock (Texas Tech University, Lubbock Christian University) 503 | Nacogdoches (Stephen F. 
Austin State University)[2] 504 | Plainview (Wayland Baptist University)[2] 505 | Prairie View (Prairie View A&M University)[2] 506 | San Marcos (Texas State University)[5] 507 | Stephenville (Tarleton State University)[2] 508 | Waco (Baylor University) 509 | Utah[edit] 510 | Cedar City (Southern Utah University)[2] 511 | Logan (Utah State University)[2] 512 | Provo (Brigham Young University)[5] 513 | Orem (Utah Valley University) 514 | Salt Lake City (University of Utah) 515 | Ephraim (Snow College) 516 | Vermont[edit] 517 | Burlington (University of Vermont, Champlain College and Saint Michael's College)[2] 518 | Castleton (Castleton State College)[2] 519 | Johnson (Johnson State College)[2] 520 | Lyndonville (Lyndon State College)[2] 521 | Middlebury (Middlebury College)[2] 522 | Northfield (Norwich University)[2] 523 | Virginia[edit] 524 | Blacksburg (Virginia Polytechnic Institute and State University)[5] 525 | Bridgewater (Bridgewater College)[2] 526 | Charlottesville (University of Virginia)[23] 527 | Farmville (Longwood University, Hampden-Sydney College)[2] 528 | Fredericksburg (University of Mary Washington)[2] 529 | Harrisonburg (James Madison University, Eastern Mennonite University)[2] 530 | Lexington (Washington and Lee University, Virginia Military Institute)[2] 531 | Lynchburg (Lynchburg College, Randolph College, Liberty University, Central Virginia Community College) 532 | Radford (Radford University)[2] 533 | Williamsburg (The College of William & Mary)[2] 534 | Wise (University of Virginia's College at Wise)[2] 535 | Chesapeake (Averett University, DeVry University, Troy University, Tidewater Community College, Strayer University, Everest University, Sentera College of Health Sciences, St Leo University)[2] 536 | Washington[edit] 537 | Bellingham (Western Washington University) 538 | Cheney (Eastern Washington University)[2] 539 | Ellensburg (Central Washington University)[5] 540 | Pullman (Washington State University)[5] 541 | University 
District, Seattle (University of Washington)[5] 542 | West Virginia[edit] 543 | Athens (Concord University)[2] 544 | Buckhannon (West Virginia Wesleyan College)[2] 545 | Fairmont (Fairmont State University)[2] 546 | Glenville (Glenville State College)[2] 547 | Huntington (Marshall University)[2] 548 | Montgomery (West Virginia University Institute of Technology)[2] 549 | Morgantown (West Virginia University)[2] 550 | Shepherdstown (Shepherd University)[2] 551 | West Liberty (West Liberty University)[2] 552 | Wisconsin[edit] 553 | Appleton (Lawrence University) 554 | Eau Claire (University of Wisconsin–Eau Claire) 555 | Green Bay (University of Wisconsin-Green Bay) 556 | La Crosse (University of Wisconsin–La Crosse, Western Technical College, Viterbo University)[2] 557 | Madison (University of Wisconsin–Madison)[2] 558 | Menomonie (University of Wisconsin–Stout)[2] 559 | Milwaukee (Marquette University, University of Wisconsin–Milwaukee) 560 | Oshkosh (University of Wisconsin–Oshkosh) 561 | Platteville (University of Wisconsin–Platteville)[2] 562 | River Falls (University of Wisconsin–River Falls)[2] 563 | Stevens Point (University of Wisconsin–Stevens Point)[2] 564 | Waukesha (Carroll University) 565 | Whitewater (University of Wisconsin–Whitewater)[2] 566 | Wyoming[edit] 567 | Laramie (University of Wyoming)[5] 568 | --------------------------------------------------------------------------------