├── .gitignore ├── .gitmodules ├── Untitled.ipynb ├── data ├── repos.tsv └── urls ├── gitenberg-read-metadata.py ├── gitenbergScrape.py ├── gut-aus.ipynb ├── gut-aus2.ipynb ├── gut-aus3.ipynb ├── gut-rdf.ipynb ├── parseRDF.py ├── pg-add-amazon.ipynb ├── pg-add-dbpedia.ipynb ├── pg-add-goodreads-Copy1.ipynb ├── pg-add-goodreads.ipynb ├── pg-add-google-books.ipynb ├── pg-cleanup.ipynb ├── pg-collapse-volumes.ipynb ├── pg-get-corpora.ipynb ├── pg-get-filenames.ipynb ├── pg-metadata-rdf.hdf ├── pg-pr-eng-add-dbpedia.ipynb ├── pg-pr-eng-by-author.ipynb ├── pg-pr-eng-clean-text.ipynb ├── pg-pr-eng-merge-duplicates.ipynb ├── pg-vectorize.ipynb ├── pg-vectorize2.ipynb ├── pr-extract-text.ipynb ├── pr-metadata.ipynb └── pr-metadata2.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | secrets.py 2 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "pyplot-hierarchical-pie"] 2 | path = pyplot-hierarchical-pie 3 | url = https://github.com/klieret/pyplot-hierarchical-pie 4 | -------------------------------------------------------------------------------- /Untitled.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# My metadata notebook" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 2, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 4, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "df = pd.read_hdf('pg-metadata-rdf.hdf')" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 7, 33 | "metadata": {}, 34 | "outputs": [ 35 | { 36 | "data": { 37 | "text/plain": [ 38 | 
"0 {}\n", 39 | "1 {'text/html': 'http://www.gutenberg.org/ebooks...\n", 40 | "2 {'text/html': 'http://www.gutenberg.org/files/...\n", 41 | "3 {'text/html': 'http://www.gutenberg.org/files/...\n", 42 | "4 {'text/html': 'http://www.gutenberg.org/files/...\n", 43 | "5 {'text/plain': 'http://www.gutenberg.org/ebook...\n", 44 | "6 {'text/html': 'http://www.gutenberg.org/files/...\n", 45 | "7 {'text/html': 'http://www.gutenberg.org/files/...\n", 46 | "8 {'text/html; charset=us-ascii': 'http://www.gu...\n", 47 | "9 {'text/plain; charset=us-ascii': 'http://www.g...\n", 48 | "10 {'text/plain; charset=us-ascii': 'http://www.g...\n", 49 | "11 {'text/plain; charset=utf-8': 'http://www.gute...\n", 50 | "12 {'text/plain; charset=utf-8': 'http://www.gute...\n", 51 | "13 {'text/html; charset=utf-8': 'http://www.guten...\n", 52 | "14 {'text/html': 'http://www.gutenberg.org/ebooks...\n", 53 | "15 {'text/html': 'http://www.gutenberg.org/ebooks...\n", 54 | "16 {'text/html; charset=utf-8': 'http://www.guten...\n", 55 | "17 {'application/x-mobipocket-ebook': 'http://www...\n", 56 | "18 {'text/html': 'http://www.gutenberg.org/ebooks...\n", 57 | "19 {'application/x-mobipocket-ebook': 'http://www...\n", 58 | "20 {'text/html': 'http://www.gutenberg.org/ebooks...\n", 59 | "21 {'text/plain; charset=utf-8': 'http://www.gute...\n", 60 | "22 {'text/html': 'http://www.gutenberg.org/ebooks...\n", 61 | "23 {'text/html; charset=us-ascii': 'http://www.gu...\n", 62 | "24 {'text/plain; charset=utf-8': 'http://www.gute...\n", 63 | "25 {'text/html': 'http://www.gutenberg.org/ebooks...\n", 64 | "26 {'text/html': 'http://www.gutenberg.org/ebooks...\n", 65 | "27 {'text/html': 'http://www.gutenberg.org/ebooks...\n", 66 | "28 {'text/html': 'http://www.gutenberg.org/ebooks...\n", 67 | "29 {'text/html': 'http://www.gutenberg.org/ebooks...\n", 68 | " ... 
\n", 69 | "54376 {'image/jpeg': 'http://www.gutenberg.org/cache...\n", 70 | "54377 {'image/jpeg': 'http://www.gutenberg.org/cache...\n", 71 | "54378 {'image/jpeg': 'http://www.gutenberg.org/cache...\n", 72 | "54379 {'text/html': 'http://www.gutenberg.org/ebooks...\n", 73 | "54380 {'image/jpeg': 'http://www.gutenberg.org/cache...\n", 74 | "54381 {'image/jpeg': 'http://www.gutenberg.org/cache...\n", 75 | "54382 {'text/html': 'http://www.gutenberg.org/ebooks...\n", 76 | "54383 {'image/jpeg': 'http://www.gutenberg.org/cache...\n", 77 | "54384 {'image/jpeg': 'http://www.gutenberg.org/cache...\n", 78 | "54385 {'text/html': 'http://www.gutenberg.org/ebooks...\n", 79 | "54386 {'image/jpeg': 'http://www.gutenberg.org/cache...\n", 80 | "54387 {'image/jpeg': 'http://www.gutenberg.org/cache...\n", 81 | "54388 {'image/jpeg': 'http://www.gutenberg.org/cache...\n", 82 | "54389 {'image/jpeg': 'http://www.gutenberg.org/cache...\n", 83 | "54390 {'text/html': 'http://www.gutenberg.org/ebooks...\n", 84 | "54391 {'image/jpeg': 'http://www.gutenberg.org/cache...\n", 85 | "54392 {'image/jpeg': 'http://www.gutenberg.org/cache...\n", 86 | "54393 {'image/jpeg': 'http://www.gutenberg.org/cache...\n", 87 | "54394 {'image/jpeg': 'http://www.gutenberg.org/cache...\n", 88 | "54395 {'text/html': 'http://www.gutenberg.org/ebooks...\n", 89 | "54396 {'image/jpeg': 'http://www.gutenberg.org/cache...\n", 90 | "54397 {'text/html; charset=iso-8859-1': 'http://www....\n", 91 | "54398 {'text/html': 'http://www.gutenberg.org/ebooks...\n", 92 | "54404 {'image/jpeg': 'http://www.gutenberg.org/cache...\n", 93 | "54405 {'image/jpeg': 'http://www.gutenberg.org/cache...\n", 94 | "54406 {'image/jpeg': 'http://www.gutenberg.org/cache...\n", 95 | "54407 {'image/jpeg': 'http://www.gutenberg.org/cache...\n", 96 | "54408 {'text/html': 'http://www.gutenberg.org/ebooks...\n", 97 | "54409 {'image/jpeg': 'http://www.gutenberg.org/cache...\n", 98 | "999999 {}\n", 99 | "Name: formats, dtype: object" 100 | ] 101 | }, 102 | 
"execution_count": 7, 103 | "metadata": {}, 104 | "output_type": "execute_result" 105 | } 106 | ], 107 | "source": [ 108 | "df.T['formats']" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": { 115 | "collapsed": true 116 | }, 117 | "outputs": [], 118 | "source": [] 119 | } 120 | ], 121 | "metadata": { 122 | "kernelspec": { 123 | "display_name": "Python 3", 124 | "language": "python", 125 | "name": "python3" 126 | }, 127 | "language_info": { 128 | "codemirror_mode": { 129 | "name": "ipython", 130 | "version": 3 131 | }, 132 | "file_extension": ".py", 133 | "mimetype": "text/x-python", 134 | "name": "python", 135 | "nbconvert_exporter": "python", 136 | "pygments_lexer": "ipython3", 137 | "version": "3.6.0" 138 | } 139 | }, 140 | "nbformat": 4, 141 | "nbformat_minor": 2 142 | } 143 | -------------------------------------------------------------------------------- /gitenberg-read-metadata.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import yaml 3 | import pandas as pd 4 | import json 5 | 6 | # Ignore special handling of tags like !lcsh 7 | def default_ctor(loader, tag_suffix, node): 8 | # print(loader, tag_suffix, node) 9 | return tag_suffix + ' ' + node.value 10 | 11 | yaml.add_multi_constructor('', default_ctor) 12 | 13 | repos = glob.glob('/run/media/jon/SAMSUNG/gitenberg/*') 14 | 15 | def loadYaml(path): 16 | yamlFilename = path + '/metadata.yaml' 17 | try: 18 | with open(yamlFilename) as f: 19 | metadata = f.read() 20 | except: 21 | return None 22 | return metadata 23 | 24 | def parseYaml(metadata): 25 | parsed = yaml.load(metadata) 26 | return parsed 27 | 28 | def loadJson(path): 29 | jsonFilename = path + '/metadata.json' 30 | try: 31 | with open(jsonFilename) as f: 32 | metadata = f.read() 33 | except: 34 | return None 35 | return metadata 36 | 37 | def parseJson(metadata): 38 | try: 39 | parsed = json.loads(metadata) 40 | except: 41 | print("Couldn't 
parse JSON for some reason.") 42 | return None 43 | return parsed 44 | 45 | gitenbergDict = {} 46 | for i, repo in enumerate(repos): 47 | repoLen = len(repos) 48 | print('Processing %s of %s: %s' % (i, repoLen, repo)) 49 | print('Processing: ', repo) 50 | repoID = repo.split('_')[-1] 51 | metadata = loadYaml(repo) 52 | jsonMetadata = loadJson(repo) 53 | if metadata is not None: 54 | metadata = parseYaml(metadata) 55 | print('Successfully parsed YAML.') 56 | else: 57 | print("ERROR: Couldn't parse YAML.") 58 | metadata = {} 59 | if jsonMetadata is not None: 60 | jsonParsed = parseJson(jsonMetadata) 61 | print('Successfully parsed JSON.') 62 | else: 63 | print("ERROR: Couldn't load JSON.") 64 | jsonParsed = {} 65 | if jsonParsed is not None: 66 | # Merge 67 | for key in jsonParsed: 68 | prefixed = 'j' + key 69 | metadata[prefixed] = jsonParsed[key] 70 | gitenbergDict[repoID] = metadata 71 | # if i > 100: 72 | # break 73 | 74 | print('Making data frame...') 75 | df = pd.DataFrame(gitenbergDict).T 76 | 77 | print('Writing to csv...') 78 | df.to_csv('gitenberg-metadata.csv') 79 | -------------------------------------------------------------------------------- /gitenbergScrape.py: -------------------------------------------------------------------------------- 1 | """ Gets a list of all the GITenberg repos, along with their descriptions and URLs. 
""" 2 | import github3 3 | from getpass import getpass 4 | 5 | try: 6 | from secrets import user, pw 7 | except: 8 | user = input('GitHub username: ') 9 | pw = getpass('GitHub password for {0}: '.format(user)) 10 | 11 | g = github3.login(user,pw) 12 | 13 | org = g.organization('gitenberg') 14 | 15 | for repo in org.iter_repos(): 16 | try: 17 | name = repo.name 18 | desc = repo.description 19 | url = repo.clone_url 20 | data = [name, desc, url] 21 | # Sanitize 22 | data = [item.replace('\t', ' ') for item in data] 23 | data = [item.replace('\n', ' ') for item in data] 24 | print('\t'.join(data)) 25 | except: 26 | continue 27 | -------------------------------------------------------------------------------- /gut-aus3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd\n", 12 | "import re\n", 13 | "import requests\n", 14 | "import sqlite3" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": { 21 | "collapsed": true 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "df = pd.read_hdf('pgaus.hdf', 'pgaus')" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 3, 31 | "metadata": {}, 32 | "outputs": [ 33 | { 34 | "data": { 35 | "text/html": [ 36 | "
\n", 37 | "\n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 
| " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 
349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " 
\n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | 
" \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 
| " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 
964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | " \n", 1092 | " \n", 1093 | " \n", 1094 | " \n", 1095 | " \n", 1096 | " \n", 1097 | " \n", 1098 | " \n", 1099 | " \n", 1100 | " \n", 1101 | " \n", 1102 | " \n", 1103 | " \n", 1104 | " \n", 1105 | " \n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1109 
| " \n", 1110 | " \n", 1111 | " \n", 1112 | " \n", 1113 | " \n", 1114 | " \n", 1115 | " \n", 1116 | " \n", 1117 | " \n", 1118 | " \n", 1119 | " \n", 1120 | " \n", 1121 | " \n", 1122 | " \n", 1123 | " \n", 1124 | " \n", 1125 | " \n", 1126 | " \n", 1127 | " \n", 1128 | " \n", 1129 | " \n", 1130 | " \n", 1131 | " \n", 1132 | " \n", 1133 | " \n", 1134 | " \n", 1135 | " \n", 1136 | " \n", 1137 | " \n", 1138 | " \n", 1139 | " \n", 1140 | " \n", 1141 | " \n", 1142 | " \n", 1143 | " \n", 1144 | " \n", 1145 | " \n", 1146 | " \n", 1147 | " \n", 1148 | " \n", 1149 | " \n", 1150 | " \n", 1151 | " \n", 1152 | " \n", 1153 | " \n", 1154 | " \n", 1155 | " \n", 1156 | " \n", 1157 | " \n", 1158 | " \n", 1159 | " \n", 1160 | " \n", 1161 | " \n", 1162 | " \n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | " \n", 1186 | " \n", 1187 | " \n", 1188 | " \n", 1189 | " \n", 1190 | " \n", 1191 | " \n", 1192 | " \n", 1193 | " \n", 1194 | " \n", 1195 | " \n", 1196 | " \n", 1197 | " \n", 1198 | " \n", 1199 | " \n", 1200 | " \n", 1201 | " \n", 1202 | " \n", 1203 | " \n", 1204 | " \n", 1205 | " \n", 1206 | " \n", 1207 | " \n", 1208 | " \n", 1209 | " \n", 1210 | " \n", 1211 | " \n", 1212 | " \n", 1213 | " \n", 1214 | " \n", 1215 | " \n", 1216 | " \n", 1217 | " \n", 1218 | " \n", 1219 | " \n", 1220 | "
DateAddedTitle and AuthorIDaIDbURLOtherURLshtmlURLTitletxtURLAuthorNotesSubtitleShortTitleAuthorAltTextHTML
0Mar 2017The Solitary Farm, Fergus Hume[170028xx.xxx]3799Ahttp://gutenberg.net.au/ebooks17/1700281h.htmlhttp://gutenberg.net.au/ebooks17/1700281h.htmlhttp://gutenberg.net.au/ebooks17/1700281h.htmlThe Solitary FarmNaNFergus HumeNaNNaNNaNNaNNaN\\r\\n<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4....
1Mar 2017The Green Mummy, Fergus Hume[170027xx.xxx]3798Ahttp://gutenberg.net.au/ebooks17/1700271h.htmlhttp://gutenberg.net.au/ebooks17/1700271h.htmlhttp://gutenberg.net.au/ebooks17/1700271h.htmlThe Green MummyNaNFergus HumeNaNNaNNaNNaNNaN\\r\\n<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4....
2Mar 2017The Opal Serpent, Fergus Hume[170026xx.xxx]3797Ahttp://gutenberg.net.au/ebooks17/1700261h.htmlhttp://gutenberg.net.au/ebooks17/1700261h.htmlhttp://gutenberg.net.au/ebooks17/1700261h.htmlThe Opal SerpentNaNFergus HumeNaNNaNNaNNaNNaN\\r\\n<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4....
3Mar 2017The Mandarin's Fan, Fergus Hume[170025xx.xxx]3796Ahttp://gutenberg.net.au/ebooks17/1700251h.htmlhttp://gutenberg.net.au/ebooks17/1700251h.htmlhttp://gutenberg.net.au/ebooks17/1700251h.htmlThe Mandarin's FanNaNFergus HumeNaNNaNNaNNaNNaN\\r\\n<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4....
4Mar 2017The Red Window, Fergus Hume[170024xx.xxx]3795Ahttp://gutenberg.net.au/ebooks17/1700241h.htmlhttp://gutenberg.net.au/ebooks17/1700241h.htmlhttp://gutenberg.net.au/ebooks17/1700241h.htmlThe Red WindowNaNFergus HumeNaNNaNNaNNaNNaN\\r\\n<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4....
5Mar 2017The Dwarf's Chamber, Fergus Hume[170023xx.xxx]3794Ahttp://gutenberg.net.au/ebooks17/1700231h.htmlhttp://gutenberg.net.au/ebooks17/1700231h.htmlhttp://gutenberg.net.au/ebooks17/1700231h.htmlThe Dwarf's ChamberNaNFergus HumeNaNNaNNaNNaNNaN\\r\\n<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4....
6Mar 2017A Coin of Edward VII, Fergus Hume[170022xx.xxx]3793Ahttp://gutenberg.net.au/ebooks17/1700221h.htmlhttp://gutenberg.net.au/ebooks17/1700221h.htmlhttp://gutenberg.net.au/ebooks17/1700221h.htmlA Coin of Edward VIINaNFergus HumeNaNNaNNaNNaNNaN\\r\\n<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4....
7Mar 2017The Pagan's Cup, Fergus Hume[170021xx.xxx]3792Ahttp://gutenberg.net.au/ebooks17/1700211h.htmlhttp://gutenberg.net.au/ebooks17/1700211h.htmlhttp://gutenberg.net.au/ebooks17/1700211h.htmlThe Pagan's CupNaNFergus HumeNaNNaNNaNNaNNaN\\r\\n<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4....
8Mar 2017They See in Darkness, Ethel Lina White[170020xx.xxx]3791Ahttp://gutenberg.net.au/ebooks17/1700201h.htmlhttp://gutenberg.net.au/ebooks17/1700201h.htmlhttp://gutenberg.net.au/ebooks17/1700201h.htmlThey See in DarknessNaNEthel Lina WhiteNaNNaNNaNNaNNaN<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 T...
9Mar 2017The Man Who Loved Lions, Ethel Lina White[170019xx.xxx]3790Ahttp://gutenberg.net.au/ebooks17/1700191h.htmlhttp://gutenberg.net.au/ebooks17/1700191h.htmlhttp://gutenberg.net.au/ebooks17/1700191h.htmlThe Man Who Loved LionsNaNEthel Lina WhiteNaNNaNNaNNaNNaN<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 T...
10Mar 2017The Elephant Never Forgets, Ethel Lina White[170018xx.xxx]3789Ahttp://gutenberg.net.au/ebooks17/1700181h.htmlhttp://gutenberg.net.au/ebooks17/1700181h.htmlhttp://gutenberg.net.au/ebooks17/1700181h.htmlThe Elephant Never ForgetsNaNEthel Lina WhiteNaNNaNNaNNaNNaN<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 T...
11Mar 2017Put Out the Light, Ethel Lina White[170017xx.xxx]3788Ahttp://gutenberg.net.au/ebooks17/1700171h.htmlhttp://gutenberg.net.au/ebooks17/1700171h.htmlhttp://gutenberg.net.au/ebooks17/1700171h.htmlPut Out the LightNaNEthel Lina WhiteNaNNaNNaNNaNNaN<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 T...
12Mar 2017The First Time He Died, Ethel Lina White[170016xx.xxx]3787Ahttp://gutenberg.net.au/ebooks17/1700161h.htmlhttp://gutenberg.net.au/ebooks17/1700161h.htmlhttp://gutenberg.net.au/ebooks17/1700161h.htmlThe First Time He DiedNaNEthel Lina WhiteNaNNaNNaNNaNNaN<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 T...
13Mar 2017A Woman's Burden, Fergus Hume[170015xx.xxx]3786Ahttp://gutenberg.net.au/ebooks17/1700151h.htmlhttp://gutenberg.net.au/ebooks17/1700151h.htmlhttp://gutenberg.net.au/ebooks17/1700151h.htmlA Woman's BurdenNaNFergus HumeNaNNaNNaNNaNNaN\\r\\n<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4....
14Mar 2017Midnight House, Ethel Lina White[170014xx.xxx]3785Ahttp://gutenberg.net.au/ebooks17/1700141h.htmlhttp://gutenberg.net.au/ebooks17/1700141h.htmlhttp://gutenberg.net.au/ebooks17/1700141h.htmlMidnight HouseNaNEthel Lina WhiteNaNNaNNaNNaNNaN<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 T...
15Feb 2017The Lone Inn, Fergus Hume[170013xx.xxx]3784Ahttp://gutenberg.net.au/ebooks17/1700131h.htmlhttp://gutenberg.net.au/ebooks17/1700131h.htmlhttp://gutenberg.net.au/ebooks17/1700131h.htmlThe Lone InnNaNFergus HumeNaNNaNNaNNaNNaN\\r\\n<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4....
16Feb 2017The Bishop's Secret, Fergus Hume[170012xx.xxx]3783Ahttp://gutenberg.net.au/ebooks17/1700121h.htmlhttp://gutenberg.net.au/ebooks17/1700121h.htmlhttp://gutenberg.net.au/ebooks17/1700121h.htmlThe Bishop's SecretNaNFergus HumeNaNNaNNaNNaNNaN<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 T...
17Feb 2017The Newcastle Packets, J H M Abbott[170011xx.xxx]3782A[Title: The Newcastle Packets and the Hunter V...http://gutenberg.net.au/ebooks17/1700111.txt o...http://gutenberg.net.au/ebooks17/1700111h.htmlThe Newcastle Packets and the Hunter Valleyhttp://gutenberg.net.au/ebooks17/1700111.txtJ H M AbbottNaNNaNThe Newcastle PacketsNaN\\r\\n<table width=\"45%\" border =\"0\">\\r\\n<tr>...NaN
18Feb 2017The Silent House, Fergus Hume[170010xx.xxx]3781Ahttp://gutenberg.net.au/ebooks17/1700101h.htmlhttp://gutenberg.net.au/ebooks17/1700101h.htmlhttp://gutenberg.net.au/ebooks17/1700101h.htmlThe Silent HouseNaNFergus HumeNaNNaNNaNNaNNaN\\r\\n<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4....
19Feb 2017The Huddle, Carolyn Wells[170009xx.xxx]3780Ahttp://gutenberg.net.au/ebooks17/1700091h.htmlhttp://gutenberg.net.au/ebooks17/1700091h.htmlhttp://gutenberg.net.au/ebooks17/1700091h.htmlThe HuddleNaNCarolyn WellsNaNNaNNaNNaNNaN\\r\\n<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4....
20Feb 2017The Harlequin Opal, Fergus Hume[170008xx.xxx]3779Ahttp://gutenberg.net.au/ebooks17/1700081h.htmlhttp://gutenberg.net.au/ebooks17/1700081h.htmlhttp://gutenberg.net.au/ebooks17/1700081h.htmlThe Harlequin OpalNaNFergus HumeNaNNaNNaNNaNNaN\\r\\n<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4....
21Feb 2017Red Money, Fergus Hume[170007xx.xxx]3778Ahttp://gutenberg.net.au/ebooks17/1700071h.htmlhttp://gutenberg.net.au/ebooks17/1700071h.htmlhttp://gutenberg.net.au/ebooks17/1700071h.htmlRed MoneyNaNFergus HumeNaNNaNNaNNaNNaN\\r\\n<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4....
22Feb 2017The Third Eye, Ethel Lina White[170006xx.xxx]3777Ahttp://gutenberg.net.au/ebooks17/1700061h.htmlhttp://gutenberg.net.au/ebooks17/1700061h.htmlhttp://gutenberg.net.au/ebooks17/1700061h.htmlThe Third EyeNaNEthel Lina WhiteNaNNaNNaNNaNNaN<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 T...
23Feb 2017The Third Volume, Fergus Hume[170005xx.xxx]3776Ahttp://gutenberg.net.au/ebooks17/1700051h.htmlhttp://gutenberg.net.au/ebooks17/1700051h.htmlhttp://gutenberg.net.au/ebooks17/1700051h.htmlThe Third VolumeNaNFergus HumeNaNNaNNaNNaNNaN\\r\\n<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4....
24Jan 2017The Secret Passage, Fergus Hume[170004xx.xxx]3775Ahttp://gutenberg.net.au/ebooks17/1700041h.htmlhttp://gutenberg.net.au/ebooks17/1700041h.htmlhttp://gutenberg.net.au/ebooks17/1700041h.htmlThe Secret PassageNaNFergus HumeNaNNaNNaNNaNNaN\\r\\n<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4....
25Jan 2017Madame Midas, Fergus Hume[170003xx.xxx]3774Ahttp://gutenberg.net.au/ebooks17/1700031h.htmlhttp://gutenberg.net.au/ebooks17/1700031h.htmlhttp://gutenberg.net.au/ebooks17/1700031h.htmlMadame MidasNaNFergus HumeNaNNaNNaNNaNNaN\\r\\n<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4....
26Jan 2017Six-Gun Gorilla, Anonymous[170002xx.xxx]3773Ahttp://gutenberg.net.au/ebooks17/1700021h.htmlhttp://gutenberg.net.au/ebooks17/1700021h.htmlhttp://gutenberg.net.au/ebooks17/1700021h.htmlSix-Gun GorillaNaNAnonymousNaNNaNNaNNaNNaN<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 T...
27Jan 2017Collected Short Stories Vol. XX,Fred Merrick W...[170001xx.xxx]3772Ahttp://gutenberg.net.au/ebooks17/1700011h.htmlhttp://gutenberg.net.au/ebooks17/1700011h.htmlhttp://gutenberg.net.au/ebooks17/1700011h.htmlCollected Short Stories Vol. XXNaNFred Merrick WhiteNaNNaNNaNNaNNaN<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 T...
28Dec 2016Beauty in Distress, Fred Merrick White[160140xx.xxx]3771Ahttp://gutenberg.net.au/ebooks16/1601401.txt o...http://gutenberg.net.au/ebooks16/1601401.txt o...http://gutenberg.net.au/ebooks16/1601401h.htmlBeauty in Distresshttp://gutenberg.net.au/ebooks16/1601401.txtFred Merrick WhiteNaNNaNNaNNaN\\r\\n<table width=\"45%\" border =\"0\">\\r\\n<tr>...NaN
29Dec 2016The Shifting Sands, Fred Merrick White[160139xx.xxx]3770Ahttp://gutenberg.net.au/ebooks16/1601391.txt o...http://gutenberg.net.au/ebooks16/1601391.txt o...http://gutenberg.net.au/ebooks16/1601391h.htmlThe Shifting Sandshttp://gutenberg.net.au/ebooks16/1601391.txtFred Merrick WhiteNaNNaNNaNNaN\\r\\n<table width=\"45%\" border =\"0\">\\r\\n<tr>...NaN
...................................................
3692Dec 2001Here's Another, by Lennie Lower[010030xx.xxx]0030Ahttp://gutenberg.net.au/ebooks01/0100301.txt o...http://gutenberg.net.au/ebooks01/0100301.txt o...http://gutenberg.net.au/ebooks01/0100301h.htmlHere's Anotherhttp://gutenberg.net.au/ebooks01/0100301.txtLennie LowerNaNNaNNaNNaN<table width=\"45%\" border =\"0\">\\r\\n<tr>\\r\\n...NaN
3693Dec 2001The Casebook of Sherlock Holmes, by A C Doyle[010029xx.xxx]0029A[Author: Sir Arthur Conan Doyle] http://gutenb...http://gutenberg.net.au/ebooks01/0100291.txt o...http://gutenberg.net.au/ebooks01/0100291.h.htmlThe Casebook of Sherlock Holmeshttp://gutenberg.net.au/ebooks01/0100291.txtSir Arthur Conan DoyleNaNNaNNaNA C Doyle<table width=\"45%\" border =\"0\">\\r\\n<tr>\\r\\n...NaN
3694Dec 2001Anne of Ingleside, by L M Montgomery[010028xx.xxx]0028Ahttp://gutenberg.net.au/ebooks01/0100281.txt o...http://gutenberg.net.au/ebooks01/0100281.txt o...http://gutenberg.net.au/ebooks01/0100281h.htmlAnne of Inglesidehttp://gutenberg.net.au/ebooks01/0100281.txtL M MontgomeryNaNNaNNaNNaN<table width=\"45%\" border =\"0\">\\r\\n<tr>\\r\\n...NaN
3695Dec 2001Tarzan and the Golden Lion, by E R Burroughs[010027xx.xxx]0027A[Author: Edgar Rice Burroughs] http://gutenber...http://gutenberg.net.au/ebooks01/0100271h.htmlhttp://gutenberg.net.au/ebooks01/0100271h.htmlTarzan and the Golden LionNaNEdgar Rice BurroughsNaNNaNNaNE R BurroughsNaN<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 T...
3696Nov 2001The Island of Desire, by Robert D Frisbie[010026xx.xxx]0026Ahttp://gutenberg.net.au/ebooks01/0100261.txt o...http://gutenberg.net.au/ebooks01/0100261.txt o...http://gutenberg.net.au/ebooks01/0100261h.htmlThe Island of Desirehttp://gutenberg.net.au/ebooks01/0100261.txtRobert D FrisbieNaNNaNNaNNaN\\r\\n\\r\\n<table width=\"45%\" border =\"0\">\\r\\n...NaN
3697Nov 2001Anne of Windy Poplars, by L M Montgomery[010025xx.xxx]0025Ahttp://gutenberg.net.au/ebooks01/0100251.txt o...http://gutenberg.net.au/ebooks01/0100251.txt o...http://gutenberg.net.au/ebooks01/0100251h.htmlAnne of Windy Poplarshttp://gutenberg.net.au/ebooks01/0100251.txtL M MontgomeryNaNNaNNaNNaN<table width=\"45%\" border =\"0\">\\r\\n<tr>\\r\\n...NaN
3698Nov 2001Llana of Gathol, by Edgar Rice Burroughs[010024xx.xxx]0024Ahttp://gutenberg.net.au/ebooks01/0100241h.htmlhttp://gutenberg.net.au/ebooks01/0100241h.htmlhttp://gutenberg.net.au/ebooks01/0100241h.htmlLlana of GatholNaNEdgar Rice BurroughsNaNNaNNaNNaNNaN<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 T...
3699Nov 2001Synthetic Men of Mars, by Edgar Rice Burroughs[010023xx.xxx]0023Ahttp://gutenberg.net.au/ebooks01/0100231h.htmlhttp://gutenberg.net.au/ebooks01/0100231h.htmlhttp://gutenberg.net.au/ebooks01/0100231h.htmlSynthetic Men of MarsNaNEdgar Rice BurroughsNaNNaNNaNNaNNaN<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 T...
3700Nov 2001Swords of Mars, by Edgar Rice Burroughs[010022xx.xxx]0022Ahttp://gutenberg.net.au/ebooks01/0100221h.htmlhttp://gutenberg.net.au/ebooks01/0100221h.htmlhttp://gutenberg.net.au/ebooks01/0100221h.htmlSwords of MarsNaNEdgar Rice BurroughsNaNNaNNaNNaNNaN<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 T...
3701Nov 2001A Fighting Man of Mars, by Edgar Rice Burroughs[010021xx.xxx]0021Ahttp://gutenberg.net.au/ebooks01/0100211h.htmlhttp://gutenberg.net.au/ebooks01/0100211h.htmlhttp://gutenberg.net.au/ebooks01/0100211h.htmlA Fighting Man of MarsNaNEdgar Rice BurroughsNaNNaNNaNNaNNaN<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 T...
3702Nov 2001The Master Mind of Mars, by Edgar Rice Burroughs[010020xx.xxx]0020Ahttp://gutenberg.net.au/ebooks01/0100201h.htmlhttp://gutenberg.net.au/ebooks01/0100201h.htmlhttp://gutenberg.net.au/ebooks01/0100201h.htmlThe Master Mind of MarsNaNEdgar Rice BurroughsNaNNaNNaNNaNNaN<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 T...
3703Nov 2001Two hanged Women, by Henry H H Richardson[010019xx.xxx]0019A[Author: Henry Handel Richardson] http://guten...http://gutenberg.net.au/ebooks01/0100191.txt o...http://gutenberg.net.au/ebooks01/0100191h.htmlTwo hanged Womenhttp://gutenberg.net.au/ebooks01/0100191.txtHenry Handel RichardsonNaNNaNNaNHenry H H Richardson<table width=\"45%\" border =\"0\">\\r\\n<tr>\\r\\n...NaN
3704Nov 2001Lady Chatterley's Lover, by D H Lawrence[010018xx.xxx]0018Ahttp://gutenberg.net.au/ebooks01/0100181.txt o...http://gutenberg.net.au/ebooks01/0100181.txt o...NaNLady Chatterley's Loverhttp://gutenberg.net.au/ebooks01/0100181.txtD H LawrenceNaNNaNNaNNaN\\r\\n<table width=\"45%\" border =\"0\">\\r\\n<tr>...NaN
3705Nov 2001Down and Out in Paris and London, by George Or...[010017xx.xxx]0017Ahttp://gutenberg.net.au/ebooks01/0100171.txt o...http://gutenberg.net.au/ebooks01/0100171.txt o...http://gutenberg.net.au/ebooks01/0100171h.htmlDown and Out in Paris and Londonhttp://gutenberg.net.au/ebooks01/0100171.txtGeorge OrwellNaNNaNNaNNaN\\r\\n<table width=\"45%\" border =\"0\">\\r\\n<tr>...NaN
3706Oct 2001A Raw Youth, by Fyodor Dostoevsky[010016xx.xxx]0016Ahttp://gutenberg.net.au/ebooks01/0100161.txt o...http://gutenberg.net.au/ebooks01/0100161.txt o...http://gutenberg.net.au/ebooks01/0100161h.htmlA Raw Youthhttp://gutenberg.net.au/ebooks01/0100161.txtFyodor DostoevskyNaNNaNNaNNaN\\r\\n<table width=\"45%\" border =\"0\">\\r\\n<tr>...NaN
3707Oct 2001The Dwellers in the Mirage, by Abraham Merritt[010015xx.xxx]0015Ahttp://gutenberg.net.au/ebooks01/0100151.txt o...http://gutenberg.net.au/ebooks01/0100151.txt o...http://gutenberg.net.au/ebooks01/0100151h.htmlThe Dwellers in the Miragehttp://gutenberg.net.au/ebooks01/0100151.txtAbraham MerrittNaNNaNNaNNaN\\r\\n<table width=\"45%\" border =\"0\">\\r\\n<tr>...NaN
3708Oct 2001Bush Studies, by Barbara Baynton[010014xx.xxx]0014Ahttp://gutenberg.net.au/ebooks01/0100141.txt o...http://gutenberg.net.au/ebooks01/0100141.txt o...http://gutenberg.net.au/ebooks01/0100141h.htmlBush Studieshttp://gutenberg.net.au/ebooks01/0100141.txtBarbara BayntonNaNNaNNaNNaN\\r\\n<table width=\"45%\" border =\"0\">\\r\\n<tr>...NaN
3709Oct 2001Klee Wyck, by Emily Carr[010013xx.xxx]0013Ahttp://gutenberg.net.au/ebooks01/0100131.txt o...http://gutenberg.net.au/ebooks01/0100131.txt o...http://gutenberg.net.au/ebooks01/0100131h.htmlKlee Wyckhttp://gutenberg.net.au/ebooks01/0100131.txtEmily CarrNaNNaNNaNNaN<table width=\"45%\" border =\"0\">\\r\\n<tr>\\r\\n...NaN
3710Oct 2001The House of All Sorts, by Emily Carr[010012xx.xxx]0012Ahttp://gutenberg.net.au/ebooks01/0100121.txt o...http://gutenberg.net.au/ebooks01/0100121.txt o...http://gutenberg.net.au/ebooks01/0100121h.htmlThe House of All Sortshttp://gutenberg.net.au/ebooks01/0100121.txtEmily CarrNaNNaNNaNNaN\\r\\n\\r\\n<table width=\"45%\" border =\"0\">\\r\\n...NaN
3711Oct 2001Seven Pillars of Wisdom, by T. E. Lawrence[010011xx.xxx]0011Ahttp://gutenberg.net.au/ebooks01/0100111.txt o...http://gutenberg.net.au/ebooks01/0100111.txt o...NaNSeven Pillars of Wisdomhttp://gutenberg.net.au/ebooks01/0100111.txtT. E. LawrenceNaNNaNNaNNaN\\r\\n\\r\\n<table width=\"45%\" border =\"0\">\\r\\n...NaN
3712Oct 2001To the Lighthouse, by Virginia Woolf[010010xx.xxx]0010Ahttp://gutenberg.net.au/ebooks01/0100101.txt o...http://gutenberg.net.au/ebooks01/0100101.txt o...NaNTo the Lighthousehttp://gutenberg.net.au/ebooks01/0100101.txtVirginia WoolfNaNNaNNaNNaN\\r\\n\\r\\n<table width=\"45%\" border =\"0\">\\r\\n...NaN
3713Aug 2001Fortunes of Richard Mahony, by H H Richardson[010009xx.xxx]0009A[Trilogy comprising 010005xx.xxx, 010006xx.xxx...http://gutenberg.net.au/ebooks01/0100091.txt o...http://gutenberg.net.au/ebooks01/0100091h.htmlFortunes of Richard Mahonyhttp://gutenberg.net.au/ebooks01/0100091.txtHenry Handel Richardson[Trilogy comprising 010005xx.xxx, 010006xx.xxx...NaNNaNH H Richardson<table width=\"45%\" border =\"0\">\\r\\n<tr>\\r\\n...NaN
3714Aug 2001Here's Luck, by Lennie Lower[010008xx.xxx]0008Ahttp://gutenberg.net.au/ebooks01/0100081.txt o...http://gutenberg.net.au/ebooks01/0100081.txt o...http://gutenberg.net.au/ebooks01/0100081h.htmlHere's Luckhttp://gutenberg.net.au/ebooks01/0100081.txtLennie LowerNaNNaNNaNNaN<table width=\"45%\" border =\"0\">\\r\\n<tr>\\r\\n...NaN
3715Aug 2001Ultima Thule, by Henry Handel Richardson[010007xx.xxx]0007Ahttp://gutenberg.net.au/ebooks01/0100071.txt o...http://gutenberg.net.au/ebooks01/0100071.txt o...http://gutenberg.net.au/ebooks01/0100071h.htmlUltima Thulehttp://gutenberg.net.au/ebooks01/0100071.txtHenry Handel RichardsonNaNNaNNaNNaN<table width=\"45%\" border =\"0\">\\r\\n<tr>\\r\\n...NaN
3716Aug 2001The Way Home, by Henry Handel Richardson[010006xx.xxx]0006Ahttp://gutenberg.net.au/ebooks01/0100061.txt o...http://gutenberg.net.au/ebooks01/0100061.txt o...http://gutenberg.net.au/ebooks01/0100061h.htmlThe Way Homehttp://gutenberg.net.au/ebooks01/0100061.txtHenry Handel RichardsonNaNNaNNaNNaN<table width=\"45%\" border =\"0\">\\r\\n<tr>\\r\\n...NaN
3717Aug 2001Australia Felix, by Henry Handel Richardson[010005xx.xxx]0005Ahttp://gutenberg.net.au/ebooks01/0100051.txt o...http://gutenberg.net.au/ebooks01/0100051.txt o...http://gutenberg.net.au/ebooks01/0100051h.htmlAustralia Felixhttp://gutenberg.net.au/ebooks01/0100051.txtHenry Handel RichardsonNaNNaNNaNNaN<table width=\"45%\" border =\"0\">\\r\\n<tr>\\r\\n...NaN
3718Aug 2001Under the Northern Lights, by Alan Sullivan[010004xx.xxx]0004Ahttp://gutenberg.net.au/ebooks01/0100041.txt o...http://gutenberg.net.au/ebooks01/0100041.txt o...http://gutenberg.net.au/ebooks01/0100041h.htmlUnder the Northern Lightshttp://gutenberg.net.au/ebooks01/0100041.txtAlan SullivanNaNNaNNaNNaN<table width=\"45%\" border =\"0\">\\r\\n<tr>\\r\\n...NaN
3719Aug 2001When the World Screamed, by Sir A C Doyle[010003xx.xxx]0003A[Author: Sir Arthur Conan Doyle] http://gutenb...http://gutenberg.net.au/ebooks01/0100031.txt o...http://gutenberg.net.au/ebooks01/0100031h.htmlWhen the World Screamedhttp://gutenberg.net.au/ebooks01/0100031.txtSir Arthur Conan DoyleNaNNaNNaNSir A C Doyle<table width=\"45%\" border =\"0\">\\r\\n<tr>\\r\\n...NaN
3720Aug 2001Nineteen eighty-four, by George Orwell[010002xx.xxx]0002Ahttp://gutenberg.net.au/ebooks01/0100021.txt o...http://gutenberg.net.au/ebooks01/0100021.txt o...NaNNineteen eighty-fourhttp://gutenberg.net.au/ebooks01/0100021.txtGeorge OrwellNaNNaNNaNNaN\\r\\n\\r\\n<table width=\"45%\" border =\"0\">\\r\\n...NaN
3721Aug 2001Animal Farm, by George Orwell[010001xx.xxx]0001Ahttp://gutenberg.net.au/ebooks01/0100011.txt o...http://gutenberg.net.au/ebooks01/0100011.txt o...http://gutenberg.net.au/ebooks01/0100011h.htmlAnimal Farmhttp://gutenberg.net.au/ebooks01/0100011.txtGeorge OrwellNaNNaNNaNNaN\\r\\n\\r\\n<table width=\"45%\" border =\"0\">\\r\\n...NaN
\n", 1221 | "

3722 rows × 16 columns

\n", 1222 | "
" 1223 | ], 1224 | "text/plain": [ 1225 | " DateAdded Title and Author \\\n", 1226 | "0 Mar 2017 The Solitary Farm, Fergus Hume \n", 1227 | "1 Mar 2017 The Green Mummy, Fergus Hume \n", 1228 | "2 Mar 2017 The Opal Serpent, Fergus Hume \n", 1229 | "3 Mar 2017 The Mandarin's Fan, Fergus Hume \n", 1230 | "4 Mar 2017 The Red Window, Fergus Hume \n", 1231 | "5 Mar 2017 The Dwarf's Chamber, Fergus Hume \n", 1232 | "6 Mar 2017 A Coin of Edward VII, Fergus Hume \n", 1233 | "7 Mar 2017 The Pagan's Cup, Fergus Hume \n", 1234 | "8 Mar 2017 They See in Darkness, Ethel Lina White \n", 1235 | "9 Mar 2017 The Man Who Loved Lions, Ethel Lina White \n", 1236 | "10 Mar 2017 The Elephant Never Forgets, Ethel Lina White \n", 1237 | "11 Mar 2017 Put Out the Light, Ethel Lina White \n", 1238 | "12 Mar 2017 The First Time He Died, Ethel Lina White \n", 1239 | "13 Mar 2017 A Woman's Burden, Fergus Hume \n", 1240 | "14 Mar 2017 Midnight House, Ethel Lina White \n", 1241 | "15 Feb 2017 The Lone Inn, Fergus Hume \n", 1242 | "16 Feb 2017 The Bishop's Secret, Fergus Hume \n", 1243 | "17 Feb 2017 The Newcastle Packets, J H M Abbott \n", 1244 | "18 Feb 2017 The Silent House, Fergus Hume \n", 1245 | "19 Feb 2017 The Huddle, Carolyn Wells \n", 1246 | "20 Feb 2017 The Harlequin Opal, Fergus Hume \n", 1247 | "21 Feb 2017 Red Money, Fergus Hume \n", 1248 | "22 Feb 2017 The Third Eye, Ethel Lina White \n", 1249 | "23 Feb 2017 The Third Volume, Fergus Hume \n", 1250 | "24 Jan 2017 The Secret Passage, Fergus Hume \n", 1251 | "25 Jan 2017 Madame Midas, Fergus Hume \n", 1252 | "26 Jan 2017 Six-Gun Gorilla, Anonymous \n", 1253 | "27 Jan 2017 Collected Short Stories Vol. XX,Fred Merrick W... \n", 1254 | "28 Dec 2016 Beauty in Distress, Fred Merrick White \n", 1255 | "29 Dec 2016 The Shifting Sands, Fred Merrick White \n", 1256 | "... ... ... 
\n", 1257 | "3692 Dec 2001 Here's Another, by Lennie Lower \n", 1258 | "3693 Dec 2001 The Casebook of Sherlock Holmes, by A C Doyle \n", 1259 | "3694 Dec 2001 Anne of Ingleside, by L M Montgomery \n", 1260 | "3695 Dec 2001 Tarzan and the Golden Lion, by E R Burroughs \n", 1261 | "3696 Nov 2001 The Island of Desire, by Robert D Frisbie \n", 1262 | "3697 Nov 2001 Anne of Windy Poplars, by L M Montgomery \n", 1263 | "3698 Nov 2001 Llana of Gathol, by Edgar Rice Burroughs \n", 1264 | "3699 Nov 2001 Synthetic Men of Mars, by Edgar Rice Burroughs \n", 1265 | "3700 Nov 2001 Swords of Mars, by Edgar Rice Burroughs \n", 1266 | "3701 Nov 2001 A Fighting Man of Mars, by Edgar Rice Burroughs \n", 1267 | "3702 Nov 2001 The Master Mind of Mars, by Edgar Rice Burroughs \n", 1268 | "3703 Nov 2001 Two hanged Women, by Henry H H Richardson \n", 1269 | "3704 Nov 2001 Lady Chatterley's Lover, by D H Lawrence \n", 1270 | "3705 Nov 2001 Down and Out in Paris and London, by George Or... \n", 1271 | "3706 Oct 2001 A Raw Youth, by Fyodor Dostoevsky \n", 1272 | "3707 Oct 2001 The Dwellers in the Mirage, by Abraham Merritt \n", 1273 | "3708 Oct 2001 Bush Studies, by Barbara Baynton \n", 1274 | "3709 Oct 2001 Klee Wyck, by Emily Carr \n", 1275 | "3710 Oct 2001 The House of All Sorts, by Emily Carr \n", 1276 | "3711 Oct 2001 Seven Pillars of Wisdom, by T. E. 
Lawrence \n", 1277 | "3712 Oct 2001 To the Lighthouse, by Virginia Woolf \n", 1278 | "3713 Aug 2001 Fortunes of Richard Mahony, by H H Richardson \n", 1279 | "3714 Aug 2001 Here's Luck, by Lennie Lower \n", 1280 | "3715 Aug 2001 Ultima Thule, by Henry Handel Richardson \n", 1281 | "3716 Aug 2001 The Way Home, by Henry Handel Richardson \n", 1282 | "3717 Aug 2001 Australia Felix, by Henry Handel Richardson \n", 1283 | "3718 Aug 2001 Under the Northern Lights, by Alan Sullivan \n", 1284 | "3719 Aug 2001 When the World Screamed, by Sir A C Doyle \n", 1285 | "3720 Aug 2001 Nineteen eighty-four, by George Orwell \n", 1286 | "3721 Aug 2001 Animal Farm, by George Orwell \n", 1287 | "\n", 1288 | " IDa IDb \\\n", 1289 | "0 [170028xx.xxx] 3799A \n", 1290 | "1 [170027xx.xxx] 3798A \n", 1291 | "2 [170026xx.xxx] 3797A \n", 1292 | "3 [170025xx.xxx] 3796A \n", 1293 | "4 [170024xx.xxx] 3795A \n", 1294 | "5 [170023xx.xxx] 3794A \n", 1295 | "6 [170022xx.xxx] 3793A \n", 1296 | "7 [170021xx.xxx] 3792A \n", 1297 | "8 [170020xx.xxx] 3791A \n", 1298 | "9 [170019xx.xxx] 3790A \n", 1299 | "10 [170018xx.xxx] 3789A \n", 1300 | "11 [170017xx.xxx] 3788A \n", 1301 | "12 [170016xx.xxx] 3787A \n", 1302 | "13 [170015xx.xxx] 3786A \n", 1303 | "14 [170014xx.xxx] 3785A \n", 1304 | "15 [170013xx.xxx] 3784A \n", 1305 | "16 [170012xx.xxx] 3783A \n", 1306 | "17 [170011xx.xxx] 3782A \n", 1307 | "18 [170010xx.xxx] 3781A \n", 1308 | "19 [170009xx.xxx] 3780A \n", 1309 | "20 [170008xx.xxx] 3779A \n", 1310 | "21 [170007xx.xxx] 3778A \n", 1311 | "22 [170006xx.xxx] 3777A \n", 1312 | "23 [170005xx.xxx] 3776A \n", 1313 | "24 [170004xx.xxx] 3775A \n", 1314 | "25 [170003xx.xxx] 3774A \n", 1315 | "26 [170002xx.xxx] 3773A \n", 1316 | "27 [170001xx.xxx] 3772A \n", 1317 | "28 [160140xx.xxx] 3771A \n", 1318 | "29 [160139xx.xxx] 3770A \n", 1319 | "... ... ... 
\n", 1320 | "3692 [010030xx.xxx] 0030A \n", 1321 | "3693 [010029xx.xxx] 0029A \n", 1322 | "3694 [010028xx.xxx] 0028A \n", 1323 | "3695 [010027xx.xxx] 0027A \n", 1324 | "3696 [010026xx.xxx] 0026A \n", 1325 | "3697 [010025xx.xxx] 0025A \n", 1326 | "3698 [010024xx.xxx] 0024A \n", 1327 | "3699 [010023xx.xxx] 0023A \n", 1328 | "3700 [010022xx.xxx] 0022A \n", 1329 | "3701 [010021xx.xxx] 0021A \n", 1330 | "3702 [010020xx.xxx] 0020A \n", 1331 | "3703 [010019xx.xxx] 0019A \n", 1332 | "3704 [010018xx.xxx] 0018A \n", 1333 | "3705 [010017xx.xxx] 0017A \n", 1334 | "3706 [010016xx.xxx] 0016A \n", 1335 | "3707 [010015xx.xxx] 0015A \n", 1336 | "3708 [010014xx.xxx] 0014A \n", 1337 | "3709 [010013xx.xxx] 0013A \n", 1338 | "3710 [010012xx.xxx] 0012A \n", 1339 | "3711 [010011xx.xxx] 0011A \n", 1340 | "3712 [010010xx.xxx] 0010A \n", 1341 | "3713 [010009xx.xxx] 0009A \n", 1342 | "3714 [010008xx.xxx] 0008A \n", 1343 | "3715 [010007xx.xxx] 0007A \n", 1344 | "3716 [010006xx.xxx] 0006A \n", 1345 | "3717 [010005xx.xxx] 0005A \n", 1346 | "3718 [010004xx.xxx] 0004A \n", 1347 | "3719 [010003xx.xxx] 0003A \n", 1348 | "3720 [010002xx.xxx] 0002A \n", 1349 | "3721 [010001xx.xxx] 0001A \n", 1350 | "\n", 1351 | " URLOther \\\n", 1352 | "0 http://gutenberg.net.au/ebooks17/1700281h.html \n", 1353 | "1 http://gutenberg.net.au/ebooks17/1700271h.html \n", 1354 | "2 http://gutenberg.net.au/ebooks17/1700261h.html \n", 1355 | "3 http://gutenberg.net.au/ebooks17/1700251h.html \n", 1356 | "4 http://gutenberg.net.au/ebooks17/1700241h.html \n", 1357 | "5 http://gutenberg.net.au/ebooks17/1700231h.html \n", 1358 | "6 http://gutenberg.net.au/ebooks17/1700221h.html \n", 1359 | "7 http://gutenberg.net.au/ebooks17/1700211h.html \n", 1360 | "8 http://gutenberg.net.au/ebooks17/1700201h.html \n", 1361 | "9 http://gutenberg.net.au/ebooks17/1700191h.html \n", 1362 | "10 http://gutenberg.net.au/ebooks17/1700181h.html \n", 1363 | "11 http://gutenberg.net.au/ebooks17/1700171h.html \n", 1364 | "12 
http://gutenberg.net.au/ebooks17/1700161h.html \n", 1365 | "13 http://gutenberg.net.au/ebooks17/1700151h.html \n", 1366 | "14 http://gutenberg.net.au/ebooks17/1700141h.html \n", 1367 | "15 http://gutenberg.net.au/ebooks17/1700131h.html \n", 1368 | "16 http://gutenberg.net.au/ebooks17/1700121h.html \n", 1369 | "17 [Title: The Newcastle Packets and the Hunter V... \n", 1370 | "18 http://gutenberg.net.au/ebooks17/1700101h.html \n", 1371 | "19 http://gutenberg.net.au/ebooks17/1700091h.html \n", 1372 | "20 http://gutenberg.net.au/ebooks17/1700081h.html \n", 1373 | "21 http://gutenberg.net.au/ebooks17/1700071h.html \n", 1374 | "22 http://gutenberg.net.au/ebooks17/1700061h.html \n", 1375 | "23 http://gutenberg.net.au/ebooks17/1700051h.html \n", 1376 | "24 http://gutenberg.net.au/ebooks17/1700041h.html \n", 1377 | "25 http://gutenberg.net.au/ebooks17/1700031h.html \n", 1378 | "26 http://gutenberg.net.au/ebooks17/1700021h.html \n", 1379 | "27 http://gutenberg.net.au/ebooks17/1700011h.html \n", 1380 | "28 http://gutenberg.net.au/ebooks16/1601401.txt o... \n", 1381 | "29 http://gutenberg.net.au/ebooks16/1601391.txt o... \n", 1382 | "... ... \n", 1383 | "3692 http://gutenberg.net.au/ebooks01/0100301.txt o... \n", 1384 | "3693 [Author: Sir Arthur Conan Doyle] http://gutenb... \n", 1385 | "3694 http://gutenberg.net.au/ebooks01/0100281.txt o... \n", 1386 | "3695 [Author: Edgar Rice Burroughs] http://gutenber... \n", 1387 | "3696 http://gutenberg.net.au/ebooks01/0100261.txt o... \n", 1388 | "3697 http://gutenberg.net.au/ebooks01/0100251.txt o... \n", 1389 | "3698 http://gutenberg.net.au/ebooks01/0100241h.html \n", 1390 | "3699 http://gutenberg.net.au/ebooks01/0100231h.html \n", 1391 | "3700 http://gutenberg.net.au/ebooks01/0100221h.html \n", 1392 | "3701 http://gutenberg.net.au/ebooks01/0100211h.html \n", 1393 | "3702 http://gutenberg.net.au/ebooks01/0100201h.html \n", 1394 | "3703 [Author: Henry Handel Richardson] http://guten... 
\n", 1395 | "3704 http://gutenberg.net.au/ebooks01/0100181.txt o... \n", 1396 | "3705 http://gutenberg.net.au/ebooks01/0100171.txt o... \n", 1397 | "3706 http://gutenberg.net.au/ebooks01/0100161.txt o... \n", 1398 | "3707 http://gutenberg.net.au/ebooks01/0100151.txt o... \n", 1399 | "3708 http://gutenberg.net.au/ebooks01/0100141.txt o... \n", 1400 | "3709 http://gutenberg.net.au/ebooks01/0100131.txt o... \n", 1401 | "3710 http://gutenberg.net.au/ebooks01/0100121.txt o... \n", 1402 | "3711 http://gutenberg.net.au/ebooks01/0100111.txt o... \n", 1403 | "3712 http://gutenberg.net.au/ebooks01/0100101.txt o... \n", 1404 | "3713 [Trilogy comprising 010005xx.xxx, 010006xx.xxx... \n", 1405 | "3714 http://gutenberg.net.au/ebooks01/0100081.txt o... \n", 1406 | "3715 http://gutenberg.net.au/ebooks01/0100071.txt o... \n", 1407 | "3716 http://gutenberg.net.au/ebooks01/0100061.txt o... \n", 1408 | "3717 http://gutenberg.net.au/ebooks01/0100051.txt o... \n", 1409 | "3718 http://gutenberg.net.au/ebooks01/0100041.txt o... \n", 1410 | "3719 [Author: Sir Arthur Conan Doyle] http://gutenb... \n", 1411 | "3720 http://gutenberg.net.au/ebooks01/0100021.txt o... \n", 1412 | "3721 http://gutenberg.net.au/ebooks01/0100011.txt o... 
\n", 1413 | "\n", 1414 | " URLs \\\n", 1415 | "0 http://gutenberg.net.au/ebooks17/1700281h.html \n", 1416 | "1 http://gutenberg.net.au/ebooks17/1700271h.html \n", 1417 | "2 http://gutenberg.net.au/ebooks17/1700261h.html \n", 1418 | "3 http://gutenberg.net.au/ebooks17/1700251h.html \n", 1419 | "4 http://gutenberg.net.au/ebooks17/1700241h.html \n", 1420 | "5 http://gutenberg.net.au/ebooks17/1700231h.html \n", 1421 | "6 http://gutenberg.net.au/ebooks17/1700221h.html \n", 1422 | "7 http://gutenberg.net.au/ebooks17/1700211h.html \n", 1423 | "8 http://gutenberg.net.au/ebooks17/1700201h.html \n", 1424 | "9 http://gutenberg.net.au/ebooks17/1700191h.html \n", 1425 | "10 http://gutenberg.net.au/ebooks17/1700181h.html \n", 1426 | "11 http://gutenberg.net.au/ebooks17/1700171h.html \n", 1427 | "12 http://gutenberg.net.au/ebooks17/1700161h.html \n", 1428 | "13 http://gutenberg.net.au/ebooks17/1700151h.html \n", 1429 | "14 http://gutenberg.net.au/ebooks17/1700141h.html \n", 1430 | "15 http://gutenberg.net.au/ebooks17/1700131h.html \n", 1431 | "16 http://gutenberg.net.au/ebooks17/1700121h.html \n", 1432 | "17 http://gutenberg.net.au/ebooks17/1700111.txt o... \n", 1433 | "18 http://gutenberg.net.au/ebooks17/1700101h.html \n", 1434 | "19 http://gutenberg.net.au/ebooks17/1700091h.html \n", 1435 | "20 http://gutenberg.net.au/ebooks17/1700081h.html \n", 1436 | "21 http://gutenberg.net.au/ebooks17/1700071h.html \n", 1437 | "22 http://gutenberg.net.au/ebooks17/1700061h.html \n", 1438 | "23 http://gutenberg.net.au/ebooks17/1700051h.html \n", 1439 | "24 http://gutenberg.net.au/ebooks17/1700041h.html \n", 1440 | "25 http://gutenberg.net.au/ebooks17/1700031h.html \n", 1441 | "26 http://gutenberg.net.au/ebooks17/1700021h.html \n", 1442 | "27 http://gutenberg.net.au/ebooks17/1700011h.html \n", 1443 | "28 http://gutenberg.net.au/ebooks16/1601401.txt o... \n", 1444 | "29 http://gutenberg.net.au/ebooks16/1601391.txt o... \n", 1445 | "... ... 
\n", 1446 | "3692 http://gutenberg.net.au/ebooks01/0100301.txt o... \n", 1447 | "3693 http://gutenberg.net.au/ebooks01/0100291.txt o... \n", 1448 | "3694 http://gutenberg.net.au/ebooks01/0100281.txt o... \n", 1449 | "3695 http://gutenberg.net.au/ebooks01/0100271h.html \n", 1450 | "3696 http://gutenberg.net.au/ebooks01/0100261.txt o... \n", 1451 | "3697 http://gutenberg.net.au/ebooks01/0100251.txt o... \n", 1452 | "3698 http://gutenberg.net.au/ebooks01/0100241h.html \n", 1453 | "3699 http://gutenberg.net.au/ebooks01/0100231h.html \n", 1454 | "3700 http://gutenberg.net.au/ebooks01/0100221h.html \n", 1455 | "3701 http://gutenberg.net.au/ebooks01/0100211h.html \n", 1456 | "3702 http://gutenberg.net.au/ebooks01/0100201h.html \n", 1457 | "3703 http://gutenberg.net.au/ebooks01/0100191.txt o... \n", 1458 | "3704 http://gutenberg.net.au/ebooks01/0100181.txt o... \n", 1459 | "3705 http://gutenberg.net.au/ebooks01/0100171.txt o... \n", 1460 | "3706 http://gutenberg.net.au/ebooks01/0100161.txt o... \n", 1461 | "3707 http://gutenberg.net.au/ebooks01/0100151.txt o... \n", 1462 | "3708 http://gutenberg.net.au/ebooks01/0100141.txt o... \n", 1463 | "3709 http://gutenberg.net.au/ebooks01/0100131.txt o... \n", 1464 | "3710 http://gutenberg.net.au/ebooks01/0100121.txt o... \n", 1465 | "3711 http://gutenberg.net.au/ebooks01/0100111.txt o... \n", 1466 | "3712 http://gutenberg.net.au/ebooks01/0100101.txt o... \n", 1467 | "3713 http://gutenberg.net.au/ebooks01/0100091.txt o... \n", 1468 | "3714 http://gutenberg.net.au/ebooks01/0100081.txt o... \n", 1469 | "3715 http://gutenberg.net.au/ebooks01/0100071.txt o... \n", 1470 | "3716 http://gutenberg.net.au/ebooks01/0100061.txt o... \n", 1471 | "3717 http://gutenberg.net.au/ebooks01/0100051.txt o... \n", 1472 | "3718 http://gutenberg.net.au/ebooks01/0100041.txt o... \n", 1473 | "3719 http://gutenberg.net.au/ebooks01/0100031.txt o... \n", 1474 | "3720 http://gutenberg.net.au/ebooks01/0100021.txt o... 
\n", 1475 | "3721 http://gutenberg.net.au/ebooks01/0100011.txt o... \n", 1476 | "\n", 1477 | " htmlURL \\\n", 1478 | "0 http://gutenberg.net.au/ebooks17/1700281h.html \n", 1479 | "1 http://gutenberg.net.au/ebooks17/1700271h.html \n", 1480 | "2 http://gutenberg.net.au/ebooks17/1700261h.html \n", 1481 | "3 http://gutenberg.net.au/ebooks17/1700251h.html \n", 1482 | "4 http://gutenberg.net.au/ebooks17/1700241h.html \n", 1483 | "5 http://gutenberg.net.au/ebooks17/1700231h.html \n", 1484 | "6 http://gutenberg.net.au/ebooks17/1700221h.html \n", 1485 | "7 http://gutenberg.net.au/ebooks17/1700211h.html \n", 1486 | "8 http://gutenberg.net.au/ebooks17/1700201h.html \n", 1487 | "9 http://gutenberg.net.au/ebooks17/1700191h.html \n", 1488 | "10 http://gutenberg.net.au/ebooks17/1700181h.html \n", 1489 | "11 http://gutenberg.net.au/ebooks17/1700171h.html \n", 1490 | "12 http://gutenberg.net.au/ebooks17/1700161h.html \n", 1491 | "13 http://gutenberg.net.au/ebooks17/1700151h.html \n", 1492 | "14 http://gutenberg.net.au/ebooks17/1700141h.html \n", 1493 | "15 http://gutenberg.net.au/ebooks17/1700131h.html \n", 1494 | "16 http://gutenberg.net.au/ebooks17/1700121h.html \n", 1495 | "17 http://gutenberg.net.au/ebooks17/1700111h.html \n", 1496 | "18 http://gutenberg.net.au/ebooks17/1700101h.html \n", 1497 | "19 http://gutenberg.net.au/ebooks17/1700091h.html \n", 1498 | "20 http://gutenberg.net.au/ebooks17/1700081h.html \n", 1499 | "21 http://gutenberg.net.au/ebooks17/1700071h.html \n", 1500 | "22 http://gutenberg.net.au/ebooks17/1700061h.html \n", 1501 | "23 http://gutenberg.net.au/ebooks17/1700051h.html \n", 1502 | "24 http://gutenberg.net.au/ebooks17/1700041h.html \n", 1503 | "25 http://gutenberg.net.au/ebooks17/1700031h.html \n", 1504 | "26 http://gutenberg.net.au/ebooks17/1700021h.html \n", 1505 | "27 http://gutenberg.net.au/ebooks17/1700011h.html \n", 1506 | "28 http://gutenberg.net.au/ebooks16/1601401h.html \n", 1507 | "29 http://gutenberg.net.au/ebooks16/1601391h.html \n", 1508 | 
"... ... \n", 1509 | "3692 http://gutenberg.net.au/ebooks01/0100301h.html \n", 1510 | "3693 http://gutenberg.net.au/ebooks01/0100291.h.html \n", 1511 | "3694 http://gutenberg.net.au/ebooks01/0100281h.html \n", 1512 | "3695 http://gutenberg.net.au/ebooks01/0100271h.html \n", 1513 | "3696 http://gutenberg.net.au/ebooks01/0100261h.html \n", 1514 | "3697 http://gutenberg.net.au/ebooks01/0100251h.html \n", 1515 | "3698 http://gutenberg.net.au/ebooks01/0100241h.html \n", 1516 | "3699 http://gutenberg.net.au/ebooks01/0100231h.html \n", 1517 | "3700 http://gutenberg.net.au/ebooks01/0100221h.html \n", 1518 | "3701 http://gutenberg.net.au/ebooks01/0100211h.html \n", 1519 | "3702 http://gutenberg.net.au/ebooks01/0100201h.html \n", 1520 | "3703 http://gutenberg.net.au/ebooks01/0100191h.html \n", 1521 | "3704 NaN \n", 1522 | "3705 http://gutenberg.net.au/ebooks01/0100171h.html \n", 1523 | "3706 http://gutenberg.net.au/ebooks01/0100161h.html \n", 1524 | "3707 http://gutenberg.net.au/ebooks01/0100151h.html \n", 1525 | "3708 http://gutenberg.net.au/ebooks01/0100141h.html \n", 1526 | "3709 http://gutenberg.net.au/ebooks01/0100131h.html \n", 1527 | "3710 http://gutenberg.net.au/ebooks01/0100121h.html \n", 1528 | "3711 NaN \n", 1529 | "3712 NaN \n", 1530 | "3713 http://gutenberg.net.au/ebooks01/0100091h.html \n", 1531 | "3714 http://gutenberg.net.au/ebooks01/0100081h.html \n", 1532 | "3715 http://gutenberg.net.au/ebooks01/0100071h.html \n", 1533 | "3716 http://gutenberg.net.au/ebooks01/0100061h.html \n", 1534 | "3717 http://gutenberg.net.au/ebooks01/0100051h.html \n", 1535 | "3718 http://gutenberg.net.au/ebooks01/0100041h.html \n", 1536 | "3719 http://gutenberg.net.au/ebooks01/0100031h.html \n", 1537 | "3720 NaN \n", 1538 | "3721 http://gutenberg.net.au/ebooks01/0100011h.html \n", 1539 | "\n", 1540 | " Title \\\n", 1541 | "0 The Solitary Farm \n", 1542 | "1 The Green Mummy \n", 1543 | "2 The Opal Serpent \n", 1544 | "3 The Mandarin's Fan \n", 1545 | "4 The Red Window \n", 1546 | "5 
The Dwarf's Chamber \n", 1547 | "6 A Coin of Edward VII \n", 1548 | "7 The Pagan's Cup \n", 1549 | "8 They See in Darkness \n", 1550 | "9 The Man Who Loved Lions \n", 1551 | "10 The Elephant Never Forgets \n", 1552 | "11 Put Out the Light \n", 1553 | "12 The First Time He Died \n", 1554 | "13 A Woman's Burden \n", 1555 | "14 Midnight House \n", 1556 | "15 The Lone Inn \n", 1557 | "16 The Bishop's Secret \n", 1558 | "17 The Newcastle Packets and the Hunter Valley \n", 1559 | "18 The Silent House \n", 1560 | "19 The Huddle \n", 1561 | "20 The Harlequin Opal \n", 1562 | "21 Red Money \n", 1563 | "22 The Third Eye \n", 1564 | "23 The Third Volume \n", 1565 | "24 The Secret Passage \n", 1566 | "25 Madame Midas \n", 1567 | "26 Six-Gun Gorilla \n", 1568 | "27 Collected Short Stories Vol. XX \n", 1569 | "28 Beauty in Distress \n", 1570 | "29 The Shifting Sands \n", 1571 | "... ... \n", 1572 | "3692 Here's Another \n", 1573 | "3693 The Casebook of Sherlock Holmes \n", 1574 | "3694 Anne of Ingleside \n", 1575 | "3695 Tarzan and the Golden Lion \n", 1576 | "3696 The Island of Desire \n", 1577 | "3697 Anne of Windy Poplars \n", 1578 | "3698 Llana of Gathol \n", 1579 | "3699 Synthetic Men of Mars \n", 1580 | "3700 Swords of Mars \n", 1581 | "3701 A Fighting Man of Mars \n", 1582 | "3702 The Master Mind of Mars \n", 1583 | "3703 Two hanged Women \n", 1584 | "3704 Lady Chatterley's Lover \n", 1585 | "3705 Down and Out in Paris and London \n", 1586 | "3706 A Raw Youth \n", 1587 | "3707 The Dwellers in the Mirage \n", 1588 | "3708 Bush Studies \n", 1589 | "3709 Klee Wyck \n", 1590 | "3710 The House of All Sorts \n", 1591 | "3711 Seven Pillars of Wisdom \n", 1592 | "3712 To the Lighthouse \n", 1593 | "3713 Fortunes of Richard Mahony \n", 1594 | "3714 Here's Luck \n", 1595 | "3715 Ultima Thule \n", 1596 | "3716 The Way Home \n", 1597 | "3717 Australia Felix \n", 1598 | "3718 Under the Northern Lights \n", 1599 | "3719 When the World Screamed \n", 1600 | "3720 Nineteen eighty-four 
\n", 1601 | "3721 Animal Farm \n", 1602 | "\n", 1603 | " txtURL Author \\\n", 1604 | "0 NaN Fergus Hume \n", 1605 | "1 NaN Fergus Hume \n", 1606 | "2 NaN Fergus Hume \n", 1607 | "3 NaN Fergus Hume \n", 1608 | "4 NaN Fergus Hume \n", 1609 | "5 NaN Fergus Hume \n", 1610 | "6 NaN Fergus Hume \n", 1611 | "7 NaN Fergus Hume \n", 1612 | "8 NaN Ethel Lina White \n", 1613 | "9 NaN Ethel Lina White \n", 1614 | "10 NaN Ethel Lina White \n", 1615 | "11 NaN Ethel Lina White \n", 1616 | "12 NaN Ethel Lina White \n", 1617 | "13 NaN Fergus Hume \n", 1618 | "14 NaN Ethel Lina White \n", 1619 | "15 NaN Fergus Hume \n", 1620 | "16 NaN Fergus Hume \n", 1621 | "17 http://gutenberg.net.au/ebooks17/1700111.txt J H M Abbott \n", 1622 | "18 NaN Fergus Hume \n", 1623 | "19 NaN Carolyn Wells \n", 1624 | "20 NaN Fergus Hume \n", 1625 | "21 NaN Fergus Hume \n", 1626 | "22 NaN Ethel Lina White \n", 1627 | "23 NaN Fergus Hume \n", 1628 | "24 NaN Fergus Hume \n", 1629 | "25 NaN Fergus Hume \n", 1630 | "26 NaN Anonymous \n", 1631 | "27 NaN Fred Merrick White \n", 1632 | "28 http://gutenberg.net.au/ebooks16/1601401.txt Fred Merrick White \n", 1633 | "29 http://gutenberg.net.au/ebooks16/1601391.txt Fred Merrick White \n", 1634 | "... ... ... 
\n", 1635 | "3692 http://gutenberg.net.au/ebooks01/0100301.txt Lennie Lower \n", 1636 | "3693 http://gutenberg.net.au/ebooks01/0100291.txt Sir Arthur Conan Doyle \n", 1637 | "3694 http://gutenberg.net.au/ebooks01/0100281.txt L M Montgomery \n", 1638 | "3695 NaN Edgar Rice Burroughs \n", 1639 | "3696 http://gutenberg.net.au/ebooks01/0100261.txt Robert D Frisbie \n", 1640 | "3697 http://gutenberg.net.au/ebooks01/0100251.txt L M Montgomery \n", 1641 | "3698 NaN Edgar Rice Burroughs \n", 1642 | "3699 NaN Edgar Rice Burroughs \n", 1643 | "3700 NaN Edgar Rice Burroughs \n", 1644 | "3701 NaN Edgar Rice Burroughs \n", 1645 | "3702 NaN Edgar Rice Burroughs \n", 1646 | "3703 http://gutenberg.net.au/ebooks01/0100191.txt Henry Handel Richardson \n", 1647 | "3704 http://gutenberg.net.au/ebooks01/0100181.txt D H Lawrence \n", 1648 | "3705 http://gutenberg.net.au/ebooks01/0100171.txt George Orwell \n", 1649 | "3706 http://gutenberg.net.au/ebooks01/0100161.txt Fyodor Dostoevsky \n", 1650 | "3707 http://gutenberg.net.au/ebooks01/0100151.txt Abraham Merritt \n", 1651 | "3708 http://gutenberg.net.au/ebooks01/0100141.txt Barbara Baynton \n", 1652 | "3709 http://gutenberg.net.au/ebooks01/0100131.txt Emily Carr \n", 1653 | "3710 http://gutenberg.net.au/ebooks01/0100121.txt Emily Carr \n", 1654 | "3711 http://gutenberg.net.au/ebooks01/0100111.txt T. E. 
Lawrence \n", 1655 | "3712 http://gutenberg.net.au/ebooks01/0100101.txt Virginia Woolf \n", 1656 | "3713 http://gutenberg.net.au/ebooks01/0100091.txt Henry Handel Richardson \n", 1657 | "3714 http://gutenberg.net.au/ebooks01/0100081.txt Lennie Lower \n", 1658 | "3715 http://gutenberg.net.au/ebooks01/0100071.txt Henry Handel Richardson \n", 1659 | "3716 http://gutenberg.net.au/ebooks01/0100061.txt Henry Handel Richardson \n", 1660 | "3717 http://gutenberg.net.au/ebooks01/0100051.txt Henry Handel Richardson \n", 1661 | "3718 http://gutenberg.net.au/ebooks01/0100041.txt Alan Sullivan \n", 1662 | "3719 http://gutenberg.net.au/ebooks01/0100031.txt Sir Arthur Conan Doyle \n", 1663 | "3720 http://gutenberg.net.au/ebooks01/0100021.txt George Orwell \n", 1664 | "3721 http://gutenberg.net.au/ebooks01/0100011.txt George Orwell \n", 1665 | "\n", 1666 | " Notes Subtitle \\\n", 1667 | "0 NaN NaN \n", 1668 | "1 NaN NaN \n", 1669 | "2 NaN NaN \n", 1670 | "3 NaN NaN \n", 1671 | "4 NaN NaN \n", 1672 | "5 NaN NaN \n", 1673 | "6 NaN NaN \n", 1674 | "7 NaN NaN \n", 1675 | "8 NaN NaN \n", 1676 | "9 NaN NaN \n", 1677 | "10 NaN NaN \n", 1678 | "11 NaN NaN \n", 1679 | "12 NaN NaN \n", 1680 | "13 NaN NaN \n", 1681 | "14 NaN NaN \n", 1682 | "15 NaN NaN \n", 1683 | "16 NaN NaN \n", 1684 | "17 NaN NaN \n", 1685 | "18 NaN NaN \n", 1686 | "19 NaN NaN \n", 1687 | "20 NaN NaN \n", 1688 | "21 NaN NaN \n", 1689 | "22 NaN NaN \n", 1690 | "23 NaN NaN \n", 1691 | "24 NaN NaN \n", 1692 | "25 NaN NaN \n", 1693 | "26 NaN NaN \n", 1694 | "27 NaN NaN \n", 1695 | "28 NaN NaN \n", 1696 | "29 NaN NaN \n", 1697 | "... ... ... 
\n", 1698 | "3692 NaN NaN \n", 1699 | "3693 NaN NaN \n", 1700 | "3694 NaN NaN \n", 1701 | "3695 NaN NaN \n", 1702 | "3696 NaN NaN \n", 1703 | "3697 NaN NaN \n", 1704 | "3698 NaN NaN \n", 1705 | "3699 NaN NaN \n", 1706 | "3700 NaN NaN \n", 1707 | "3701 NaN NaN \n", 1708 | "3702 NaN NaN \n", 1709 | "3703 NaN NaN \n", 1710 | "3704 NaN NaN \n", 1711 | "3705 NaN NaN \n", 1712 | "3706 NaN NaN \n", 1713 | "3707 NaN NaN \n", 1714 | "3708 NaN NaN \n", 1715 | "3709 NaN NaN \n", 1716 | "3710 NaN NaN \n", 1717 | "3711 NaN NaN \n", 1718 | "3712 NaN NaN \n", 1719 | "3713 [Trilogy comprising 010005xx.xxx, 010006xx.xxx... NaN \n", 1720 | "3714 NaN NaN \n", 1721 | "3715 NaN NaN \n", 1722 | "3716 NaN NaN \n", 1723 | "3717 NaN NaN \n", 1724 | "3718 NaN NaN \n", 1725 | "3719 NaN NaN \n", 1726 | "3720 NaN NaN \n", 1727 | "3721 NaN NaN \n", 1728 | "\n", 1729 | " ShortTitle AuthorAlt \\\n", 1730 | "0 NaN NaN \n", 1731 | "1 NaN NaN \n", 1732 | "2 NaN NaN \n", 1733 | "3 NaN NaN \n", 1734 | "4 NaN NaN \n", 1735 | "5 NaN NaN \n", 1736 | "6 NaN NaN \n", 1737 | "7 NaN NaN \n", 1738 | "8 NaN NaN \n", 1739 | "9 NaN NaN \n", 1740 | "10 NaN NaN \n", 1741 | "11 NaN NaN \n", 1742 | "12 NaN NaN \n", 1743 | "13 NaN NaN \n", 1744 | "14 NaN NaN \n", 1745 | "15 NaN NaN \n", 1746 | "16 NaN NaN \n", 1747 | "17 The Newcastle Packets NaN \n", 1748 | "18 NaN NaN \n", 1749 | "19 NaN NaN \n", 1750 | "20 NaN NaN \n", 1751 | "21 NaN NaN \n", 1752 | "22 NaN NaN \n", 1753 | "23 NaN NaN \n", 1754 | "24 NaN NaN \n", 1755 | "25 NaN NaN \n", 1756 | "26 NaN NaN \n", 1757 | "27 NaN NaN \n", 1758 | "28 NaN NaN \n", 1759 | "29 NaN NaN \n", 1760 | "... ... ... 
\n", 1761 | "3692 NaN NaN \n", 1762 | "3693 NaN A C Doyle \n", 1763 | "3694 NaN NaN \n", 1764 | "3695 NaN E R Burroughs \n", 1765 | "3696 NaN NaN \n", 1766 | "3697 NaN NaN \n", 1767 | "3698 NaN NaN \n", 1768 | "3699 NaN NaN \n", 1769 | "3700 NaN NaN \n", 1770 | "3701 NaN NaN \n", 1771 | "3702 NaN NaN \n", 1772 | "3703 NaN Henry H H Richardson \n", 1773 | "3704 NaN NaN \n", 1774 | "3705 NaN NaN \n", 1775 | "3706 NaN NaN \n", 1776 | "3707 NaN NaN \n", 1777 | "3708 NaN NaN \n", 1778 | "3709 NaN NaN \n", 1779 | "3710 NaN NaN \n", 1780 | "3711 NaN NaN \n", 1781 | "3712 NaN NaN \n", 1782 | "3713 NaN H H Richardson \n", 1783 | "3714 NaN NaN \n", 1784 | "3715 NaN NaN \n", 1785 | "3716 NaN NaN \n", 1786 | "3717 NaN NaN \n", 1787 | "3718 NaN NaN \n", 1788 | "3719 NaN Sir A C Doyle \n", 1789 | "3720 NaN NaN \n", 1790 | "3721 NaN NaN \n", 1791 | "\n", 1792 | " Text \\\n", 1793 | "0 NaN \n", 1794 | "1 NaN \n", 1795 | "2 NaN \n", 1796 | "3 NaN \n", 1797 | "4 NaN \n", 1798 | "5 NaN \n", 1799 | "6 NaN \n", 1800 | "7 NaN \n", 1801 | "8 NaN \n", 1802 | "9 NaN \n", 1803 | "10 NaN \n", 1804 | "11 NaN \n", 1805 | "12 NaN \n", 1806 | "13 NaN \n", 1807 | "14 NaN \n", 1808 | "15 NaN \n", 1809 | "16 NaN \n", 1810 | "17 \\r\\n\\r\\n... \n", 1811 | "18 NaN \n", 1812 | "19 NaN \n", 1813 | "20 NaN \n", 1814 | "21 NaN \n", 1815 | "22 NaN \n", 1816 | "23 NaN \n", 1817 | "24 NaN \n", 1818 | "25 NaN \n", 1819 | "26 NaN \n", 1820 | "27 NaN \n", 1821 | "28 \\r\\n
\\r\\n... \n", 1822 | "29 \\r\\n
\\r\\n... \n", 1823 | "... ... \n", 1824 | "3692 
\\r\\n\\r\\n... \n", 1825 | "3693 
\\r\\n\\r\\n... \n", 1826 | "3694 
\\r\\n\\r\\n... \n", 1827 | "3695 NaN \n", 1828 | "3696 \\r\\n\\r\\n
\\r\\n... \n", 1829 | "3697 
\\r\\n\\r\\n... \n", 1830 | "3698 NaN \n", 1831 | "3699 NaN \n", 1832 | "3700 NaN \n", 1833 | "3701 NaN \n", 1834 | "3702 NaN \n", 1835 | "3703 
\\r\\n\\r\\n... \n", 1836 | "3704 \\r\\n
\\r\\n... \n", 1837 | "3705 \\r\\n
\\r\\n... \n", 1838 | "3706 \\r\\n
\\r\\n... \n", 1839 | "3707 \\r\\n
\\r\\n... \n", 1840 | "3708 \\r\\n
\\r\\n... \n", 1841 | "3709 
\\r\\n\\r\\n... \n", 1842 | "3710 \\r\\n\\r\\n
\\r\\n... \n", 1843 | "3711 \\r\\n\\r\\n
\\r\\n... \n", 1844 | "3712 \\r\\n\\r\\n
\\r\\n... \n", 1845 | "3713 
\\r\\n\\r\\n... \n", 1846 | "3714 
\\r\\n\\r\\n... \n", 1847 | "3715 
\\r\\n\\r\\n... \n", 1848 | "3716 
\\r\\n\\r\\n... \n", 1849 | "3717 
\\r\\n\\r\\n... \n", 1850 | "3718 
\\r\\n\\r\\n... \n", 1851 | "3719 
\\r\\n\\r\\n... \n", 1852 | "3720 \\r\\n\\r\\n
\\r\\n... \n", 1853 | "3721 \\r\\n\\r\\n
\\r\\n... \n", 1854 | "\n", 1855 | " HTML \n", 1856 | "0 \\r\\n" 79 | ] 80 | }, 81 | "execution_count": 50, 82 | "metadata": {}, 83 | "output_type": "execute_result" 84 | } 85 | ], 86 | "source": [ 87 | "tree" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 51, 93 | "metadata": { 94 | "collapsed": false 95 | }, 96 | "outputs": [], 97 | "source": [ 98 | "root = tree.getroot()" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 60, 104 | "metadata": { 105 | "collapsed": false 106 | }, 107 | "outputs": [], 108 | "source": [ 109 | "elems = root.findall('*//*')" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 61, 115 | "metadata": { 116 | "collapsed": false 117 | }, 118 | "outputs": [ 119 | { 120 | "data": { 121 | "text/plain": [ 122 | "['{http://purl.org/dc/terms/}language',\n", 123 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 124 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value',\n", 125 | " '{http://purl.org/dc/terms/}hasFormat',\n", 126 | " '{http://www.gutenberg.org/2009/pgterms/}file',\n", 127 | " '{http://purl.org/dc/terms/}format',\n", 128 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 129 | " '{http://purl.org/dc/dcam/}memberOf',\n", 130 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value',\n", 131 | " '{http://purl.org/dc/terms/}isFormatOf',\n", 132 | " '{http://purl.org/dc/terms/}extent',\n", 133 | " '{http://purl.org/dc/terms/}modified',\n", 134 | " '{http://www.gutenberg.org/2009/pgterms/}bookshelf',\n", 135 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 136 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value',\n", 137 | " '{http://purl.org/dc/dcam/}memberOf',\n", 138 | " '{http://purl.org/dc/terms/}hasFormat',\n", 139 | " '{http://www.gutenberg.org/2009/pgterms/}file',\n", 140 | " '{http://purl.org/dc/terms/}modified',\n", 141 | " '{http://purl.org/dc/terms/}isFormatOf',\n", 142 | " 
'{http://purl.org/dc/terms/}extent',\n", 143 | " '{http://purl.org/dc/terms/}format',\n", 144 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 145 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value',\n", 146 | " '{http://purl.org/dc/dcam/}memberOf',\n", 147 | " '{http://purl.org/dc/terms/}creator',\n", 148 | " '{http://www.gutenberg.org/2009/pgterms/}agent',\n", 149 | " '{http://www.gutenberg.org/2009/pgterms/}webpage',\n", 150 | " '{http://www.gutenberg.org/2009/pgterms/}birthdate',\n", 151 | " '{http://www.gutenberg.org/2009/pgterms/}name',\n", 152 | " '{http://www.gutenberg.org/2009/pgterms/}deathdate',\n", 153 | " '{http://www.gutenberg.org/2009/pgterms/}alias',\n", 154 | " '{http://www.gutenberg.org/2009/pgterms/}downloads',\n", 155 | " '{http://purl.org/dc/terms/}subject',\n", 156 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 157 | " '{http://purl.org/dc/dcam/}memberOf',\n", 158 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value',\n", 159 | " '{http://purl.org/dc/terms/}hasFormat',\n", 160 | " '{http://www.gutenberg.org/2009/pgterms/}file',\n", 161 | " '{http://purl.org/dc/terms/}isFormatOf',\n", 162 | " '{http://purl.org/dc/terms/}extent',\n", 163 | " '{http://purl.org/dc/terms/}modified',\n", 164 | " '{http://purl.org/dc/terms/}format',\n", 165 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 166 | " '{http://purl.org/dc/dcam/}memberOf',\n", 167 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value',\n", 168 | " '{http://purl.org/dc/terms/}hasFormat',\n", 169 | " '{http://www.gutenberg.org/2009/pgterms/}file',\n", 170 | " '{http://purl.org/dc/terms/}isFormatOf',\n", 171 | " '{http://purl.org/dc/terms/}format',\n", 172 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 173 | " '{http://purl.org/dc/dcam/}memberOf',\n", 174 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value',\n", 175 | " '{http://purl.org/dc/terms/}extent',\n", 176 | " 
'{http://purl.org/dc/terms/}modified',\n", 177 | " '{http://www.gutenberg.org/2009/pgterms/}bookshelf',\n", 178 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 179 | " '{http://purl.org/dc/dcam/}memberOf',\n", 180 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value',\n", 181 | " '{http://purl.org/dc/terms/}hasFormat',\n", 182 | " '{http://www.gutenberg.org/2009/pgterms/}file',\n", 183 | " '{http://purl.org/dc/terms/}isFormatOf',\n", 184 | " '{http://purl.org/dc/terms/}format',\n", 185 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 186 | " '{http://purl.org/dc/dcam/}memberOf',\n", 187 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value',\n", 188 | " '{http://purl.org/dc/terms/}format',\n", 189 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 190 | " '{http://purl.org/dc/dcam/}memberOf',\n", 191 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value',\n", 192 | " '{http://purl.org/dc/terms/}extent',\n", 193 | " '{http://purl.org/dc/terms/}modified',\n", 194 | " '{http://purl.org/dc/terms/}hasFormat',\n", 195 | " '{http://www.gutenberg.org/2009/pgterms/}file',\n", 196 | " '{http://purl.org/dc/terms/}format',\n", 197 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 198 | " '{http://purl.org/dc/dcam/}memberOf',\n", 199 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value',\n", 200 | " '{http://purl.org/dc/terms/}isFormatOf',\n", 201 | " '{http://purl.org/dc/terms/}modified',\n", 202 | " '{http://purl.org/dc/terms/}extent',\n", 203 | " '{http://purl.org/dc/terms/}hasFormat',\n", 204 | " '{http://www.gutenberg.org/2009/pgterms/}file',\n", 205 | " '{http://purl.org/dc/terms/}format',\n", 206 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 207 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value',\n", 208 | " '{http://purl.org/dc/dcam/}memberOf',\n", 209 | " '{http://purl.org/dc/terms/}extent',\n", 210 | " '{http://purl.org/dc/terms/}modified',\n", 211 | " 
'{http://purl.org/dc/terms/}isFormatOf',\n", 212 | " '{http://purl.org/dc/terms/}publisher',\n", 213 | " '{http://purl.org/dc/terms/}subject',\n", 214 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 215 | " '{http://purl.org/dc/dcam/}memberOf',\n", 216 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value',\n", 217 | " '{http://purl.org/dc/terms/}issued',\n", 218 | " '{http://purl.org/dc/terms/}description',\n", 219 | " '{http://purl.org/dc/terms/}hasFormat',\n", 220 | " '{http://www.gutenberg.org/2009/pgterms/}file',\n", 221 | " '{http://purl.org/dc/terms/}isFormatOf',\n", 222 | " '{http://purl.org/dc/terms/}extent',\n", 223 | " '{http://purl.org/dc/terms/}modified',\n", 224 | " '{http://purl.org/dc/terms/}format',\n", 225 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 226 | " '{http://purl.org/dc/dcam/}memberOf',\n", 227 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value',\n", 228 | " '{http://purl.org/dc/terms/}hasFormat',\n", 229 | " '{http://www.gutenberg.org/2009/pgterms/}file',\n", 230 | " '{http://purl.org/dc/terms/}format',\n", 231 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 232 | " '{http://purl.org/dc/dcam/}memberOf',\n", 233 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value',\n", 234 | " '{http://purl.org/dc/terms/}isFormatOf',\n", 235 | " '{http://purl.org/dc/terms/}extent',\n", 236 | " '{http://purl.org/dc/terms/}modified',\n", 237 | " '{http://purl.org/dc/terms/}subject',\n", 238 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 239 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value',\n", 240 | " '{http://purl.org/dc/dcam/}memberOf',\n", 241 | " '{http://purl.org/dc/terms/}license',\n", 242 | " '{http://purl.org/dc/terms/}subject',\n", 243 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 244 | " '{http://purl.org/dc/dcam/}memberOf',\n", 245 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value',\n", 246 | " 
'{http://purl.org/dc/terms/}hasFormat',\n", 247 | " '{http://www.gutenberg.org/2009/pgterms/}file',\n", 248 | " '{http://purl.org/dc/terms/}isFormatOf',\n", 249 | " '{http://purl.org/dc/terms/}format',\n", 250 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 251 | " '{http://purl.org/dc/dcam/}memberOf',\n", 252 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value',\n", 253 | " '{http://purl.org/dc/terms/}modified',\n", 254 | " '{http://purl.org/dc/terms/}format',\n", 255 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 256 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value',\n", 257 | " '{http://purl.org/dc/dcam/}memberOf',\n", 258 | " '{http://purl.org/dc/terms/}extent',\n", 259 | " '{http://purl.org/dc/terms/}type',\n", 260 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 261 | " '{http://purl.org/dc/dcam/}memberOf',\n", 262 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value',\n", 263 | " '{http://www.gutenberg.org/2009/pgterms/}bookshelf',\n", 264 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 265 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value',\n", 266 | " '{http://purl.org/dc/dcam/}memberOf',\n", 267 | " '{http://purl.org/dc/terms/}hasFormat',\n", 268 | " '{http://www.gutenberg.org/2009/pgterms/}file',\n", 269 | " '{http://purl.org/dc/terms/}isFormatOf',\n", 270 | " '{http://purl.org/dc/terms/}extent',\n", 271 | " '{http://purl.org/dc/terms/}modified',\n", 272 | " '{http://purl.org/dc/terms/}format',\n", 273 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 274 | " '{http://purl.org/dc/dcam/}memberOf',\n", 275 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value',\n", 276 | " '{http://purl.org/dc/terms/}hasFormat',\n", 277 | " '{http://www.gutenberg.org/2009/pgterms/}file',\n", 278 | " '{http://purl.org/dc/terms/}extent',\n", 279 | " '{http://purl.org/dc/terms/}format',\n", 280 | " 
'{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 281 | " '{http://purl.org/dc/dcam/}memberOf',\n", 282 | " '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value',\n", 283 | " '{http://purl.org/dc/terms/}isFormatOf',\n", 284 | " '{http://purl.org/dc/terms/}modified',\n", 285 | " '{http://purl.org/dc/terms/}rights',\n", 286 | " '{http://purl.org/dc/terms/}title',\n", 287 | " '{http://www.w3.org/2000/01/rdf-schema#}comment',\n", 288 | " '{http://web.resource.org/cc/}license',\n", 289 | " '{http://purl.org/dc/terms/}description']" 290 | ] 291 | }, 292 | "execution_count": 61, 293 | "metadata": {}, 294 | "output_type": "execute_result" 295 | } 296 | ], 297 | "source": [ 298 | "[e.tag for e in elems]" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": 62, 304 | "metadata": { 305 | "collapsed": false 306 | }, 307 | "outputs": [ 308 | { 309 | "data": { 310 | "text/plain": [ 311 | "[('{http://purl.org/dc/terms/}language', '\\n '),\n", 312 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description', '\\n '),\n", 313 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value', 'en'),\n", 314 | " ('{http://purl.org/dc/terms/}hasFormat', '\\n '),\n", 315 | " ('{http://www.gutenberg.org/2009/pgterms/}file', '\\n '),\n", 316 | " ('{http://purl.org/dc/terms/}format', '\\n '),\n", 317 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 318 | " '\\n '),\n", 319 | " ('{http://purl.org/dc/dcam/}memberOf', None),\n", 320 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value', 'application/prs.tex'),\n", 321 | " ('{http://purl.org/dc/terms/}isFormatOf', None),\n", 322 | " ('{http://purl.org/dc/terms/}extent', '347559'),\n", 323 | " ('{http://purl.org/dc/terms/}modified', '2015-05-27T06:55:50'),\n", 324 | " ('{http://www.gutenberg.org/2009/pgterms/}bookshelf', '\\n '),\n", 325 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description', '\\n '),\n", 326 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value', 'United 
States Law'),\n", 327 | " ('{http://purl.org/dc/dcam/}memberOf', None),\n", 328 | " ('{http://purl.org/dc/terms/}hasFormat', '\\n '),\n", 329 | " ('{http://www.gutenberg.org/2009/pgterms/}file', '\\n '),\n", 330 | " ('{http://purl.org/dc/terms/}modified', '2017-03-01T01:01:14.071083'),\n", 331 | " ('{http://purl.org/dc/terms/}isFormatOf', None),\n", 332 | " ('{http://purl.org/dc/terms/}extent', '52593'),\n", 333 | " ('{http://purl.org/dc/terms/}format', '\\n '),\n", 334 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 335 | " '\\n '),\n", 336 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value',\n", 337 | " 'application/epub+zip'),\n", 338 | " ('{http://purl.org/dc/dcam/}memberOf', None),\n", 339 | " ('{http://purl.org/dc/terms/}creator', '\\n '),\n", 340 | " ('{http://www.gutenberg.org/2009/pgterms/}agent', '\\n '),\n", 341 | " ('{http://www.gutenberg.org/2009/pgterms/}webpage', None),\n", 342 | " ('{http://www.gutenberg.org/2009/pgterms/}birthdate', '1743'),\n", 343 | " ('{http://www.gutenberg.org/2009/pgterms/}name', 'Jefferson, Thomas'),\n", 344 | " ('{http://www.gutenberg.org/2009/pgterms/}deathdate', '1826'),\n", 345 | " ('{http://www.gutenberg.org/2009/pgterms/}alias',\n", 346 | " 'United States President (1801-1809)'),\n", 347 | " ('{http://www.gutenberg.org/2009/pgterms/}downloads', '614'),\n", 348 | " ('{http://purl.org/dc/terms/}subject', '\\n '),\n", 349 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description', '\\n '),\n", 350 | " ('{http://purl.org/dc/dcam/}memberOf', None),\n", 351 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value', 'E201'),\n", 352 | " ('{http://purl.org/dc/terms/}hasFormat', '\\n '),\n", 353 | " ('{http://www.gutenberg.org/2009/pgterms/}file', '\\n '),\n", 354 | " ('{http://purl.org/dc/terms/}isFormatOf', None),\n", 355 | " ('{http://purl.org/dc/terms/}extent', '14208'),\n", 356 | " ('{http://purl.org/dc/terms/}modified', '2017-03-16T05:01:11.366095'),\n", 357 | " 
('{http://purl.org/dc/terms/}format', '\\n '),\n", 358 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 359 | " '\\n '),\n", 360 | " ('{http://purl.org/dc/dcam/}memberOf', None),\n", 361 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value', 'application/rdf+xml'),\n", 362 | " ('{http://purl.org/dc/terms/}hasFormat', '\\n '),\n", 363 | " ('{http://www.gutenberg.org/2009/pgterms/}file', '\\n '),\n", 364 | " ('{http://purl.org/dc/terms/}isFormatOf', None),\n", 365 | " ('{http://purl.org/dc/terms/}format', '\\n '),\n", 366 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 367 | " '\\n '),\n", 368 | " ('{http://purl.org/dc/dcam/}memberOf', None),\n", 369 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value',\n", 370 | " 'application/x-mobipocket-ebook'),\n", 371 | " ('{http://purl.org/dc/terms/}extent', '224165'),\n", 372 | " ('{http://purl.org/dc/terms/}modified', '2017-03-01T01:01:15.341061'),\n", 373 | " ('{http://www.gutenberg.org/2009/pgterms/}bookshelf', '\\n '),\n", 374 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description', '\\n '),\n", 375 | " ('{http://purl.org/dc/dcam/}memberOf', None),\n", 376 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value', 'Politics'),\n", 377 | " ('{http://purl.org/dc/terms/}hasFormat', '\\n '),\n", 378 | " ('{http://www.gutenberg.org/2009/pgterms/}file', '\\n '),\n", 379 | " ('{http://purl.org/dc/terms/}isFormatOf', None),\n", 380 | " ('{http://purl.org/dc/terms/}format', '\\n '),\n", 381 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 382 | " '\\n '),\n", 383 | " ('{http://purl.org/dc/dcam/}memberOf', None),\n", 384 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value', 'application/zip'),\n", 385 | " ('{http://purl.org/dc/terms/}format', '\\n '),\n", 386 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 387 | " '\\n '),\n", 388 | " ('{http://purl.org/dc/dcam/}memberOf', None),\n", 389 | " 
('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value', 'application/prs.tex'),\n", 390 | " ('{http://purl.org/dc/terms/}extent', '620062'),\n", 391 | " ('{http://purl.org/dc/terms/}modified', '2015-05-27T06:58:20'),\n", 392 | " ('{http://purl.org/dc/terms/}hasFormat', '\\n '),\n", 393 | " ('{http://www.gutenberg.org/2009/pgterms/}file', '\\n '),\n", 394 | " ('{http://purl.org/dc/terms/}format', '\\n '),\n", 395 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 396 | " '\\n '),\n", 397 | " ('{http://purl.org/dc/dcam/}memberOf', None),\n", 398 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value', 'text/html'),\n", 399 | " ('{http://purl.org/dc/terms/}isFormatOf', None),\n", 400 | " ('{http://purl.org/dc/terms/}modified', '2017-03-01T01:01:13.808111'),\n", 401 | " ('{http://purl.org/dc/terms/}extent', '134175'),\n", 402 | " ('{http://purl.org/dc/terms/}hasFormat', '\\n '),\n", 403 | " ('{http://www.gutenberg.org/2009/pgterms/}file', '\\n '),\n", 404 | " ('{http://purl.org/dc/terms/}format', '\\n '),\n", 405 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 406 | " '\\n '),\n", 407 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value',\n", 408 | " 'application/epub+zip'),\n", 409 | " ('{http://purl.org/dc/dcam/}memberOf', None),\n", 410 | " ('{http://purl.org/dc/terms/}extent', '52594'),\n", 411 | " ('{http://purl.org/dc/terms/}modified', '2017-03-01T01:01:13.990084'),\n", 412 | " ('{http://purl.org/dc/terms/}isFormatOf', None),\n", 413 | " ('{http://purl.org/dc/terms/}publisher', 'Project Gutenberg'),\n", 414 | " ('{http://purl.org/dc/terms/}subject', '\\n '),\n", 415 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description', '\\n '),\n", 416 | " ('{http://purl.org/dc/dcam/}memberOf', None),\n", 417 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value',\n", 418 | " 'United States. 
Declaration of Independence'),\n", 419 | " ('{http://purl.org/dc/terms/}issued', '1971-12-01'),\n", 420 | " ('{http://purl.org/dc/terms/}description',\n", 421 | " 'This is the original PG edition.\\r\\nSee also our revised edition: #16780\\r\\nSee also #300'),\n", 422 | " ('{http://purl.org/dc/terms/}hasFormat', '\\n '),\n", 423 | " ('{http://www.gutenberg.org/2009/pgterms/}file', '\\n '),\n", 424 | " ('{http://purl.org/dc/terms/}isFormatOf', None),\n", 425 | " ('{http://purl.org/dc/terms/}extent', '224174'),\n", 426 | " ('{http://purl.org/dc/terms/}modified', '2017-03-01T01:01:14.717075'),\n", 427 | " ('{http://purl.org/dc/terms/}format', '\\n '),\n", 428 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 429 | " '\\n '),\n", 430 | " ('{http://purl.org/dc/dcam/}memberOf', None),\n", 431 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value',\n", 432 | " 'application/x-mobipocket-ebook'),\n", 433 | " ('{http://purl.org/dc/terms/}hasFormat', '\\n '),\n", 434 | " ('{http://www.gutenberg.org/2009/pgterms/}file', '\\n '),\n", 435 | " ('{http://purl.org/dc/terms/}format', '\\n '),\n", 436 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 437 | " '\\n '),\n", 438 | " ('{http://purl.org/dc/dcam/}memberOf', None),\n", 439 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value', 'text/plain'),\n", 440 | " ('{http://purl.org/dc/terms/}isFormatOf', None),\n", 441 | " ('{http://purl.org/dc/terms/}extent', '121031'),\n", 442 | " ('{http://purl.org/dc/terms/}modified', '2017-03-01T01:01:13.400101'),\n", 443 | " ('{http://purl.org/dc/terms/}subject', '\\n '),\n", 444 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description', '\\n '),\n", 445 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value',\n", 446 | " 'United States -- History -- Revolution, 1775-1783 -- Sources'),\n", 447 | " ('{http://purl.org/dc/dcam/}memberOf', None),\n", 448 | " ('{http://purl.org/dc/terms/}license', None),\n", 449 | " 
('{http://purl.org/dc/terms/}subject', '\\n '),\n", 450 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description', '\\n '),\n", 451 | " ('{http://purl.org/dc/dcam/}memberOf', None),\n", 452 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value', 'JK'),\n", 453 | " ('{http://purl.org/dc/terms/}hasFormat', '\\n '),\n", 454 | " ('{http://www.gutenberg.org/2009/pgterms/}file', '\\n '),\n", 455 | " ('{http://purl.org/dc/terms/}isFormatOf', None),\n", 456 | " ('{http://purl.org/dc/terms/}format', '\\n '),\n", 457 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 458 | " '\\n '),\n", 459 | " ('{http://purl.org/dc/dcam/}memberOf', None),\n", 460 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value', 'application/zip'),\n", 461 | " ('{http://purl.org/dc/terms/}modified', '2015-03-31T16:02:08'),\n", 462 | " ('{http://purl.org/dc/terms/}format', '\\n '),\n", 463 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 464 | " '\\n '),\n", 465 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value',\n", 466 | " 'text/plain; charset=us-ascii'),\n", 467 | " ('{http://purl.org/dc/dcam/}memberOf', None),\n", 468 | " ('{http://purl.org/dc/terms/}extent', '45687'),\n", 469 | " ('{http://purl.org/dc/terms/}type', '\\n '),\n", 470 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description', '\\n '),\n", 471 | " ('{http://purl.org/dc/dcam/}memberOf', None),\n", 472 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value', 'Text'),\n", 473 | " ('{http://www.gutenberg.org/2009/pgterms/}bookshelf', '\\n '),\n", 474 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description', '\\n '),\n", 475 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value',\n", 476 | " 'American Revolutionary War'),\n", 477 | " ('{http://purl.org/dc/dcam/}memberOf', None),\n", 478 | " ('{http://purl.org/dc/terms/}hasFormat', '\\n '),\n", 479 | " ('{http://www.gutenberg.org/2009/pgterms/}file', '\\n '),\n", 480 | " ('{http://purl.org/dc/terms/}isFormatOf', 
None),\n", 481 | " ('{http://purl.org/dc/terms/}extent', '121061'),\n", 482 | " ('{http://purl.org/dc/terms/}modified', '2015-03-31T16:00:16'),\n", 483 | " ('{http://purl.org/dc/terms/}format', '\\n '),\n", 484 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 485 | " '\\n '),\n", 486 | " ('{http://purl.org/dc/dcam/}memberOf', None),\n", 487 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value',\n", 488 | " 'text/plain; charset=us-ascii'),\n", 489 | " ('{http://purl.org/dc/terms/}hasFormat', '\\n '),\n", 490 | " ('{http://www.gutenberg.org/2009/pgterms/}file', '\\n '),\n", 491 | " ('{http://purl.org/dc/terms/}extent', '134175'),\n", 492 | " ('{http://purl.org/dc/terms/}format', '\\n '),\n", 493 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description',\n", 494 | " '\\n '),\n", 495 | " ('{http://purl.org/dc/dcam/}memberOf', None),\n", 496 | " ('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}value', 'text/html'),\n", 497 | " ('{http://purl.org/dc/terms/}isFormatOf', None),\n", 498 | " ('{http://purl.org/dc/terms/}modified', '2017-03-01T01:01:13.877093'),\n", 499 | " ('{http://purl.org/dc/terms/}rights', 'Public domain in the USA.'),\n", 500 | " ('{http://purl.org/dc/terms/}title',\n", 501 | " 'The Declaration of Independence of the United States of America'),\n", 502 | " ('{http://www.w3.org/2000/01/rdf-schema#}comment',\n", 503 | " 'Archives containing the RDF files for *all* our books can be downloaded at\\n http://www.gutenberg.org/wiki/Gutenberg:Feeds#The_Complete_Project_Gutenberg_Catalog'),\n", 504 | " ('{http://web.resource.org/cc/}license', None),\n", 505 | " ('{http://purl.org/dc/terms/}description', 'Wikipedia')]" 506 | ] 507 | }, 508 | "execution_count": 62, 509 | "metadata": {}, 510 | "output_type": "execute_result" 511 | } 512 | ], 513 | "source": [ 514 | "[(e.tag, e.text) for e in elems]" 515 | ] 516 | }, 517 | { 518 | "cell_type": "code", 519 | "execution_count": null, 520 | "metadata": { 521 | "collapsed": true 522 | }, 523 
| "outputs": [], 524 | "source": [] 525 | } 526 | ], 527 | "metadata": { 528 | "kernelspec": { 529 | "display_name": "Python 3", 530 | "language": "python", 531 | "name": "python3" 532 | }, 533 | "language_info": { 534 | "codemirror_mode": { 535 | "name": "ipython", 536 | "version": 3 537 | }, 538 | "file_extension": ".py", 539 | "mimetype": "text/x-python", 540 | "name": "python", 541 | "nbconvert_exporter": "python", 542 | "pygments_lexer": "ipython3", 543 | "version": "3.6.0" 544 | } 545 | }, 546 | "nbformat": 4, 547 | "nbformat_minor": 2 548 | } 549 | -------------------------------------------------------------------------------- /parseRDF.py: -------------------------------------------------------------------------------- 1 | """Extract metadata from Project Gutenberg RDF catalog into a Python dict. 2 | 3 | Based on https://bitbucket.org/c-w/gutenberg/ 4 | 5 | >>> md = readmetadata() 6 | >>> md[123] 7 | {'LCC': {'PS'}, 8 | 'author': u'Burroughs, Edgar Rice', 9 | 'authoryearofbirth': 1875, 10 | 'authoryearofdeath': 1950, 11 | 'downloads': 401, 12 | 'formats': {'application/epub+zip': 'http://www.gutenberg.org/ebooks/123.epub.noimages', 13 | 'application/prs.plucker': 'http://www.gutenberg.org/ebooks/123.plucker', 14 | 'application/x-mobipocket-ebook': 'http://www.gutenberg.org/ebooks/123.kindle.noimages', 15 | 'application/x-qioo-ebook': 'http://www.gutenberg.org/ebooks/123.qioo', 16 | 'text/html; charset=iso-8859-1': 'http://www.gutenberg.org/files/123/123-h.zip', 17 | 'text/plain': 'http://www.gutenberg.org/ebooks/123.txt.utf-8', 18 | 'text/plain; charset=us-ascii': 'http://www.gutenberg.org/files/123/123.zip'}, 19 | 'id': 123, 20 | 'language': ['en'], 21 | 'subjects': {'Adventure stories', 22 | 'Earth (Planet) -- Core -- Fiction', 23 | 'Fantasy fiction', 24 | 'Science fiction'}, 25 | 'title': u"At the Earth's Core", 26 | 'type': 'Text'} 27 | 28 | """ 29 | 30 | import os 31 | import re 32 | import gzip 33 | import tarfile 34 | import urllib 35 | import 
import urllib.request
# xml.etree.cElementTree was removed in Python 3.9; the plain module already
# uses the C accelerator when it is available.
import xml.etree.ElementTree as ElementTree
try:
    import _pickle as pickle
except ImportError:
    import pickle

PICKLEFILE = '/tmp/md.pickle.gz'  # The Python dict produced by this module
RDFFILES = '/tmp/rdf-files.tar.bz2'  # The catalog downloaded from Gutenberg
RDFURL = r'http://www.gutenberg.org/cache/epub/feeds/rdf-files.tar.bz2'
META_FIELDS = ('id', 'author', 'title', 'downloads', 'formats', 'type', 'LCC',
               'subjects', 'authoryearofbirth', 'authoryearofdeath',
               'language')
NS = dict(
    pg='http://www.gutenberg.org/2009/pgterms/',
    dc='http://purl.org/dc/terms/',
    dcam='http://purl.org/dc/dcam/',
    rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#')
# A run of line breaks together with the horizontal whitespace around it.
LINEBREAKRE = re.compile(r'[ \t]*[\n\r]+[ \t]*')
# "EBook #123", "Etext# 123" (number after the hash) or "Etext 123#" (number
# before the hash). The group names are the ones etextno() reads.
ETEXTRE = re.compile(r'''
    e(text|b?ook)
    \s*
    (\#\s*(?P<etextid_front>\d+)
    |
    (?P<etextid_back>\d+)\s*\#)
    ''', re.IGNORECASE | re.VERBOSE)


def readmetadata(picklefile=None, rdffiles=None):
    """Read/create cached metadata dump of Gutenberg catalog.

    Args:
        picklefile (str): path of the gzipped pickle cache; defaults to
            PICKLEFILE.
        rdffiles (str): path of the RDF catalog archive, passed on to
            getrdfdata(); defaults to RDFFILES.

    Returns:
        A dictionary mapping each Gutenberg id (int) to a dictionary with
        the following fields:

        id (int): Gutenberg identifier of text
        author (str): Last name, First name
        authoryearofbirth (int), authoryearofdeath (int): author's dates
        title (str): title of work
        subjects (set of str): descriptive subjects; a subject may be
            hierarchical, e.g:
            'England -- Social life and customs -- 19th century -- Fiction'
        LCC (set of str): Library of Congress Classifications, e.g., 'PS'
        language (list of str): list of language codes.
        type (str): 'Text', 'Sound', ...
        formats (dict of str, str pairs): keys are MIME types, values are
            URLs.
        downloads (int): the download count reported by the catalog.

        Fields that are not part of the metadata are set to None.
        http://www.gutenberg.org/wiki/Gutenberg:Help_on_Bibliographic_Record_Page
    """
    if picklefile is None:
        picklefile = PICKLEFILE
    if os.path.exists(picklefile):
        # NOTE(security): unpickling executes arbitrary code, and the default
        # cache lives in world-writable /tmp. Only use a cache file that this
        # process (or a trusted user) created.
        with gzip.open(picklefile, 'rb') as cache:
            return pickle.load(cache)
    metadata = {}
    for xml in getrdfdata(rdffiles):
        ebook = xml.find(r'{%(pg)s}ebook' % NS)
        if ebook is None:
            continue
        result = parsemetadata(ebook)
        if result is not None:
            metadata[result['id']] = result
    # Close the gzip stream explicitly so the cache is flushed to disk.
    with gzip.open(picklefile, 'wb') as cache:
        pickle.dump(metadata, cache, protocol=-1)
    return metadata


def getrdfdata(rdffiles=None):
    """Downloads Project Gutenberg RDF catalog (if not already on disk).

    Args:
        rdffiles (str): path of the catalog archive; defaults to RDFFILES.
            When the file does not exist it is downloaded from RDFURL.

    Yields:
        xml.etree.ElementTree.ElementTree: One parsed RDF document per
        regular file in the archive.

    """
    if rdffiles is None:
        rdffiles = RDFFILES
    if not os.path.exists(rdffiles):
        urllib.request.urlretrieve(RDFURL, rdffiles)
    with tarfile.open(rdffiles) as archive:
        for tarinfo in archive:
            member = archive.extractfile(tarinfo)
            if member is None:
                # Directories and other non-file members have no content.
                continue
            yield ElementTree.parse(member)


def parsemetadata(ebook):
    """Parses an etext meta-data definition to extract fields.

    Args:
        ebook (xml.etree.ElementTree.Element): An ebook meta-data definition.

    Returns:
        dict: one entry per name in META_FIELDS; fields that are absent from
        the definition are left as None (subjects/LCC become empty sets).

    """
    result = dict.fromkeys(META_FIELDS)
    # get etext no: the rdf:about attribute ends in the numeric id
    about = ebook.get('{%(rdf)s}about' % NS)
    result['id'] = int(os.path.basename(about))
    # author
    creator = ebook.find('.//{%(dc)s}creator' % NS)
    if creator is not None:
        name = creator.find('.//{%(pg)s}name' % NS)
        if name is not None and name.text is not None:
            result['author'] = safeunicode(name.text, encoding='utf-8')
        birth = creator.find('.//{%(pg)s}birthdate' % NS)
        if birth is not None:
            result['authoryearofbirth'] = int(birth.text)
        death = creator.find('.//{%(pg)s}deathdate' % NS)
        if death is not None:
            result['authoryearofdeath'] = int(death.text)
    # title
    title = ebook.find('.//{%(dc)s}title' % NS)
    if title is not None and title.text is not None:
        result['title'] = fixsubtitles(
            safeunicode(title.text, encoding='utf-8'))
    # subject lists
    result['subjects'], result['LCC'] = set(), set()
    for subject in ebook.findall('.//{%(dc)s}subject' % NS):
        res = subject.find('.//{%(dcam)s}memberOf' % NS)
        value = subject.find('.//{%(rdf)s}value' % NS)
        if res is None or value is None:
            continue
        res = res.get('{%(rdf)s}resource' % NS)
        if res == ('%(dc)sLCSH' % NS):
            result['subjects'].add(value.text)
        elif res == ('%(dc)sLCC' % NS):
            result['LCC'].add(value.text)
    # formats: MIME type -> URL of the file
    formats = {}
    for fileelem in ebook.findall('.//{%(pg)s}file' % NS):
        mimetype = fileelem.find('{%(dc)s}format//{%(rdf)s}value' % NS)
        if mimetype is None:
            continue
        formats[mimetype.text] = fileelem.get('{%(rdf)s}about' % NS)
    result['formats'] = formats
    # type
    booktype = ebook.find('.//{%(dc)s}type//{%(rdf)s}value' % NS)
    if booktype is not None:
        result['type'] = booktype.text
    # languages
    lang = ebook.findall('.//{%(dc)s}language//{%(rdf)s}value' % NS)
    result['language'] = [a.text for a in lang] or None
    # download count
    downloads = ebook.find('.//{%(pg)s}downloads' % NS)
    if downloads is not None:
        result['downloads'] = int(downloads.text)
    return result


def etextno(lines):
    """Retrieves the id for an etext.

    Args:
        lines (iter): The lines of the etext to search.

    Returns:
        int: The id of the etext.

    Raises:
        ValueError: If no etext id was found.

    Examples:
        >>> etextno(['Release Date: March 17, 2004 [EBook #11609]'])
        11609

        >>> etextno(['Release Date: July, 2003 [Etext# 4263]'])
        4263

        >>> etextno(['Release Date: November 29, 2003 [Eook #10335]'])
        10335

        >>> etextno(['December, 1998 [Etext 1576#]'])
        1576

        >>> etextno(['Some lines', 'without', 'Any [Etext] Number'])
        Traceback (most recent call last):
            ...
        ValueError: no etext-id found
    """
    for line in lines:
        match = ETEXTRE.search(line)
        if match is not None:
            front_match = match.group('etextid_front')
            back_match = match.group('etextid_back')
            if front_match is not None:
                return int(front_match)
            elif back_match is not None:
                return int(back_match)
            else:
                # Every alternative of ETEXTRE captures one of the two groups.
                raise ValueError('no regex match (this should never happen')
    raise ValueError('no etext-id found')


def fixsubtitles(title):
    r"""Introduce any subtitle with (semi)colons instead of newlines.

    The first subtitle is introduced with a colon, the rest with semicolons.

    >>> fixsubtitles('First Across ...\r\nThe Story of ... \r\n'
    ...              'Being an investigation into ...')
    'First Across ...: The Story of ...; Being an investigation into ...'"""
    tmp = LINEBREAKRE.sub(': ', title, 1)
    return LINEBREAKRE.sub('; ', tmp)


def safeunicode(arg, *args, **kwargs):
    """Coerce argument to text (``str``), if it's not already.

    Extra arguments (e.g. ``encoding='utf-8'``) are handed to ``str`` and are
    only used when ``arg`` is not already text, i.e. to decode bytes.
    """
    return arg if isinstance(arg, str) else str(arg, *args, **kwargs)


__all__ = ['readmetadata']
= self.bestWork.average_rating.text\n", 53 | " self.id = self.bestWork.id.text\n", 54 | " self.numReviews = self.bestWork.text_reviews_count.text\n", 55 | " self.authorID = self.bestWork.author.id.text\n", 56 | " self.imageURL = self.bestWork.image_url.text\n", 57 | " self.smallImageURL = self.bestWork.small_image_url.text\n", 58 | " self.show()\n", 59 | " else: \n", 60 | " self.found = False\n", 61 | " \n", 62 | " def show(self): \n", 63 | " print('pubDate: ', self.pubDate)\n", 64 | " print('rating: ', self.rating)\n", 65 | " print('id: ', self.id)\n", 66 | " print('numReviews: ', self.numReviews)\n", 67 | " \n", 68 | " def queryAPI(self, title, author): \n", 69 | " url = 'https://www.goodreads.com/search.xml'\n", 70 | " params = [('key', goodreadsKey), ('q', title)]\n", 71 | " response = requests.get(url, params)\n", 72 | " self.lastRequest = time.time()\n", 73 | " return response\n", 74 | " \n", 75 | " def parseResponse(self, response): \n", 76 | " if response.status_code != 200: \n", 77 | " print('Got response other than 200!')\n", 78 | " else: \n", 79 | " self.text = response.text\n", 80 | " self.soup = BeautifulSoup(self.text, \"lxml\")\n", 81 | " self.works = self.soup.find_all('work')\n", 82 | " return self.getBestWork(self.works)\n", 83 | " \n", 84 | " def getBestWork(self, works, maxTries=3):\n", 85 | " for i in range(min(len(works), maxTries)): \n", 86 | " authorParsed = self.parseAuthor(works[i])\n", 87 | " if authorParsed is not None: \n", 88 | " if self.similarAuthors(self.author, authorParsed): \n", 89 | " return works[i]\n", 90 | " print(\"Couldn't find an appropriate work in the list.\")\n", 91 | " return None\n", 92 | " \n", 93 | " def parseAuthor(self, work): \n", 94 | " author = work.find('author').find('name').text\n", 95 | " return author\n", 96 | " \n", 97 | " def similarAuthors(self, origAuthor, parsedAuthor, threshold=3): \n", 98 | "# print(\"origAuthor: \", origAuthor)\n", 99 | "# print(\"parsedAuthor: \", parsedAuthor)\n", 100 | " if 
(type(origAuthor) == str) and (type(parsedAuthor) == str): \n", 101 | " origAuthor = origAuthor.strip().lower()\n", 102 | " origAuthor = re.sub('\\(.*?\\)', '', origAuthor)\n", 103 | " parsedAuthor = parsedAuthor.strip().lower()\n", 104 | " if ',' in origAuthor: \n", 105 | " # Try to change Jeferson, Thomas to Thomas Jefferson\n", 106 | " nameParts = origAuthor.split(',')\n", 107 | " origAuthor = nameParts[1] + ' ' + nameParts[0]\n", 108 | " return editdistance.eval(origAuthor, parsedAuthor) < threshold\n", 109 | " else: \n", 110 | " print(\"One of these authors is None, assuming it's OK.\")\n", 111 | " return True" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 14, 117 | "metadata": { 118 | "scrolled": true 119 | }, 120 | "outputs": [ 121 | { 122 | "name": "stderr", 123 | "output_type": "stream", 124 | "text": [ 125 | "/home/jon/.local/lib/python3.6/site-packages/pandas/core/generic.py:1299: PerformanceWarning: \n", 126 | "your performance may suffer as PyTables will pickle object types that it cannot\n", 127 | "map directly to c-types [inferred_type->mixed-integer,key->block1_values] [items->['LCC', 'author', 'authoryearofbirth', 'authoryearofdeath', 'downloads', 'formats', 'languages', 'lcsh', 'title', 'type', '_repo', '_version', 'alternative_title', 'contributor', 'covers', 'creator', 'description', 'edition_identifiers', 'edition_note', 'gutenberg_bookshelf', 'gutenberg_issued', 'gutenberg_type', 'identifiers', 'jmdate', 'subjects', 'language_note', 'production_note', 'publication_date', 'publication_note', 'publisher', 'rights', 'rights_url', 'series_note', 'summary', 'tableOfContents', 'titlepage_image', 'url', 'wikipedia', 'filename', 'wp_publication_date', 'wp_subjects', 'wp_info', 'wp_literary_genres', 'gr_rating', 'gr_numReviews', 'gr_pubDate', 'gr_id', 'gr_info', 'gr_author_id', 'gr_image_url', 'gr_small_image_url']]\n", 128 | "\n", 129 | " return pytables.to_hdf(path_or_buf, key, self, **kwargs)\n" 130 | ] 131 | } 132 | 
], 133 | "source": [ 134 | "df.to_hdf('pg-text-5-goodreads.hdf', 'pg')" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 5, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "wells, sea = df.loc[34681].author, df.loc[34681].title\n" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 6, 149 | "metadata": {}, 150 | "outputs": [ 151 | { 152 | "data": { 153 | "text/plain": [ 154 | "'Wells, H. G. (Herbert George)'" 155 | ] 156 | }, 157 | "execution_count": 6, 158 | "metadata": {}, 159 | "output_type": "execute_result" 160 | } 161 | ], 162 | "source": [ 163 | "wells" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 7, 169 | "metadata": {}, 170 | "outputs": [ 171 | { 172 | "data": { 173 | "text/plain": [ 174 | "'The Sea Lady'" 175 | ] 176 | }, 177 | "execution_count": 7, 178 | "metadata": {}, 179 | "output_type": "execute_result" 180 | } 181 | ], 182 | "source": [ 183 | "sea" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 11, 189 | "metadata": {}, 190 | "outputs": [ 191 | { 192 | "name": "stdout", 193 | "output_type": "stream", 194 | "text": [ 195 | "One of these authors is None, assuming it's OK.\n", 196 | "pubDate: 2006--\n", 197 | "rating: 3.26\n", 198 | "id: 1349063\n", 199 | "numReviews: 112\n" 200 | ] 201 | } 202 | ], 203 | "source": [ 204 | "result = goodReads(sea)" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": 16, 210 | "metadata": {}, 211 | "outputs": [ 212 | { 213 | "data": { 214 | "text/plain": [ 215 | "\n", 216 | "\n", 217 | "true\n", 218 | "\n", 219 | "\n", 220 | "\n", 221 | "\n", 222 | "\n", 223 | "1\n", 224 | "20\n", 225 | "122\n", 226 | "Goodreads\n", 227 | "0.20\n", 228 | "\n", 229 | "\n", 230 | "1349063\n", 231 | "15\n", 232 | "532\n", 233 | "112\n", 234 | "2006\n", 235 | "\n", 236 | "\n", 237 | "3.26\n", 238 | "\n", 239 | "105023\n", 240 | "The Sea Lady\n", 241 | "\n", 242 | "60750\n", 243 | 
"Margaret Drabble\n", 244 | "\n", 245 | "https://images.gr-assets.com/books/1328876815m/105023.jpg\n", 246 | "https://images.gr-assets.com/books/1328876815s/105023.jpg\n", 247 | "\n", 248 | "\n", 249 | "\n", 250 | "1085078\n", 251 | "46\n", 252 | "92\n", 253 | "13\n", 254 | "1902\n", 255 | "\n", 256 | "\n", 257 | "3.37\n", 258 | "\n", 259 | "1098224\n", 260 | "The Sea Lady: A Tissue of Moonshine \n", 261 | "\n", 262 | "880695\n", 263 | "H.G. Wells\n", 264 | "\n", 265 | "https://s.gr-assets.com/assets/nophoto/book/111x148-bcc042a9c91a29c1d680899eff700a03.png\n", 266 | "https://s.gr-assets.com/assets/nophoto/book/50x75-a91bf249278a81aabab721ef782c4a74.png\n", 267 | "\n", 268 | "\n", 269 | "\n", 270 | "391568\n", 271 | "1290\n", 272 | "593865\n", 273 | "15687\n", 274 | "1844\n", 275 | "\n", 276 | "\n", 277 | "4.21\n", 278 | "\n", 279 | "24864077\n", 280 | "The Count of Monte Cristo\n", 281 | "\n", 282 | "4785\n", 283 | "Alexandre Dumas\n", 284 | "\n", 285 | "https://images.gr-assets.com/books/1423343722m/24864077.jpg\n", 286 | "https://images.gr-assets.com/books/1423343722s/24864077.jpg\n", 287 | "\n", 288 | "\n", 289 | "\n", 290 | "1585709\n", 291 | "1\n", 292 | "21\n", 293 | "4\n", 294 | "1939\n", 295 | "\n", 296 | "\n", 297 | "4.05\n", 298 | "\n", 299 | "1592677\n", 300 | "Sea Island Lady\n", 301 | "\n", 302 | "739061\n", 303 | "Francis Griswold\n", 304 | "\n", 305 | "https://s.gr-assets.com/assets/nophoto/book/111x148-bcc042a9c91a29c1d680899eff700a03.png\n", 306 | "https://s.gr-assets.com/assets/nophoto/book/50x75-a91bf249278a81aabab721ef782c4a74.png\n", 307 | "\n", 308 | "\n", 309 | "\n", 310 | "3106797\n", 311 | "90\n", 312 | "955\n", 313 | "73\n", 314 | "1888\n", 315 | "\n", 316 | "\n", 317 | "3.84\n", 318 | "\n", 319 | "634117\n", 320 | "The Lady from the Sea\n", 321 | "\n", 322 | "2730977\n", 323 | "Henrik Ibsen\n", 324 | "\n", 325 | "https://s.gr-assets.com/assets/nophoto/book/111x148-bcc042a9c91a29c1d680899eff700a03.png\n", 326 | 
"https://s.gr-assets.com/assets/nophoto/book/50x75-a91bf249278a81aabab721ef782c4a74.png\n", 327 | "\n", 328 | "\n", 329 | "\n", 330 | "502154\n", 331 | "6\n", 332 | "893\n", 333 | "33\n", 334 | "2004\n", 335 | "12\n", 336 | "7\n", 337 | "3.90\n", 338 | "\n", 339 | "56352\n", 340 | "The Lady of the Sea (Tristan and Isolde, #3)\n", 341 | "\n", 342 | "13891\n", 343 | "Rosalind Miles\n", 344 | "\n", 345 | "https://s.gr-assets.com/assets/nophoto/book/111x148-bcc042a9c91a29c1d680899eff700a03.png\n", 346 | "https://s.gr-assets.com/assets/nophoto/book/50x75-a91bf249278a81aabab721ef782c4a74.png\n", 347 | "\n", 348 | "\n", 349 | "\n", 350 | "598753\n", 351 | "3\n", 352 | "117\n", 353 | "6\n", 354 | "2002\n", 355 | "4\n", 356 | "1\n", 357 | "3.71\n", 358 | "\n", 359 | "612296\n", 360 | "Face Down Across the Western Sea (Susanna, Lady Appleton, #7)\n", 361 | "\n", 362 | "71313\n", 363 | "Kathy Lynn Emerson\n", 364 | "\n", 365 | "https://images.gr-assets.com/books/1312001859m/612296.jpg\n", 366 | "https://images.gr-assets.com/books/1312001859s/612296.jpg\n", 367 | "\n", 368 | "\n", 369 | "\n", 370 | "14123108\n", 371 | "1\n", 372 | "1\n", 373 | "0\n", 374 | "2010\n", 375 | "11\n", 376 | "15\n", 377 | "1.00\n", 378 | "\n", 379 | "9242751\n", 380 | "The First Men in the Moon/The Sea Lady\n", 381 | "\n", 382 | "880695\n", 383 | "H.G. 
Wells\n", 384 | "\n", 385 | "https://s.gr-assets.com/assets/nophoto/book/111x148-bcc042a9c91a29c1d680899eff700a03.png\n", 386 | "https://s.gr-assets.com/assets/nophoto/book/50x75-a91bf249278a81aabab721ef782c4a74.png\n", 387 | "\n", 388 | "\n", 389 | "\n", 390 | "1955184\n", 391 | "8\n", 392 | "2249\n", 393 | "190\n", 394 | "1999\n", 395 | "5\n", 396 | "1\n", 397 | "3.90\n", 398 | "\n", 399 | "15732429\n", 400 | "Sea Scoundrel (Knave of Hearts #1)\n", 401 | "\n", 402 | "196212\n", 403 | "Annette Blair\n", 404 | "\n", 405 | "https://images.gr-assets.com/books/1341242095m/15732429.jpg\n", 406 | "https://images.gr-assets.com/books/1341242095s/15732429.jpg\n", 407 | "\n", 408 | "\n", 409 | "\n", 410 | "50414\n", 411 | "176\n", 412 | "2736\n", 413 | "162\n", 414 | "1904\n", 415 | "\n", 416 | "\n", 417 | "3.44\n", 418 | "\n", 419 | "455065\n", 420 | "The Food Of The Gods, The Sea Lady (The Works Of H.G. Wells Volume 5)\n", 421 | "\n", 422 | "880695\n", 423 | "H.G. Wells\n", 424 | "\n", 425 | "https://s.gr-assets.com/assets/nophoto/book/111x148-bcc042a9c91a29c1d680899eff700a03.png\n", 426 | "https://s.gr-assets.com/assets/nophoto/book/50x75-a91bf249278a81aabab721ef782c4a74.png\n", 427 | "\n", 428 | "\n", 429 | "\n", 430 | "14447\n", 431 | "38\n", 432 | "1079\n", 433 | "67\n", 434 | "1970\n", 435 | "1\n", 436 | "1\n", 437 | "3.53\n", 438 | "\n", 439 | "12104\n", 440 | "Riders to the Sea\n", 441 | "\n", 442 | "4332478\n", 443 | "J.M. 
Synge\n", 444 | "\n", 445 | "https://s.gr-assets.com/assets/nophoto/book/111x148-bcc042a9c91a29c1d680899eff700a03.png\n", 446 | "https://s.gr-assets.com/assets/nophoto/book/50x75-a91bf249278a81aabab721ef782c4a74.png\n", 447 | "\n", 448 | "\n", 449 | "\n", 450 | "26590825\n", 451 | "3\n", 452 | "8\n", 453 | "0\n", 454 | "2013\n", 455 | "7\n", 456 | "30\n", 457 | "4.50\n", 458 | "\n", 459 | "19310811\n", 460 | "My Sea Lady: An Epic Memoir of the Arctic Convoys\n", 461 | "\n", 462 | "7364290\n", 463 | "Graeme Ogden\n", 464 | "\n", 465 | "https://s.gr-assets.com/assets/nophoto/book/111x148-bcc042a9c91a29c1d680899eff700a03.png\n", 466 | "https://s.gr-assets.com/assets/nophoto/book/50x75-a91bf249278a81aabab721ef782c4a74.png\n", 467 | "\n", 468 | "\n", 469 | "\n", 470 | "15566519\n", 471 | "3\n", 472 | "206\n", 473 | "33\n", 474 | "2011\n", 475 | "3\n", 476 | "15\n", 477 | "3.89\n", 478 | "\n", 479 | "10657375\n", 480 | "A Lady's Wish (Rogues of the Sea, #1.5)\n", 481 | "\n", 482 | "3145688\n", 483 | "Katharine Ashe\n", 484 | "\n", 485 | "https://images.gr-assets.com/books/1300662934m/10657375.jpg\n", 486 | "https://images.gr-assets.com/books/1300662934s/10657375.jpg\n", 487 | "\n", 488 | "\n", 489 | "\n", 490 | "40395395\n", 491 | "2\n", 492 | "0\n", 493 | "0\n", 494 | "2011\n", 495 | "11\n", 496 | "29\n", 497 | "0.0\n", 498 | "\n", 499 | "21024355\n", 500 | "Dragon Empress of the Sea (Lady Pirates)\n", 501 | "\n", 502 | "4557055\n", 503 | "Rigel Ailur\n", 504 | "\n", 505 | "https://s.gr-assets.com/assets/nophoto/book/111x148-bcc042a9c91a29c1d680899eff700a03.png\n", 506 | "https://s.gr-assets.com/assets/nophoto/book/50x75-a91bf249278a81aabab721ef782c4a74.png\n", 507 | "\n", 508 | "\n", 509 | "\n", 510 | "6566912\n", 511 | "12\n", 512 | "1879\n", 513 | "60\n", 514 | "1960\n", 515 | "1\n", 516 | "1\n", 517 | "3.92\n", 518 | "\n", 519 | "6081634\n", 520 | "League of Youth/A Doll's House/The Lady from the Sea\n", 521 | "\n", 522 | "2730977\n", 523 | "Henrik Ibsen\n", 524 | 
"\n", 525 | "https://s.gr-assets.com/assets/nophoto/book/111x148-bcc042a9c91a29c1d680899eff700a03.png\n", 526 | "https://s.gr-assets.com/assets/nophoto/book/50x75-a91bf249278a81aabab721ef782c4a74.png\n", 527 | "\n", 528 | "\n", 529 | "\n", 530 | "2103708\n", 531 | "4\n", 532 | "220\n", 533 | "17\n", 534 | "1994\n", 535 | "8\n", 536 | "\n", 537 | "4.14\n", 538 | "\n", 539 | "2098345\n", 540 | "My Lady Pirate (Heros of the Sea, #3)\n", 541 | "\n", 542 | "187102\n", 543 | "Danelle Harmon\n", 544 | "\n", 545 | "https://images.gr-assets.com/books/1353417595m/2098345.jpg\n", 546 | "https://images.gr-assets.com/books/1353417595s/2098345.jpg\n", 547 | "\n", 548 | "\n", 549 | "\n", 550 | "25491631\n", 551 | "11\n", 552 | "1296\n", 553 | "67\n", 554 | "1981\n", 555 | "12\n", 556 | "25\n", 557 | "4.14\n", 558 | "\n", 559 | "8709243\n", 560 | "Poetry by Sylvia Plath: Ariel, Ennui, Daddy, Lady Lazarus, Two Lovers and a Beachcomber by the Real Sea, the Munich Mannequins\n", 561 | "\n", 562 | "4379\n", 563 | "Sylvia Plath\n", 564 | "\n", 565 | "https://s.gr-assets.com/assets/nophoto/book/111x148-bcc042a9c91a29c1d680899eff700a03.png\n", 566 | "https://s.gr-assets.com/assets/nophoto/book/50x75-a91bf249278a81aabab721ef782c4a74.png\n", 567 | "\n", 568 | "\n", 569 | "\n", 570 | "45956754\n", 571 | "1\n", 572 | "22\n", 573 | "1\n", 574 | "2015\n", 575 | "7\n", 576 | "1\n", 577 | "4.23\n", 578 | "\n", 579 | "26030603\n", 580 | "The Sea King's Lady (Seven Kingdoms Tales, #2)\n", 581 | "\n", 582 | "6446003\n", 583 | "S.E. 
Smith\n", 584 | "\n", 585 | "https://images.gr-assets.com/books/1492200980m/26030603.jpg\n", 586 | "https://images.gr-assets.com/books/1492200980s/26030603.jpg\n", 587 | "\n", 588 | "\n", 589 | "\n", 590 | "4018759\n", 591 | "2\n", 592 | "45\n", 593 | "1\n", 594 | "2008\n", 595 | "7\n", 596 | "1\n", 597 | "3.67\n", 598 | "\n", 599 | "3973000\n", 600 | "The Sea Captain and the Lady\n", 601 | "\n", 602 | "260167\n", 603 | "Vada Foster\n", 604 | "\n", 605 | "https://s.gr-assets.com/assets/nophoto/book/111x148-bcc042a9c91a29c1d680899eff700a03.png\n", 606 | "https://s.gr-assets.com/assets/nophoto/book/50x75-a91bf249278a81aabab721ef782c4a74.png\n", 607 | "\n", 608 | "\n", 609 | "\n", 610 | "6566840\n", 611 | "12\n", 612 | "127\n", 613 | "16\n", 614 | "1970\n", 615 | "7\n", 616 | "1\n", 617 | "4.02\n", 618 | "\n", 619 | "240387\n", 620 | "Four Major Plays, Vol. 2: Ghosts / An Enemy of the People / The Lady from the Sea / John Gabriel Borkman\n", 621 | "\n", 622 | "2730977\n", 623 | "Henrik Ibsen\n", 624 | "\n", 625 | "https://s.gr-assets.com/assets/nophoto/book/111x148-bcc042a9c91a29c1d680899eff700a03.png\n", 626 | "https://s.gr-assets.com/assets/nophoto/book/50x75-a91bf249278a81aabab721ef782c4a74.png\n", 627 | "\n", 628 | "\n", 629 | "\n", 630 | "\n", 631 | "" 632 | ] 633 | }, 634 | "execution_count": 16, 635 | "metadata": {}, 636 | "output_type": "execute_result" 637 | } 638 | ], 639 | "source": [ 640 | "result.soup" 641 | ] 642 | }, 643 | { 644 | "cell_type": "code", 645 | "execution_count": null, 646 | "metadata": { 647 | "collapsed": true 648 | }, 649 | "outputs": [], 650 | "source": [] 651 | } 652 | ], 653 | "metadata": { 654 | "kernelspec": { 655 | "display_name": "Python 3", 656 | "language": "python", 657 | "name": "python3" 658 | }, 659 | "language_info": { 660 | "codemirror_mode": { 661 | "name": "ipython", 662 | "version": 3 663 | }, 664 | "file_extension": ".py", 665 | "mimetype": "text/x-python", 666 | "name": "python", 667 | "nbconvert_exporter": "python", 
668 | "pygments_lexer": "ipython3", 669 | "version": "3.6.1" 670 | } 671 | }, 672 | "nbformat": 4, 673 | "nbformat_minor": 2 674 | } 675 | -------------------------------------------------------------------------------- /pg-add-goodreads.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd\n", 12 | "from bs4 import BeautifulSoup\n", 13 | "from lxml import etree\n", 14 | "from secrets import goodreadsKey\n", 15 | "import requests\n", 16 | "import time\n", 17 | "import editdistance\n", 18 | "import re" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 3, 24 | "metadata": { 25 | "collapsed": true 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "# Load data so far\n", 30 | "df = pd.read_hdf('pg-text-5-goodreads.hdf', 'pg')" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": { 37 | "collapsed": true 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "class goodReads: \n", 42 | " def __init__(self, title, author=None):\n", 43 | " self.title = title\n", 44 | " self.author = author\n", 45 | " self.response = self.queryAPI(title, author)\n", 46 | " self.bestWork = self.parseResponse(self.response)\n", 47 | " if self.bestWork is not None: \n", 48 | " self.found = True\n", 49 | " self.pubDate = self.bestWork.original_publication_year.text + \\\n", 50 | " '-' + self.bestWork.original_publication_month.text + '-' + \\\n", 51 | " self.bestWork.original_publication_day.text\n", 52 | " self.rating = self.bestWork.average_rating.text\n", 53 | " self.id = self.bestWork.id.text\n", 54 | " self.numReviews = self.bestWork.text_reviews_count.text\n", 55 | " self.authorID = self.bestWork.author.id.text\n", 56 | " self.imageURL = self.bestWork.image_url.text\n", 57 | " self.smallImageURL = 
self.bestWork.small_image_url.text\n", 58 | " self.show()\n", 59 | " else: \n", 60 | " self.found = False\n", 61 | " \n", 62 | " def show(self): \n", 63 | " print('pubDate: ', self.pubDate)\n", 64 | " print('rating: ', self.rating)\n", 65 | " print('id: ', self.id)\n", 66 | " print('numReviews: ', self.numReviews)\n", 67 | " \n", 68 | " def queryAPI(self, title, author): \n", 69 | " url = 'https://www.goodreads.com/search.xml'\n", 70 | " params = [('key', goodreadsKey), ('q', title)]\n", 71 | " response = requests.get(url, params)\n", 72 | " self.lastRequest = time.time()\n", 73 | " return response\n", 74 | " \n", 75 | " def parseResponse(self, response): \n", 76 | " if response.status_code != 200: \n", 77 | " print('Got response other than 200!')\n", 78 | " else: \n", 79 | " self.text = response.text\n", 80 | " self.soup = BeautifulSoup(self.text, \"lxml\")\n", 81 | " self.works = self.soup.find_all('work')\n", 82 | " return self.getBestWork(self.works)\n", 83 | " \n", 84 | " def getBestWork(self, works, maxTries=3):\n", 85 | " for i in range(min(len(works), maxTries)): \n", 86 | " authorParsed = self.parseAuthor(works[i])\n", 87 | " if authorParsed is not None: \n", 88 | " if self.similarAuthors(self.author, authorParsed): \n", 89 | " return works[i]\n", 90 | " print(\"Couldn't find an appropriate work in the list.\")\n", 91 | " return None\n", 92 | " \n", 93 | " def parseAuthor(self, work): \n", 94 | " author = work.find('author').find('name').text\n", 95 | " return author\n", 96 | " \n", 97 | " def similarAuthors(self, origAuthor, parsedAuthor, threshold=3): \n", 98 | "# print(\"origAuthor: \", origAuthor)\n", 99 | "# print(\"parsedAuthor: \", parsedAuthor)\n", 100 | " if (type(origAuthor) == str) and (type(parsedAuthor) == str): \n", 101 | " origAuthor = origAuthor.strip().lower()\n", 102 | " origAuthor = re.sub('\\(.*?\\)', '', origAuthor)\n", 103 | " parsedAuthor = parsedAuthor.strip().lower()\n", 104 | " if ',' in origAuthor: \n", 105 | " # Try to change 
Jefferson, Thomas to Thomas Jefferson\n", 106 | " nameParts = origAuthor.split(',')\n", 107 | " origAuthor = nameParts[1] + ' ' + nameParts[0]\n", 108 | " # if the last name is somewhere in the name, that's good enough\n", 109 | " if nameParts[0] in parsedAuthor: \n", 110 | " return True\n", 111 | " if editdistance.eval(origAuthor, parsedAuthor) < threshold: \n", 112 | " return True\n", 113 | " else: \n", 114 | " print(\"Couldn't match origAuthor: %s \\n with parsedAuthor: %s\" % (origAuthor, parsedAuthor))\n", 115 | " return False\n", 116 | " else: \n", 117 | " print(\"One of these authors is None, assuming it's OK.\")\n", 118 | " return True" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 22, 124 | "metadata": { 125 | "scrolled": true 126 | }, 127 | "outputs": [ 128 | { 129 | "name": "stdout", 130 | "output_type": "stream", 131 | "text": [ 132 | "Looking up #53044: Three Days on the Ohio River\n", 133 | "By: Alcott, William A. (William Andrus)\n", 134 | "Couldn't find an appropriate work in the list.\n", 135 | "Looking up #53045: The Irish Penny Journal, Vol. 1, No. 18, October 31, 1840\n", 136 | "By: Various\n", 137 | "pubDate: --\n", 138 | "rating: 0.0\n", 139 | "id: 56354618\n", 140 | "numReviews: 0\n", 141 | "Looking up #53046: Conscience and Sin: Daily Meditations for Lent\n", 142 | "By: Baring-Gould, S. (Sabine)\n", 143 | "Couldn't find an appropriate work in the list.\n", 144 | "Looking up #53047: What Jesus Taught\n", 145 | "By: Widtsoe, Osborne J. P.\n", 146 | "Couldn't find an appropriate work in the list.\n", 147 | "Looking up #53048: The American Missionary — Volume 33, No. 
8, August, 1879\n", 148 | "By: Various\n", 149 | "pubDate: 2006-3-16\n", 150 | "rating: 0.0\n", 151 | "id: 13035293\n", 152 | "numReviews: 0\n", 153 | "Looking up #53049: Charlie Codman's Cruise: A Story for Boys\n", 154 | "By: Alger, Horatio, Jr.\n", 155 | "pubDate: --\n", 156 | "rating: 0.0\n", 157 | "id: 56255776\n", 158 | "numReviews: 0\n", 159 | "Looking up #53050: Short Reasons for Communion with the Church of England: or the Churchman's answer to the question, \"Why are you a Member of the Established Church?\"\n", 160 | "By: Biddulph, T. T.\n", 161 | "Couldn't find an appropriate work in the list.\n", 162 | "Looking up #53051: The German Army in Belgium, the White Book of May 1915\n", 163 | "By: Bennett, E. N.\n", 164 | "Couldn't find an appropriate work in the list.\n", 165 | "Looking up #53052: The Irish Penny Journal, Vol. 1 No. 19, November 7, 1840\n", 166 | "By: Various\n", 167 | "Couldn't find an appropriate work in the list.\n", 168 | "Looking up #53053: Scrap Book of Mormon Literature, Volume 2 (of 2): Religious Tracts\n", 169 | "By: Various\n", 170 | "Couldn't find an appropriate work in the list.\n", 171 | "Looking up #53054: Copyright Renewals: Artwork 1960-1964: Catalog of Copyright Entries\n", 172 | "By: Congress, U. S. Copyright Office Library of\n", 173 | "Couldn't find an appropriate work in the list.\n", 174 | "Looking up #53055: Marks' first lessons in geometry: in two parts. 
Objectively presented, and designed for the; use of primary classes in grammar schools, academies, etc.\n", 175 | "By: Marks, Bernhard\n", 176 | "pubDate: 2015-9-27\n", 177 | "rating: 0.0\n", 178 | "id: 46978045\n", 179 | "numReviews: 0\n", 180 | "Looking up #53056: Biografia di Giuseppe Garibaldi\n", 181 | "By: Cuneo, Giovanni Battista\n", 182 | "pubDate: --\n", 183 | "rating: 0.0\n", 184 | "id: 56261862\n", 185 | "numReviews: 0\n", 186 | "Looking up #53057: College Men Without Money\n", 187 | "By: Riddle, Carl\n", 188 | "Couldn't find an appropriate work in the list.\n", 189 | "Looking up #53058: John Holdsworth: Chief Mate\n", 190 | "By: Russell, William Clark\n", 191 | "pubDate: 1875--\n", 192 | "rating: 4.00\n", 193 | "id: 26647366\n", 194 | "numReviews: 0\n", 195 | "Looking up #53059: Urith: A Tale of Dartmoor\n", 196 | "By: Baring-Gould, S. (Sabine)\n", 197 | "Couldn't find an appropriate work in the list.\n", 198 | "Looking up #53060: Meine Tante Anna\n", 199 | "By: Villinger, Hermine\n", 200 | "pubDate: --\n", 201 | "rating: 0.0\n", 202 | "id: 55981303\n", 203 | "numReviews: 0\n", 204 | "Looking up #53061: Mirdja: Romaani\n", 205 | "By: Onerva, L.\n", 206 | "Couldn't find an appropriate work in the list.\n", 207 | "Looking up #53062: Tarzanin paluu: Seikkailukirja Afrikan aarniometsistä\n", 208 | "By: Burroughs, Edgar Rice\n", 209 | "pubDate: --\n", 210 | "rating: 0.0\n", 211 | "id: 55735312\n", 212 | "numReviews: 0\n", 213 | "Looking up #53063: Les poilus canadiens: Le roman du vingt-deuxième bataillon canadien-français\n", 214 | "By: Holland, J. A.\n", 215 | "Couldn't find an appropriate work in the list.\n", 216 | "Looking up #53064: Ancient Apostles\n", 217 | "By: McKay, David O.\n", 218 | "pubDate: 2009-8-14\n", 219 | "rating: 5.00\n", 220 | "id: 14676715\n", 221 | "numReviews: 1\n", 222 | "Looking up #53065: Kitty Alone (vol 1 of 3): A Story of Three Fires\n", 223 | "By: Baring-Gould, S. 
(Sabine)\n", 224 | "Couldn't find an appropriate work in the list.\n", 225 | "Looking up #53066: Collection complète des oeuvres de l'Abbé de Mably, Volume 4\n", 226 | "By: Mably, Abbé de\n", 227 | "pubDate: --\n", 228 | "rating: 0.0\n", 229 | "id: 56257468\n", 230 | "numReviews: 0\n", 231 | "Looking up #53067: Mons, Anzac and Kut\n", 232 | "By: Herbert, Aubrey\n", 233 | "pubDate: 2010-1-2\n", 234 | "rating: 0.0\n", 235 | "id: 26198004\n", 236 | "numReviews: 0\n", 237 | "Looking up #53068: Ampleforth College; A Sketch-Book\n", 238 | "By: Pike, Joseph\n", 239 | "Couldn't find an appropriate work in the list.\n", 240 | "Looking up #53069: Bruges; A Sketch-Book\n", 241 | "By: Pike, Joseph\n", 242 | "pubDate: --\n", 243 | "rating: 0.0\n", 244 | "id: 49535767\n", 245 | "numReviews: 0\n", 246 | "Looking up #53070: Cardiff; A Sketch-Book\n", 247 | "By: Andrews, Douglas S.\n", 248 | "Couldn't find an appropriate work in the list.\n", 249 | "Looking up #53071: Durham; A Sketch-Book\n", 250 | "By: Bertram, Robert J. S.\n", 251 | "Couldn't find an appropriate work in the list.\n", 252 | "Looking up #53072: Isle of Wight; A Sketch-Book\n", 253 | "By: Woollard, Dorothy E.G.\n", 254 | "Couldn't find an appropriate work in the list.\n", 255 | "Looking up #53073: The English Lakes; A Sketch-Book\n", 256 | "By: Home, Gordon\n", 257 | "Couldn't find an appropriate work in the list.\n", 258 | "Looking up #53074: Liverpool; A Sketch-Book\n", 259 | "By: Brown, Sam J. M.\n", 260 | "Couldn't find an appropriate work in the list.\n", 261 | "Looking up #53075: London; A Sketch-Book\n", 262 | "By: Hornby, Lester G. (Lester George)\n", 263 | "Couldn't find an appropriate work in the list.\n", 264 | "Looking up #53076: Newcastle-Upon-Tyne; A Sketch-Book\n", 265 | "By: Bertram, Robert J. 
S.\n", 266 | "pubDate: --\n", 267 | "rating: 0.0\n", 268 | "id: 56911792\n", 269 | "numReviews: 0\n", 270 | "Looking up #53077: Norwich; A Sketch-Book\n", 271 | "By: Cole, E.V.\n", 272 | "Couldn't find an appropriate work in the list.\n", 273 | "Looking up #53078: Paris; A Sketch-Book\n", 274 | "By: Béjot, Eugène\n", 275 | "Couldn't find an appropriate work in the list.\n", 276 | "Looking up #53079: Stratford-on-Avon; A Sketch-Book\n", 277 | "By: Home, Gordon\n", 278 | "pubDate: 2009-1-1\n", 279 | "rating: 5.00\n", 280 | "id: 55662640\n", 281 | "numReviews: 1\n", 282 | "Looking up #53080: Surrey; A Sketch-Book\n", 283 | "By: Austin, R.S.\n", 284 | "pubDate: --\n", 285 | "rating: 0.0\n", 286 | "id: 55836034\n", 287 | "numReviews: 0\n", 288 | "Looking up #53081: The Thames\n", 289 | "By: Sharpley, R.\n", 290 | "Couldn't find an appropriate work in the list.\n", 291 | "Looking up #53082: Winchester; A Sketch-Book\n", 292 | "By: Home, Gordon\n", 293 | "pubDate: --\n", 294 | "rating: 0.0\n", 295 | "id: 48940474\n", 296 | "numReviews: 0\n", 297 | "Looking up #53083: York; A Sketch-Book\n", 298 | "By: Home, Gordon\n", 299 | "Couldn't find an appropriate work in the list.\n", 300 | "Looking up #53084: Symmes's Theory of Concentric Spheres: Demonstrating that the Earth is hollow, habitable within,; and widely open about the poles\n", 301 | "By: Anonymous\n", 302 | "pubDate: --\n", 303 | "rating: 0.0\n", 304 | "id: 50546825\n", 305 | "numReviews: 0\n", 306 | "Looking up #53085: How Women May Earn a Living\n", 307 | "By: Grogan, Mercy\n", 308 | "Couldn't find an appropriate work in the list.\n", 309 | "Looking up #53086: Life of a Pioneer: Being the Autobiography of James S. 
Brown\n", 310 | "By: Brown, James S.\n", 311 | "pubDate: --\n", 312 | "rating: 0.0\n", 313 | "id: 56260687\n", 314 | "numReviews: 0\n", 315 | "Looking up #53087: The Æsculapian Labyrinth Explored: Medical Mystery Illustrated\n", 316 | "By: Glyster, Gregory\n", 317 | "pubDate: --\n", 318 | "rating: 0.0\n", 319 | "id: 55695631\n", 320 | "numReviews: 0\n", 321 | "Looking up #53088: Miriam: A Tale of Pole Hill and the Greenfield Hills\n", 322 | "By: Sykes, Daniel Frederick Edward\n", 323 | "Couldn't find an appropriate work in the list.\n", 324 | "Looking up #53089: Europe in the Middle Ages\n", 325 | "By: Plunket, Ierne Lifford\n", 326 | "Couldn't find an appropriate work in the list.\n", 327 | "Looking up #53090: The Women of Mormondom\n", 328 | "By: Tullidge, Edward W.\n", 329 | "pubDate: 1957--\n", 330 | "rating: 4.00\n", 331 | "id: 6545581\n", 332 | "numReviews: 4\n", 333 | "Looking up #53091: Bath and Wells; A Sketch-Book\n", 334 | "By: Andrews, D. S.\n", 335 | "Couldn't find an appropriate work in the list.\n", 336 | "Looking up #53092: Reminiscences of Joseph the Prophet: And the Coming Forth of the Book of Mormon\n", 337 | "By: Stevenson, Edward Luther\n", 338 | "Couldn't find an appropriate work in the list.\n", 339 | "Looking up #53093: Q-Ships and Their Story\n", 340 | "By: Chatterton, E. Keble (Edward Keble)\n", 341 | "pubDate: 1972-11-2\n", 342 | "rating: 3.47\n", 343 | "id: 3794697\n", 344 | "numReviews: 1\n", 345 | "Looking up #53094: L'ancien régime et la révolution\n", 346 | "By: Tocqueville, Alexis de\n", 347 | "pubDate: --\n", 348 | "rating: 0.0\n", 349 | "id: 56350640\n", 350 | "numReviews: 0\n", 351 | "Looking up #53095: The Art of Architectural Modelling in Paper\n", 352 | "By: Richardson, T. 
A.\n", 353 | "pubDate: 2015-8-9\n", 354 | "rating: 0.0\n", 355 | "id: 46679625\n", 356 | "numReviews: 0\n", 357 | "Looking up #53096: Vertraute Briefe an eine Freundin\n", 358 | "By: Garve, Christian\n", 359 | "pubDate: --\n", 360 | "rating: 0.0\n", 361 | "id: 56262050\n", 362 | "numReviews: 0\n", 363 | "Looking up #53097: Sadan prosentin patriootti\n", 364 | "By: Sinclair, Upton\n", 365 | "Couldn't find an appropriate work in the list.\n", 366 | "Looking up #53098: Väärällä uralla\n", 367 | "By: Kianto, Ilmari\n", 368 | "pubDate: 1896--\n", 369 | "rating: 0.0\n", 370 | "id: 50318218\n", 371 | "numReviews: 0\n", 372 | "Looking up #53099: Bristol; A Sketch-Book\n", 373 | "By: Woollard, Dorothy E.G.\n", 374 | "pubDate: 2015-2-19\n", 375 | "rating: 0.0\n", 376 | "id: 46676433\n", 377 | "numReviews: 0\n", 378 | "Looking up #53100: Abridgement of the Debates of Congress, from 1789 to 1856, Vol. 3 (of 16)\n", 379 | "By: Various\n", 380 | "Couldn't find an appropriate work in the list.\n", 381 | "Looking up #53101: Queens of the Renaissance\n", 382 | "By: Ryley, M. 
Beresford\n", 383 | "pubDate: 1907--\n", 384 | "rating: 3.00\n", 385 | "id: 6707564\n", 386 | "numReviews: 1\n", 387 | "Looking up #53102: Autobiography of Sir John Rennie, F.R.S., Past President of the Institute of Civil Engineers: Comprising the history of his professional life, together; with reminiscences dating from the commencement of the; century to the present time.\n", 388 | "By: Rennie, John\n", 389 | "pubDate: 2015-8-8\n", 390 | "rating: 0.0\n", 391 | "id: 46603766\n", 392 | "numReviews: 0\n" 393 | ] 394 | }, 395 | { 396 | "name": "stdout", 397 | "output_type": "stream", 398 | "text": [ 399 | "Looking up #53103: The Slav Nations\n", 400 | "By: Tucić, Srgjan Pl.\n", 401 | "Couldn't find an appropriate work in the list.\n", 402 | "Looking up #53104: Printing in Relation to Graphic Art\n", 403 | "By: French, George\n", 404 | "pubDate: 2015-8-20\n", 405 | "rating: 0.0\n", 406 | "id: 46767682\n", 407 | "numReviews: 0\n", 408 | "Looking up #53105: Jed, the Poorhouse Boy\n", 409 | "By: Alger, Horatio, Jr.\n", 410 | "pubDate: --\n", 411 | "rating: 0.0\n", 412 | "id: 56173586\n", 413 | "numReviews: 0\n", 414 | "Looking up #53106: In Beaver Cove and Elsewhere\n", 415 | "By: Crim, Matt\n", 416 | "pubDate: 2007-10-1\n", 417 | "rating: 0.0\n", 418 | "id: 19957096\n", 419 | "numReviews: 0\n", 420 | "Looking up #53107: Istruzioni popolari per la buona tenuta dei bachi da seta\n", 421 | "By: Poggi, Tito\n", 422 | "Couldn't find an appropriate work in the list.\n", 423 | "Looking up #53108: Des Vaters Sünde, der Mutter Fluch\n", 424 | "By: Clauren, Heinrich\n", 425 | "Couldn't find an appropriate work in the list.\n", 426 | "Looking up #53109: Glasgow; A Sketch-Book\n", 427 | "By: Nisbet, John\n", 428 | "Couldn't find an appropriate work in the list.\n", 429 | "Looking up #53110: Harrow; A Sketch-Book\n", 430 | "By: Keesey, Walter M.\n", 431 | "pubDate: --\n", 432 | "rating: 0.0\n", 433 | "id: 55855713\n", 434 | "numReviews: 0\n", 435 | "Looking up #53111: Runeberg 
Suomen kansan runoilijana\n", 436 | "By: Vasenius, Valfrid\n", 437 | "pubDate: --\n", 438 | "rating: 0.0\n", 439 | "id: 10394143\n", 440 | "numReviews: 0\n", 441 | "Looking up #53112: Lehrbuch der Physik zum Schulgebrauche.\n", 442 | "By: Winter, Wilhelm\n", 443 | "Couldn't find an appropriate work in the list.\n", 444 | "Looking up #53113: Gettysburg: Stories of the Red Harvest and the Aftermath\n", 445 | "By: Singmaster, Elsie\n", 446 | "pubDate: 1913--\n", 447 | "rating: 3.00\n", 448 | "id: 3710498\n", 449 | "numReviews: 0\n", 450 | "Looking up #53114: Suomalaisia\n", 451 | "By: Wainio, Niilo E.\n", 452 | "Couldn't find an appropriate work in the list.\n", 453 | "Looking up #53115: The Silicon Jungle\n", 454 | "By: Rothman, David H.\n", 455 | "Couldn't find an appropriate work in the list.\n", 456 | "Looking up #53116: The Works of John Dryden, Now First Collected in Eighteen Volumes; Vol. 12 (of 18)\n", 457 | "By: Dryden, John\n", 458 | "pubDate: --\n", 459 | "rating: 0.0\n", 460 | "id: 50214033\n", 461 | "numReviews: 0\n", 462 | "Looking up #53117: Lakiasia: Näytelmä kolmessa näytöksessä\n", 463 | "By: Karikko, Kaarle\n", 464 | "Couldn't find an appropriate work in the list.\n", 465 | "Looking up #53118: Kolmimasto \"Tulevaisuus\" eli elämää Pohjolassa\n", 466 | "By: Lie, Jonas\n", 467 | "Couldn't find an appropriate work in the list.\n", 468 | "Looking up #53119: The Brighton Boys in the Submarine Treasure Ship\n", 469 | "By: Driscoll, Lieutenant James R.\n", 470 | "Couldn't find an appropriate work in the list.\n", 471 | "Looking up #53120: A History of the Old English Letter Foundries: with Notes, Historical and Bibliographical, on the Rise; and Progress of English Typography.\n", 472 | "By: Reed, Talbot Baines\n", 473 | "pubDate: 1887--\n", 474 | "rating: 3.00\n", 475 | "id: 13260884\n", 476 | "numReviews: 0\n", 477 | "Looking up #53121: A Colonial Reformer, Vol. 
III (of 3)\n", 478 | "By: Boldrewood, Rolf\n", 479 | "Couldn't find an appropriate work in the list.\n", 480 | "Looking up #53122: A Blundering Boy: A Humorous Story\n", 481 | "By: Munro, Bruce Weston\n", 482 | "pubDate: 2015-9-27\n", 483 | "rating: 0.0\n", 484 | "id: 46995555\n", 485 | "numReviews: 0\n", 486 | "Looking up #53123: Die Harpyen von Madrit, oder die Postkutsche: Aus dem Spanischen des Verfassers der Donna Rufina\n", 487 | "By: Solórzano, Alonso de Castillo\n", 488 | "Couldn't find an appropriate work in the list.\n", 489 | "Looking up #53124: The Romance of Madame Tussaud's\n", 490 | "By: Tussaud, John Theodore\n", 491 | "pubDate: 2009-10-24\n", 492 | "rating: 3.00\n", 493 | "id: 14031275\n", 494 | "numReviews: 2\n", 495 | "Looking up #53125: What's What in America\n", 496 | "By: Brewster, Eugene V.\n", 497 | "Couldn't find an appropriate work in the list.\n", 498 | "Looking up #53126: With Force and Arms: A Tale of Love and Salem Witchcraft\n", 499 | "By: Garis, Howard Roger\n", 500 | "Couldn't find an appropriate work in the list.\n", 501 | "Looking up #53127: The Royal Pastime of Cock-fighting: The art of breeding, feeding, fighting, and curing cocks of the game\n", 502 | "By: R. H. (Robert Howlett)\n", 503 | "Couldn't find an appropriate work in the list.\n", 504 | "Looking up #53128: Red Spider, Volume 1 (of 2)\n", 505 | "By: Baring-Gould, S. (Sabine)\n", 506 | "Couldn't find an appropriate work in the list.\n", 507 | "Looking up #53129: Red Spider, Volume 2 (of 2)\n", 508 | "By: Baring-Gould, S. (Sabine)\n", 509 | "Couldn't find an appropriate work in the list.\n", 510 | "Looking up #53130: Pleasant Ways in Science\n", 511 | "By: Proctor, Richard A. (Richard Anthony)\n", 512 | "pubDate: 2015-9-27\n", 513 | "rating: 0.0\n", 514 | "id: 46977368\n", 515 | "numReviews: 0\n", 516 | "Looking up #53131: The Catholic World, Vol. 
21, April, 1875, to September, 1875: A Monthly Magazine of General Literature and Science\n", 517 | "By: Various\n", 518 | "Couldn't find an appropriate work in the list.\n", 519 | "Looking up #53132: Through the Telescope\n", 520 | "By: Baikie, James\n", 521 | "Couldn't find an appropriate work in the list.\n", 522 | "Looking up #53133: Vanha pappila\n", 523 | "By: Kianto, Ilmari\n", 524 | "Couldn't find an appropriate work in the list.\n", 525 | "Looking up #53134: A Battle of the Books, recorded by an unknown writer for the use of authors and publishers: To the first for doctrine, to the second for reproof, to; both for correction and for instruction in righteousness\n", 526 | "By: Hamilton, Gail\n", 527 | "pubDate: 2010-4-21\n", 528 | "rating: 0.0\n", 529 | "id: 13595412\n", 530 | "numReviews: 0\n", 531 | "Looking up #53135: Collecting Old Glass: English and Irish\n", 532 | "By: Yoxall, J. H.\n", 533 | "pubDate: --\n", 534 | "rating: 0.0\n", 535 | "id: 49892924\n", 536 | "numReviews: 0\n", 537 | "Looking up #53136: Inari: Romaani\n", 538 | "By: Onerva, L.\n", 539 | "Couldn't find an appropriate work in the list.\n", 540 | "Looking up #53137: The American Railway: Its Construction, Development, Management, and Appliances\n", 541 | "By: Bogart, John\n", 542 | "Couldn't find an appropriate work in the list.\n", 543 | "Looking up #53138: Dramatized Readings: Recitations in Poetry and Prose: Preston Library No. 
1\n", 544 | "By: Yendes, Lucy A.\n", 545 | "Couldn't find an appropriate work in the list.\n", 546 | "Looking up #53139: Pyhä viha: Romaani\n", 547 | "By: Kianto, Ilmari\n", 548 | "Couldn't find an appropriate work in the list.\n", 549 | "Looking up #53140: Paris en América\n", 550 | "By: Lefebvre, Renato\n", 551 | "Couldn't find an appropriate work in the list.\n", 552 | "Looking up #53141: Histoires souveraines\n", 553 | "By: L'Isle-Adam, Auguste de Villiers de\n", 554 | "Couldn't find an appropriate work in the list.\n", 555 | "Looking up #53142: The Irish Penny Journal, Vol. 1 No. 20, November 14, 1840\n", 556 | "By: Various\n", 557 | "Couldn't find an appropriate work in the list.\n", 558 | "Looking up #53143: Nelson The Newsboy: Or, Afloat in New York\n", 559 | "By: Stratemeyer, Edward\n", 560 | "Couldn't find an appropriate work in the list.\n", 561 | "Looking up #53144: Köyhäin aarteet\n", 562 | "By: Maeterlinck, Maurice\n", 563 | "pubDate: 1896--\n", 564 | "rating: 3.91\n", 565 | "id: 1207891\n", 566 | "numReviews: 20\n", 567 | "Looking up #53145: Schilderungen des Treibens im Leben und Handel in den Vereinigten Staaten und Havana.: Gezeichnet auf Reisen in den Jahren 1838 und 1839\n", 568 | "By: Ries, Julius\n", 569 | "pubDate: 2013-4-21\n", 570 | "rating: 0.0\n", 571 | "id: 28152834\n", 572 | "numReviews: 0\n", 573 | "Looking up #53146: A Complete History of Music: for Schools, Clubs, and Private Readings\n", 574 | "By: Baltzell, W. J. 
(Winton James)\n", 575 | "Couldn't find an appropriate work in the list.\n", 576 | "Looking up #53147: Prétextes: Réflexions sur quelques points de littérature et de morale\n", 577 | "By: Gide, André\n", 578 | "pubDate: --\n", 579 | "rating: 0.0\n", 580 | "id: 56327962\n", 581 | "numReviews: 0\n", 582 | "Looking up #53148: A Biography of the Signers of the Declaration of Independence, and of Washington and Patrick Henry: With an appendix, containing the Constitution of the United; States and other documents\n", 583 | "By: Judson, L. Carroll (Levi Carroll)\n", 584 | "pubDate: 2010-8-1\n", 585 | "rating: 3.00\n", 586 | "id: 13910785\n", 587 | "numReviews: 0\n", 588 | "Looking up #53149: Nirvana: Lemmentarina\n", 589 | "By: Kianto, Ilmari\n", 590 | "Couldn't find an appropriate work in the list.\n", 591 | "Looking up #53150: The Irish Penny Journal, Vol. 1 No. 21, November 21, 1840\n", 592 | "By: Various\n", 593 | "Couldn't find an appropriate work in the list.\n", 594 | "Looking up #53151: Lois psychologiques de l'évolution des peuples\n", 595 | "By: Le Bon, Gustave\n", 596 | "pubDate: 1898--\n", 597 | "rating: 3.46\n", 598 | "id: 7382625\n", 599 | "numReviews: 47\n", 600 | "Looking up #53152: Kertomuksia Intiasta\n", 601 | "By: Kipling, Rudyard\n", 602 | "pubDate: 1955--\n", 603 | "rating: 0.0\n", 604 | "id: 53934219\n", 605 | "numReviews: 0\n", 606 | "Looking up #53153: Mehalah: A Story of the Salt Marshes\n", 607 | "By: Baring-Gould, S. 
(Sabine)\n", 608 | "Couldn't find an appropriate work in the list.\n", 609 | "Looking up #53154: Motor Tours in the West Country\n", 610 | "By: Stawell, Rodolph, Mrs.\n", 611 | "Couldn't find an appropriate work in the list.\n", 612 | "Looking up #53155: The Religious Thought of the Greeks\n", 613 | "By: Moore, Clifford Herschel\n", 614 | "pubDate: 2003-4-1\n", 615 | "rating: 3.00\n", 616 | "id: 18172981\n", 617 | "numReviews: 0\n", 618 | "Looking up #53156: Gleaning of a Mystic: A Series of Essays on Practical Mysticism\n", 619 | "By: Heindel, Max\n", 620 | "pubDate: --\n", 621 | "rating: 0.0\n", 622 | "id: 51855317\n", 623 | "numReviews: 0\n", 624 | "Looking up #53157: Valittuja kertomuksia\n", 625 | "By: Kipling, Rudyard\n", 626 | "Couldn't find an appropriate work in the list.\n", 627 | "Looking up #53158: New England Joke Lore: The Tonic of Yankee Humor\n", 628 | "By: Crandall, Arthur George\n", 629 | "pubDate: --\n", 630 | "rating: 0.0\n", 631 | "id: 49430667\n", 632 | "numReviews: 0\n", 633 | "Looking up #53159: Piccole anime\n", 634 | "By: None\n", 635 | "One of these authors is None, assuming it's OK.\n", 636 | "pubDate: 2011-9-10\n", 637 | "rating: 4.00\n", 638 | "id: 17947664\n", 639 | "numReviews: 1\n" 640 | ] 641 | }, 642 | { 643 | "name": "stdout", 644 | "output_type": "stream", 645 | "text": [ 646 | "Looking up #53160: nan\n", 647 | "By: nan\n", 648 | "One of these authors is None, assuming it's OK.\n", 649 | "pubDate: 2012-1-10\n", 650 | "rating: 4.27\n", 651 | "id: 16827462\n", 652 | "numReviews: 140061\n", 653 | "Looking up #53161: nan\n", 654 | "By: nan\n", 655 | "One of these authors is None, assuming it's OK.\n", 656 | "pubDate: 2012-1-10\n", 657 | "rating: 4.27\n", 658 | "id: 16827462\n", 659 | "numReviews: 140061\n", 660 | "Looking up #53162: nan\n", 661 | "By: nan\n", 662 | "One of these authors is None, assuming it's OK.\n", 663 | "pubDate: 2012-1-10\n", 664 | "rating: 4.27\n", 665 | "id: 16827462\n", 666 | "numReviews: 140061\n", 667 | 
"Looking up #53163: nan\n", 668 | "By: nan\n", 669 | "One of these authors is None, assuming it's OK.\n", 670 | "pubDate: 2012-1-10\n", 671 | "rating: 4.27\n", 672 | "id: 16827462\n", 673 | "numReviews: 140061\n", 674 | "Looking up #53164: nan\n", 675 | "By: nan\n", 676 | "One of these authors is None, assuming it's OK.\n", 677 | "pubDate: 2012-1-10\n", 678 | "rating: 4.27\n", 679 | "id: 16827462\n", 680 | "numReviews: 140061\n", 681 | "Looking up #53165: nan\n", 682 | "By: nan\n", 683 | "One of these authors is None, assuming it's OK.\n", 684 | "pubDate: 2012-1-10\n", 685 | "rating: 4.27\n", 686 | "id: 16827462\n", 687 | "numReviews: 140061\n", 688 | "Looking up #53166: nan\n", 689 | "By: nan\n", 690 | "One of these authors is None, assuming it's OK.\n", 691 | "pubDate: 2012-1-10\n", 692 | "rating: 4.27\n", 693 | "id: 16827462\n", 694 | "numReviews: 140061\n", 695 | "Looking up #53167: nan\n", 696 | "By: nan\n", 697 | "One of these authors is None, assuming it's OK.\n", 698 | "pubDate: 2012-1-10\n", 699 | "rating: 4.27\n", 700 | "id: 16827462\n", 701 | "numReviews: 140061\n", 702 | "Looking up #53168: nan\n", 703 | "By: nan\n", 704 | "One of these authors is None, assuming it's OK.\n", 705 | "pubDate: 2012-1-10\n", 706 | "rating: 4.27\n", 707 | "id: 16827462\n", 708 | "numReviews: 140061\n", 709 | "Looking up #53169: nan\n", 710 | "By: nan\n", 711 | "One of these authors is None, assuming it's OK.\n", 712 | "pubDate: 2012-1-10\n", 713 | "rating: 4.27\n", 714 | "id: 16827462\n", 715 | "numReviews: 140061\n", 716 | "Looking up #53170: nan\n", 717 | "By: nan\n", 718 | "One of these authors is None, assuming it's OK.\n", 719 | "pubDate: 2012-1-10\n", 720 | "rating: 4.27\n", 721 | "id: 16827462\n", 722 | "numReviews: 140061\n", 723 | "Looking up #53171: nan\n", 724 | "By: nan\n", 725 | "One of these authors is None, assuming it's OK.\n", 726 | "pubDate: 2012-1-10\n", 727 | "rating: 4.27\n", 728 | "id: 16827462\n", 729 | "numReviews: 140061\n", 730 | "Looking up #53172: 
nan\n", 731 | "By: nan\n", 732 | "One of these authors is None, assuming it's OK.\n", 733 | "pubDate: 2012-1-10\n", 734 | "rating: 4.27\n", 735 | "id: 16827462\n", 736 | "numReviews: 140061\n", 737 | "Looking up #53173: nan\n", 738 | "By: nan\n", 739 | "One of these authors is None, assuming it's OK.\n", 740 | "pubDate: 2012-1-10\n", 741 | "rating: 4.27\n", 742 | "id: 16827462\n", 743 | "numReviews: 140061\n", 744 | "Looking up #53174: nan\n", 745 | "By: nan\n", 746 | "One of these authors is None, assuming it's OK.\n", 747 | "pubDate: 2012-1-10\n", 748 | "rating: 4.27\n", 749 | "id: 16827462\n", 750 | "numReviews: 140061\n", 751 | "Looking up #53175: nan\n", 752 | "By: nan\n", 753 | "One of these authors is None, assuming it's OK.\n", 754 | "pubDate: 2012-1-10\n", 755 | "rating: 4.27\n", 756 | "id: 16827462\n", 757 | "numReviews: 140061\n", 758 | "Looking up #53176: nan\n", 759 | "By: nan\n", 760 | "One of these authors is None, assuming it's OK.\n", 761 | "pubDate: 2012-1-10\n", 762 | "rating: 4.27\n", 763 | "id: 16827462\n", 764 | "numReviews: 140061\n", 765 | "Looking up #53177: nan\n", 766 | "By: nan\n", 767 | "One of these authors is None, assuming it's OK.\n", 768 | "pubDate: 2012-1-10\n", 769 | "rating: 4.27\n", 770 | "id: 16827462\n", 771 | "numReviews: 140061\n", 772 | "Looking up #53178: nan\n", 773 | "By: nan\n", 774 | "One of these authors is None, assuming it's OK.\n", 775 | "pubDate: 2012-1-10\n", 776 | "rating: 4.27\n", 777 | "id: 16827462\n", 778 | "numReviews: 140061\n", 779 | "Looking up #53179: nan\n", 780 | "By: nan\n", 781 | "One of these authors is None, assuming it's OK.\n", 782 | "pubDate: 2012-1-10\n", 783 | "rating: 4.27\n", 784 | "id: 16827462\n", 785 | "numReviews: 140061\n", 786 | "Looking up #53180: nan\n", 787 | "By: nan\n", 788 | "One of these authors is None, assuming it's OK.\n", 789 | "pubDate: 2012-1-10\n", 790 | "rating: 4.27\n", 791 | "id: 16827462\n", 792 | "numReviews: 140061\n", 793 | "Looking up #53181: nan\n", 794 | "By: 
nan\n", 795 | "One of these authors is None, assuming it's OK.\n", 796 | "pubDate: 2012-1-10\n", 797 | "rating: 4.27\n", 798 | "id: 16827462\n", 799 | "numReviews: 140061\n", 800 | "Looking up #53182: nan\n", 801 | "By: nan\n", 802 | "One of these authors is None, assuming it's OK.\n", 803 | "pubDate: 2012-1-10\n", 804 | "rating: 4.27\n", 805 | "id: 16827462\n", 806 | "numReviews: 140061\n", 807 | "Looking up #53183: nan\n", 808 | "By: nan\n", 809 | "One of these authors is None, assuming it's OK.\n", 810 | "pubDate: 2012-1-10\n", 811 | "rating: 4.27\n", 812 | "id: 16827462\n", 813 | "numReviews: 140061\n", 814 | "Looking up #53184: nan\n", 815 | "By: nan\n", 816 | "One of these authors is None, assuming it's OK.\n", 817 | "pubDate: 2012-1-10\n", 818 | "rating: 4.27\n", 819 | "id: 16827462\n", 820 | "numReviews: 140061\n", 821 | "Looking up #53185: nan\n", 822 | "By: nan\n", 823 | "One of these authors is None, assuming it's OK.\n", 824 | "pubDate: 2012-1-10\n", 825 | "rating: 4.27\n", 826 | "id: 16827462\n", 827 | "numReviews: 140061\n", 828 | "Looking up #53186: nan\n", 829 | "By: nan\n", 830 | "One of these authors is None, assuming it's OK.\n", 831 | "pubDate: 2012-1-10\n", 832 | "rating: 4.27\n", 833 | "id: 16827462\n", 834 | "numReviews: 140061\n", 835 | "Looking up #53187: nan\n", 836 | "By: nan\n", 837 | "One of these authors is None, assuming it's OK.\n", 838 | "pubDate: 2012-1-10\n", 839 | "rating: 4.27\n", 840 | "id: 16827462\n", 841 | "numReviews: 140061\n", 842 | "Looking up #53188: nan\n", 843 | "By: nan\n", 844 | "One of these authors is None, assuming it's OK.\n", 845 | "pubDate: 2012-1-10\n", 846 | "rating: 4.27\n", 847 | "id: 16827462\n", 848 | "numReviews: 140061\n", 849 | "Looking up #53189: nan\n", 850 | "By: nan\n", 851 | "One of these authors is None, assuming it's OK.\n", 852 | "pubDate: 2012-1-10\n", 853 | "rating: 4.27\n", 854 | "id: 16827462\n", 855 | "numReviews: 140061\n", 856 | "Looking up #53190: nan\n", 857 | "By: nan\n", 858 | "One 
# For every row of df whose index lies strictly between start and end, look
# the title up on Goodreads and copy the matched work's fields into the
# gr_* columns of df.
start, end = 53043, 100000
for i, row in df.iterrows():
    if not (start < i < end):
        continue
    if not isinstance(row.title, str) or not row.title.strip():
        # A missing title (NaN) would be sent to the API as the literal
        # query 'nan' and come back with an unrelated work, so skip the row
        # without spending a request on it.
        print('Skipping #%s: no title' % i)
        continue
    print('Looking up #%s: %s' % (i, row.title))
    print('By: ', row.author)
    gr = goodReads(row.title, row.author)
    if gr.found:
        # DataFrame.set_value was removed in pandas 1.0; .at is the
        # scalar setter that replaces it.
        df.at[i, 'gr_rating'] = gr.rating
        df.at[i, 'gr_numReviews'] = gr.numReviews
        df.at[i, 'gr_pubDate'] = gr.pubDate
        df.at[i, 'gr_id'] = gr.id
        df.at[i, 'gr_info'] = str(gr.bestWork)
        df.at[i, 'gr_author_id'] = gr.authorID
        df.at[i, 'gr_image_url'] = gr.imageURL
        df.at[i, 'gr_small_image_url'] = gr.smallImageURL
    # Pause between lookups so requests are at least a second apart.
    time.sleep(1)

# Persist the augmented table under the 'pg' key.
df.to_hdf('pg-text-6-goodreads.hdf', key='pg')

len(df)
from multiprocessing.dummy import Pool as ThreadPool

# Apply my_function to every element of my_array on four worker threads.
# my_function and my_array are placeholders that must be defined before this
# cell runs.  The context manager shuts the pool down once map() has
# returned, so no worker threads are left behind.
with ThreadPool(4) as pool:
    results = pool.map(my_function, my_array)