├── sampleECCOtxt.xml
├── sampleECCOtextTravel.xml
├── XmlParsingExperiments.ipynb
└── LoCdata-Gale-LXML.v0.2.ipynb
/sampleECCOtxt.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sgsinclair/experiments/master/sampleECCOtxt.xml
--------------------------------------------------------------------------------
/sampleECCOtextTravel.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sgsinclair/experiments/master/sampleECCOtextTravel.xml
--------------------------------------------------------------------------------
/XmlParsingExperiments.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 50,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "from lxml import etree\n",
12 | "\n",
13 | "testfile = \"sampleECCOtxt.xml\"\n",
14 | "taglist = ['documentID', 'ESTCID', 'pubDate',  # list each tag once: iterparse_multiple re-parses the file per entry\n",
15 | "           'language', 'module', 'locSubject', 'notes',\n",
16 | "           'fullTitle', 'displayTitle', 'currentVolume',\n",
17 | "           'totalVolumes', 'imprintPublisher', 'imprintFull',\n",
18 | "           'imprintCity', 'publicationPlace']"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 51,
24 | "metadata": {
25 | "collapsed": true
26 | },
27 | "outputs": [],
28 | "source": [
29 | "def iterparse_multiple(file, tags):\n",
30 | "    \"\"\"Return the text of every element named in `tags`, re-parsing `file` once per tag.\"\"\"\n",
31 | "    elementlist = []\n",
32 | "    for xmltag in tags:\n",
33 | "        context = etree.iterparse(file, events=('end',), tag=xmltag)\n",
34 | "        for event, elem in context:\n",
35 | "            elementlist.append(elem.text)\n",
36 | "    return elementlist"
37 | ]
38 | },
39 | {
40 | "cell_type": "code",
41 | "execution_count": 52,
42 | "metadata": {},
43 | "outputs": [
44 | {
45 | "name": "stdout",
46 | "output_type": "stream",
47 | "text": [
48 | "CPU times: user 5.56 s, sys: 863 ms, total: 6.43 s\n",
49 | "Wall time: 6.43 s\n"
50 | ]
51 | },
52 | {
53 | "data": {
54 | "text/plain": [
55 | "21"
56 | ]
57 | },
58 | "execution_count": 52,
59 | "metadata": {},
60 | "output_type": "execute_result"
61 | }
62 | ],
63 | "source": [
64 | "%%time \n",
65 | "\n",
66 | "len(iterparse_multiple(testfile, taglist))"
67 | ]
68 | },
69 | {
70 | "cell_type": "code",
71 | "execution_count": 55,
72 | "metadata": {
73 | "collapsed": true
74 | },
75 | "outputs": [],
76 | "source": [
77 | "def iterparse_single(file, tags):\n",
78 | "    \"\"\"Return the text of every element named in `tags`, parsing `file` only once.\"\"\"\n",
79 | "    tagset = set(tags)\n",
80 | "    elementlist = []\n",
81 | "    # no tag filter here: every element is yielded and checked against the set instead\n",
82 | "    for event, elem in etree.iterparse(file):\n",
83 | "        if elem.tag in tagset:\n",
84 | "            elementlist.append(elem.text)\n",
85 | "    return elementlist"
86 | ]
87 | },
88 | {
89 | "cell_type": "code",
90 | "execution_count": 56,
91 | "metadata": {},
92 | "outputs": [
93 | {
94 | "name": "stdout",
95 | "output_type": "stream",
96 | "text": [
97 | "CPU times: user 602 ms, sys: 61 ms, total: 663 ms\n",
98 | "Wall time: 661 ms\n"
99 | ]
100 | },
101 | {
102 | "data": {
103 | "text/plain": [
104 | "20"
105 | ]
106 | },
107 | "execution_count": 56,
108 | "metadata": {},
109 | "output_type": "execute_result"
110 | }
111 | ],
112 | "source": [
113 | "%%time \n",
114 | "\n",
115 | "len(iterparse_single(testfile, taglist))"
116 | ]
117 | }
118 | ],
119 | "metadata": {
120 | "kernelspec": {
121 | "display_name": "Python 3",
122 | "language": "python",
123 | "name": "python3"
124 | },
125 | "language_info": {
126 | "codemirror_mode": {
127 | "name": "ipython",
128 | "version": 3
129 | },
130 | "file_extension": ".py",
131 | "mimetype": "text/x-python",
132 | "name": "python",
133 | "nbconvert_exporter": "python",
134 | "pygments_lexer": "ipython3",
135 | "version": "3.6.3"
136 | }
137 | },
138 | "nbformat": 4,
139 | "nbformat_minor": 2
140 | }
141 |
--------------------------------------------------------------------------------
/LoCdata-Gale-LXML.v0.2.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Skimming LCSH data from Gale files\n",
8 | "\n",
9 | "The goal of this notebook is to figure out a quick way to skim the thousands of ECCO and NCCO file metadata from Gale-Cengage, discover if they have a relevant Library of Congress Subject Heading, and, if the file does have a relevant LCSH, add it to a list.\n",
10 | "\n",
11 | "The terms I am looking for include `'travel', 'discov', 'explor', 'voyage', 'guide', 'antiquit'`. These are the same terms that I used to search the HTRC.\n",
12 | "\n",
13 | "In a past notebook, I struggled with using xml, in part because it was very slow to open an xml file, read the whole thing in to BeautifulSoup, and then see if what I wanted was there. This [lxml method from IBM](https://www.ibm.com/developerworks/xml/library/x-hiperfparse/), however, was helpful in developing something quicker."
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 1,
19 | "metadata": {
20 | "collapsed": true
21 | },
22 | "outputs": [],
23 | "source": [
24 | "from lxml import etree\n",
25 | "import pandas as pd\n",
26 | "import glob\n",
27 | "import lxml"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 4,
33 | "metadata": {},
34 | "outputs": [
35 | {
36 | "name": "stdout",
37 | "output_type": "stream",
38 | "text": [
39 | "Wall time: 866 ms\n"
40 | ]
41 | }
42 | ],
43 | "source": [
44 | "%%time \n",
45 | "\n",
46 | "context = etree.iterparse('files/sampleECCOtxt.xml', events=('end',), tag='locSubject')\n",
47 | "\n",
48 | "loclist = []\n",
49 | "for event, elem in context:\n",
50 | " loclist.append(elem.text)\n",
51 | "# Let's just try printing these in a few different ways"
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": 4,
57 | "metadata": {},
58 | "outputs": [
59 | {
60 | "data": {
61 | "text/plain": [
62 | "'Westminster (London, England); History; Early works to 1800; London (England); History; Early works to 1800'"
63 | ]
64 | },
65 | "execution_count": 4,
66 | "metadata": {},
67 | "output_type": "execute_result"
68 | }
69 | ],
70 | "source": [
71 | "string = '; '.join(loclist)\n",
72 | "string"
73 | ]
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": 5,
78 | "metadata": {
79 | "collapsed": true
80 | },
81 | "outputs": [],
82 | "source": [
83 | "# an iterparse context is a one-shot iterator and the loop above used it up, so make a new one\n",
84 | "for event, elem in etree.iterparse('files/sampleECCOtxt.xml', events=('end',), tag='locSubject'):\n",
85 | "    print(elem.text)"
86 | ]
87 | },
88 | {
89 | "cell_type": "code",
90 | "execution_count": 6,
91 | "metadata": {},
92 | "outputs": [
93 | {
94 | "data": {
95 | "text/plain": [
96 | "['Westminster (London, England)',\n",
97 | " 'History',\n",
98 | " 'Early works to 1800',\n",
99 | " 'London (England)',\n",
100 | " 'History',\n",
101 | " 'Early works to 1800']"
102 | ]
103 | },
104 | "execution_count": 6,
105 | "metadata": {},
106 | "output_type": "execute_result"
107 | }
108 | ],
109 | "source": [
110 | "loclist"
111 | ]
112 | },
113 | {
114 | "cell_type": "markdown",
115 | "metadata": {},
116 | "source": [
117 | "I should also not have to worry if the tag does not exist; as can be seen below, it won't break or throw an error - the result will just be empty. So, I should still be able to apply my general method of something like `if word is in termlist`."
118 | ]
119 | },
120 | {
121 | "cell_type": "code",
122 | "execution_count": 7,
123 | "metadata": {
124 | "collapsed": true
125 | },
126 | "outputs": [],
127 | "source": [
128 | "contextnone = etree.iterparse('files/sampleECCOtxt.xml', events=('end',), tag='holdings2')"
129 | ]
130 | },
131 | {
132 | "cell_type": "code",
133 | "execution_count": 8,
134 | "metadata": {},
135 | "outputs": [
136 | {
137 | "data": {
138 | "text/plain": [
139 | "[]"
140 | ]
141 | },
142 | "execution_count": 8,
143 | "metadata": {},
144 | "output_type": "execute_result"
145 | }
146 | ],
147 | "source": [
148 | "emptylist = []\n",
149 | "for event, elem in contextnone:\n",
150 | " emptylist.append(elem.text)\n",
151 | "emptylist"
152 | ]
153 | },
154 | {
155 | "cell_type": "markdown",
156 | "metadata": {},
157 | "source": [
158 | "### Running over multiple files\n",
159 | " \n",
160 | "Let's try running this over a few files to see what happens. The main chunk of code below was originally running BeautifulSoup, which took way too long; hopefully this will move a bit more quickly!\n",
161 | "\n",
162 | "I will also need a list of metadata tags to iterate through, in order to grab the metadata for the files that I want. Let's do that first.\n",
163 | "\n",
164 | "I also have to remember that although this chunk of code should work for ECCO files, I may have to adjust it slightly to work with NCCO files, especially since lxml tags are sensitive to capital letters (or at least, they are as they are used above)."
165 | ]
166 | },
167 | {
168 | "cell_type": "code",
169 | "execution_count": 11,
170 | "metadata": {
171 | "collapsed": true
172 | },
173 | "outputs": [],
174 | "source": [
175 | "taglist = ['documentID', 'ESTCID', 'pubDate',  # list each tag once: every entry costs a full parse of the file\n",
176 | "           'language', 'module', 'locSubject', 'notes',\n",
177 | "           'fullTitle', 'displayTitle', 'currentVolume',\n",
178 | "           'totalVolumes', 'imprintPublisher', 'imprintFull',\n",
179 | "           'imprintCity', 'publicationPlace']"
180 | ]
181 | },
182 | {
183 | "cell_type": "code",
184 | "execution_count": 11,
185 | "metadata": {
186 | "collapsed": true
187 | },
188 | "outputs": [],
189 | "source": [
190 | "# set up a dict to hold all the strings of all these elements\n",
191 | "elementdict = {}\n",
192 | "\n",
193 | "for xmltag in taglist:\n",
194 | "    elementlist = []\n",
195 | "    context = etree.iterparse('files/sampleECCOtxt.xml', events=('end',), tag=xmltag)\n",
196 | "    for event, elem in context:\n",
197 | "        # an empty element has .text == None, which str.join would reject\n",
198 | "        elementlist.append(elem.text or '')\n",
199 | "    # several elements can share a tag (e.g. locSubject), so join them into one string\n",
200 | "    elementdict[xmltag] = ', '.join(elementlist)"
201 | ]
202 | },
203 | {
204 | "cell_type": "code",
205 | "execution_count": 12,
206 | "metadata": {},
207 | "outputs": [
208 | {
209 | "data": {
210 | "text/plain": [
211 | "{'ESTCID': 'T228085',\n",
212 | " 'currentVolume': 'Volume 2',\n",
213 | " 'displayTitle': 'A new and compleat survey of London. In ten parts. I. All the publick transactions and memorable events, that have happened to the citizens, from ...',\n",
214 | " 'documentID': '1299400102',\n",
215 | " 'fullTitle': 'A new and compleat survey of London. In ten parts. I. All the publick transactions and memorable events, that have happened to the citizens, from its foundation, to the year 1742. II. A particular description of the thirteen wards on the East of Walbrook. III. Of the twelve wards on the West of Walbrook. IV. A political account of London; parallels between this and the most celebrated cities of antiquity, as well as the modern great cities of Europe, Asia and Africa. V. An historical account of the city governments, ecclesiastical, civil and military. VI. A full account of the great and extensive commerce of the city; and of the several incorporations of the arts and mysteries of the citizens. VII. Of the present state of learning in this city. VIII. History and antiquities of Westminster; its government, ecclesiastical and civil. IX. Of the several parishes and liberties in the county of Middlesex, within the bill of mortality. X. Of the borough of Southwark, and places contiguous in the county of Surry. In two volumes. By a citizen, and native of London.',\n",
216 | " 'imprintCity': 'London',\n",
217 | " 'imprintFull': 'London : printed for S. Lyne, at the Globe in Newgate-street; and J. Ilive, in Aldersgate-street, MDCCXLII. [1742]',\n",
218 | " 'imprintPublisher': 'printed for S. Lyne, at the Globe in Newgate-street; and J. Ilive, in Aldersgate-street',\n",
219 | " 'language': 'English',\n",
220 | " 'locSubject': 'Westminster (London, England), History, Early works to 1800, London (England), History, Early works to 1800',\n",
221 | " 'module': 'History and Geography',\n",
222 | " 'notes': 'In fact in two volumes; volume 2 has a separate register, but continuous pagination and the imprint reads: \"London: printed for S. Lyne; and J. Ilive, 1742\". With an index.',\n",
223 | " 'pubDate': '17420101',\n",
224 | " 'publicationPlace': 'London',\n",
225 | " 'totalVolumes': '2'}"
226 | ]
227 | },
228 | "execution_count": 12,
229 | "metadata": {},
230 | "output_type": "execute_result"
231 | }
232 | ],
233 | "source": [
234 | "#aaaand, let's see if it worked\n",
235 | "elementdict"
236 | ]
237 | },
238 | {
239 | "cell_type": "markdown",
240 | "metadata": {},
241 | "source": [
242 | "Great! And if we set up a file list and try to do two files, one with travel, one without, let's see if we can get that working..."
243 | ]
244 | },
245 | {
246 | "cell_type": "code",
247 | "execution_count": 7,
248 | "metadata": {
249 | "collapsed": true
250 | },
251 | "outputs": [],
252 | "source": [
253 | "twofiles = ['files/sampleECCOtextTravel.xml', 'files/sampleECCOtxt.xml']"
254 | ]
255 | },
256 | {
257 | "cell_type": "code",
258 | "execution_count": 9,
259 | "metadata": {
260 | "collapsed": true
261 | },
262 | "outputs": [],
263 | "source": [
264 | "# first, we only want the files that have certain LCSH\n",
265 | "# so let's set up a list of tags\n",
266 | "termlist = ['travel', 'discov', 'explor', 'voyage', 'guide', 'antiquit']\n",
267 | "\n",
268 | "teststring = 'Italy, Description and travel, Early works to 1800'\n"
269 | ]
270 | },
271 | {
272 | "cell_type": "code",
273 | "execution_count": 15,
274 | "metadata": {},
275 | "outputs": [
276 | {
277 | "name": "stdout",
278 | "output_type": "stream",
279 | "text": [
280 | "yes\n"
281 | ]
282 | }
283 | ],
284 | "source": [
285 | "# just to make sure that my if statement will work:\n",
286 | "if any(x in teststring for x in termlist):\n",
287 | " print('yes')\n",
288 | "else:\n",
289 | " print('no')"
290 | ]
291 | },
292 | {
293 | "cell_type": "markdown",
294 | "metadata": {},
295 | "source": [
296 | "## _note_ Run the next cell before the below ones\n",
297 | "Make sure to establish the `taglist` and `termlist`"
298 | ]
299 | },
300 | {
301 | "cell_type": "code",
302 | "execution_count": 2,
303 | "metadata": {},
304 | "outputs": [],
305 | "source": [
306 | "import pandas as pd\n",
307 | "import glob\n",
308 | "import lxml\n",
309 | "from lxml import etree  # the cells below call etree.iterparse\n",
310 | "\n",
311 | "taglist = ['documentID', 'ESTCID', 'pubDate', 'language', 'module',\n",
312 | "           'locSubject', 'notes', 'fullTitle', 'displayTitle', 'currentVolume',\n",
313 | "           'totalVolumes', 'imprintPublisher', 'imprintFull',\n",
314 | "           'imprintCity', 'publicationPlace']\n",
315 | "termlist = ['travel', 'discov', 'explor', 'voyage', 'guide', 'antiquit']"
316 | ]
317 | },
318 | {
319 | "cell_type": "markdown",
320 | "metadata": {},
321 | "source": [
322 | "The next three cells have variations of:\n",
323 | "1. two files, one with travel and one without: `8.82 s ± 2.84 s per loop (mean ± std. dev. of 10 runs, 1 loop each)`\n",
324 | "2. one file, with travel: `7.18 s ± 751 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)`\n",
325 | "3. one file, without travel: `2.78 s ± 334 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)`\n",
326 | "\n",
327 | "So, in general, if I have 3000 files and 300 of them are travel related (as was roughly the case below), it would take 9,720 s (162 minutes / 2.7 hours).\n",
328 | "\n",
329 | "Is there a way to do this more quickly? At this rate, since there are about 200,000 files on ECCO I and ECCO II, it will take 648,000 seconds, or 180 hours. Ouch. \n",
330 | "\n",
331 | "Of course, some of these sections - like medicine or Lit&Lang - will be much lower in the number of travel texts that they have, so that number of hours could drop by a lot. For example, if only 2% (rather than 10%) of all 200,000 files have a travel tag, the time then becomes 577,600 seconds, or 160ish hours. \n",
332 | "\n",
333 | "These files are on my local hard drive, and not on the Gale hard drive, so I am guessing these loops might go a bit faster than when I'm using the Gale hard drive? Google did not provide an easy answer to this."
334 | ]
335 | },
336 | {
337 | "cell_type": "code",
338 | "execution_count": 6,
339 | "metadata": {},
340 | "outputs": [
341 | {
342 | "name": "stdout",
343 | "output_type": "stream",
344 | "text": [
345 | "8.82 s ± 2.84 s per loop (mean ± std. dev. of 10 runs, 1 loop each)\n"
346 | ]
347 | }
348 | ],
349 | "source": [
350 | "%%timeit -r 10\n",
351 | "\n",
352 | "# this list will hold one dict for each relevant file;\n",
353 | "# each dict holds the metadata for that file\n",
354 | "listofdicts = []\n",
355 | "\n",
356 | "twofiles = ['files/sampleECCOtextTravel.xml', 'files/sampleECCOtxt.xml']\n",
357 | "\n",
358 | "for file in twofiles:\n",
359 | " testparse = etree.iterparse(file, events=('end',), tag = 'locSubject')\n",
360 | " testlist = []\n",
361 | " for event, elem in testparse:\n",
362 | " testlist.append(elem.text)\n",
363 | " # reclaim the memory at the end of each loop -\n",
364 | " # clears unneeded node references\n",
365 | " elem.clear()\n",
366 | " while elem.getprevious() is not None:\n",
367 | " del elem.getparent()[0]\n",
368 | " \n",
369 | " teststring = ', '.join(testlist) \n",
370 | " # print(teststring)\n",
371 | " \n",
372 | " if any(lcsh in teststring for lcsh in termlist):\n",
373 | " # print('yes')\n",
374 | " \n",
375 | " # if that is true, we want the metadata\n",
376 | " # so let's make a dict to hold it\n",
377 | " # this dict will be reset with every file loop\n",
378 | " filedict = {}\n",
379 | " \n",
380 | " for xmltag in taglist:\n",
381 | " # make an empty list to hold what is in each tag, \n",
382 | " # which will be written to our dict in a few steps\n",
383 | " elementlist = []\n",
384 | " \n",
385 | " context = etree.iterparse(file, events=('end',), tag=xmltag)\n",
386 | " for event, elem in context:\n",
387 | " elementlist.append(elem.text)\n",
388 | " \n",
389 | " # the below should make things faster - I think? \n",
390 | " # reclaim the memory at the end of each loop -\n",
391 | " # clears unneeded node references\n",
392 | " elem.clear()\n",
393 | " while elem.getprevious() is not None:\n",
394 | " del elem.getparent()[0]\n",
395 | " # assign to the dictionary\n",
396 | " filedict[xmltag] = ', '.join(elementlist)\n",
397 | " \n",
398 | " # after it has looped through all the xmltags, \n",
399 | " # add the filedict to list of dicts \n",
400 | " listofdicts.append(filedict)\n",
401 | " #else:\n",
402 | " # print('no')"
403 | ]
404 | },
405 | {
406 | "cell_type": "code",
407 | "execution_count": 7,
408 | "metadata": {},
409 | "outputs": [
410 | {
411 | "name": "stdout",
412 | "output_type": "stream",
413 | "text": [
414 | "7.18 s ± 751 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)\n"
415 | ]
416 | }
417 | ],
418 | "source": [
419 | "%%timeit -r 10\n",
420 | "\n",
421 | "# this list will hold one dict for each relevant file;\n",
422 | "# each dict holds the metadata for that file\n",
423 | "listofdicts = []\n",
424 | "\n",
425 | "travellist = ['files/sampleECCOtextTravel.xml']\n",
426 | "\n",
427 | "for file in travellist:\n",
428 | " testparse = etree.iterparse(file, events=('end',), tag = 'locSubject')\n",
429 | " testlist = []\n",
430 | " for event, elem in testparse:\n",
431 | " testlist.append(elem.text)\n",
432 | " # reclaim the memory at the end of each loop -\n",
433 | " # clears unneeded node references\n",
434 | " elem.clear()\n",
435 | " while elem.getprevious() is not None:\n",
436 | " del elem.getparent()[0]\n",
437 | " \n",
438 | " teststring = ', '.join(testlist) \n",
439 | " # print(teststring)\n",
440 | " \n",
441 | " if any(lcsh in teststring for lcsh in termlist):\n",
442 | " # print('yes')\n",
443 | " \n",
444 | " # if that is true, we want the metadata\n",
445 | " # so let's make a dict to hold it\n",
446 | " # this dict will be reset with every file loop\n",
447 | " filedict = {}\n",
448 | " \n",
449 | " for xmltag in taglist:\n",
450 | " # make an empty list to hold what is in each tag, \n",
451 | " # which will be written to our dict in a few steps\n",
452 | " elementlist = []\n",
453 | " \n",
454 | " context = etree.iterparse(file, events=('end',), tag=xmltag)\n",
455 | " for event, elem in context:\n",
456 | " elementlist.append(elem.text)\n",
457 | " \n",
458 | " # the below should make things faster - I think? \n",
459 | " # reclaim the memory at the end of each loop -\n",
460 | " # clears unneeded node references\n",
461 | " elem.clear()\n",
462 | " while elem.getprevious() is not None:\n",
463 | " del elem.getparent()[0]\n",
464 | " # assign to the dictionary\n",
465 | " filedict[xmltag] = ', '.join(elementlist)\n",
466 | " \n",
467 | " # after it has looped through all the xmltags, \n",
468 | " # add the filedict to list of dicts \n",
469 | " listofdicts.append(filedict)\n",
470 | " #else:\n",
471 | " # print('no')\n",
472 | " "
473 | ]
474 | },
475 | {
476 | "cell_type": "code",
477 | "execution_count": 8,
478 | "metadata": {},
479 | "outputs": [
480 | {
481 | "name": "stdout",
482 | "output_type": "stream",
483 | "text": [
484 | "2.78 s ± 334 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)\n"
485 | ]
486 | }
487 | ],
488 | "source": [
489 | "%%timeit -r 10\n",
490 | "\n",
491 | "# this list will hold one dict for each relevant file;\n",
492 | "# each dict holds the metadata for that file\n",
493 | "listofdicts = []\n",
494 | "\n",
495 | "nontravellist = ['files/sampleECCOtxt.xml']\n",
496 | "\n",
497 | "for file in nontravellist:\n",
498 | " testparse = etree.iterparse(file, events=('end',), tag = 'locSubject')\n",
499 | " testlist = []\n",
500 | " for event, elem in testparse:\n",
501 | " testlist.append(elem.text)\n",
502 | " # reclaim the memory at the end of each loop -\n",
503 | " # clears unneeded node references\n",
504 | " elem.clear()\n",
505 | " while elem.getprevious() is not None:\n",
506 | " del elem.getparent()[0]\n",
507 | " \n",
508 | " teststring = ', '.join(testlist) \n",
509 | " # print(teststring)\n",
510 | " \n",
511 | " if any(lcsh in teststring for lcsh in termlist):\n",
512 | " #print('yes')\n",
513 | " \n",
514 | " # if that is true, we want the metadata\n",
515 | " # so let's make a dict to hold it\n",
516 | " # this dict will be reset with every file loop\n",
517 | " filedict = {}\n",
518 | " \n",
519 | " for xmltag in taglist:\n",
520 | " # make an empty list to hold what is in each tag, \n",
521 | " # which will be written to our dict in a few steps\n",
522 | " elementlist = []\n",
523 | " \n",
524 | " context = etree.iterparse(file, events=('end',), tag=xmltag)\n",
525 | " for event, elem in context:\n",
526 | " elementlist.append(elem.text)\n",
527 | " \n",
528 | " # the below should make things faster - I think? \n",
529 | " # reclaim the memory at the end of each loop -\n",
530 | " # clears unneeded node references\n",
531 | " elem.clear()\n",
532 | " while elem.getprevious() is not None:\n",
533 | " del elem.getparent()[0]\n",
534 | " # assign to the dictionary\n",
535 | " filedict[xmltag] = ', '.join(elementlist)\n",
536 | " \n",
537 | " # after it has looped through all the xmltags, \n",
538 | " # add the filedict to list of dicts \n",
539 | " listofdicts.append(filedict)\n",
540 | " #else:\n",
541 | " #print('no')\n",
542 | " "
543 | ]
544 | },
545 | {
546 | "cell_type": "code",
547 | "execution_count": 18,
548 | "metadata": {},
549 | "outputs": [
550 | {
551 | "data": {
552 | "text/plain": [
553 | "[]"
554 | ]
555 | },
556 | "execution_count": 18,
557 | "metadata": {},
558 | "output_type": "execute_result"
559 | }
560 | ],
561 | "source": [
562 | "listofdicts"
563 | ]
564 | },
565 | {
566 | "cell_type": "code",
567 | "execution_count": 19,
568 | "metadata": {
569 | "collapsed": true
570 | },
571 | "outputs": [],
572 | "source": [
573 | "dictsdf = pd.DataFrame(listofdicts)"
574 | ]
575 | },
576 | {
577 | "cell_type": "code",
578 | "execution_count": 20,
579 | "metadata": {},
580 | "outputs": [
581 | {
582 | "data": {
583 | "text/html": [
584 | "
\n",
585 | "\n",
598 | "
\n",
599 | " \n",
600 | " \n",
601 | " | \n",
602 | " ESTCID | \n",
603 | " currentVolume | \n",
604 | " displayTitle | \n",
605 | " documentID | \n",
606 | " fullTitle | \n",
607 | " imprintCity | \n",
608 | " imprintFull | \n",
609 | " imprintPublisher | \n",
610 | " language | \n",
611 | " locSubject | \n",
612 | " module | \n",
613 | " notes | \n",
614 | " pubDate | \n",
615 | " publicationPlace | \n",
616 | " totalVolumes | \n",
617 | "
\n",
618 | " \n",
619 | " \n",
620 | " \n",
621 | " | 0 | \n",
622 | " T110070 | \n",
623 | " 0 | \n",
624 | " The travels of the learned Father Montfaucon f... | \n",
625 | " 0084800600 | \n",
626 | " The travels of the learned Father Montfaucon f... | \n",
627 | " London | \n",
628 | " London : printed by D. L. for E. Curll at the ... | \n",
629 | " printed by D. L. for E. Curll at the Dial and ... | \n",
630 | " English | \n",
631 | " Italy, Description and travel, Early works to ... | \n",
632 | " History and Geography | \n",
633 | " With an index. In this issue the imprint date... | \n",
634 | " 17120101 | \n",
635 | " London | \n",
636 | " 0 | \n",
637 | "
\n",
638 | " \n",
639 | "
\n",
640 | "
"
641 | ],
642 | "text/plain": [
643 | " ESTCID currentVolume displayTitle \\\n",
644 | "0 T110070 0 The travels of the learned Father Montfaucon f... \n",
645 | "\n",
646 | " documentID fullTitle imprintCity \\\n",
647 | "0 0084800600 The travels of the learned Father Montfaucon f... London \n",
648 | "\n",
649 | " imprintFull \\\n",
650 | "0 London : printed by D. L. for E. Curll at the ... \n",
651 | "\n",
652 | " imprintPublisher language \\\n",
653 | "0 printed by D. L. for E. Curll at the Dial and ... English \n",
654 | "\n",
655 | " locSubject module \\\n",
656 | "0 Italy, Description and travel, Early works to ... History and Geography \n",
657 | "\n",
658 | " notes pubDate \\\n",
659 | "0 With an index. In this issue the imprint date... 17120101 \n",
660 | "\n",
661 | " publicationPlace totalVolumes \n",
662 | "0 London 0 "
663 | ]
664 | },
665 | "execution_count": 20,
666 | "metadata": {},
667 | "output_type": "execute_result"
668 | }
669 | ],
670 | "source": [
671 | "dictsdf"
672 | ]
673 | },
674 | {
675 | "cell_type": "markdown",
676 | "metadata": {},
677 | "source": [
678 | "## Applying it to all the files\n",
679 | "\n",
680 | "Now that I have this code working, I should be able to apply it to all the other files.\n",
681 | "\n",
682 | "I've asked an astute colleague (thanks Jonathan!) to look over the above, in case there is some error that would cause something to cascade and run forever (or, just run really slow). That's most likely to happen because of my unfamiliarity with lxml. Pandas, I think, should be able to handle what I'm asking of it.\n",
683 | "\n",
684 | "So now, step one of this new task: get a file list. \n",
685 | "\n",
686 | "We'll start with the HistAndGeo section of ECCOII, since it has only 3385 files as opposed to the ~14000 files of ECCOI (ECCO was released in two segments). I'm also fairly hopeful that there should be at least one or two pieces of travel writing in there, since it is the ECCO genre most closely related to travel writing."
687 | ]
688 | },
689 | {
690 | "cell_type": "code",
691 | "execution_count": 21,
692 | "metadata": {
693 | "collapsed": true
694 | },
695 | "outputs": [],
696 | "source": [
697 | "filelist = glob.glob('D:/ECCOII 2001/HistAndGeo/XML/*.xml')"
698 | ]
699 | },
700 | {
701 | "cell_type": "code",
702 | "execution_count": 22,
703 | "metadata": {},
704 | "outputs": [
705 | {
706 | "data": {
707 | "text/plain": [
708 | "3384"
709 | ]
710 | },
711 | "execution_count": 22,
712 | "metadata": {},
713 | "output_type": "execute_result"
714 | }
715 | ],
716 | "source": [
717 | "len(filelist)"
718 | ]
719 | },
720 | {
721 | "cell_type": "code",
722 | "execution_count": 26,
723 | "metadata": {},
724 | "outputs": [
725 | {
726 | "data": {
727 | "text/plain": [
728 | "['D:/ECCOII 2001/HistAndGeo/XML\\\\1299100101.xml',\n",
729 | " 'D:/ECCOII 2001/HistAndGeo/XML\\\\1299100102.xml',\n",
730 | " 'D:/ECCOII 2001/HistAndGeo/XML\\\\1299100103.xml',\n",
731 | " 'D:/ECCOII 2001/HistAndGeo/XML\\\\1299100200.xml',\n",
732 | " 'D:/ECCOII 2001/HistAndGeo/XML\\\\1299100301.xml']"
733 | ]
734 | },
735 | "execution_count": 26,
736 | "metadata": {},
737 | "output_type": "execute_result"
738 | }
739 | ],
740 | "source": [
741 | "shortfilelist = filelist[:100]\n",
742 | "shortfilelist[:5]"
743 | ]
744 | },
745 | {
746 | "cell_type": "markdown",
747 | "metadata": {},
748 | "source": [
749 | "Okay, now I need to replicate the code up above so that it will work on this larger batch.\n",
750 | "\n",
751 | "I'm thinking, as well, of things that I will have to watch for: where will these files overlap with the ones in mybib already? I will have to be careful when integrating and comparing my various points of data, but the volume information and the file id numbering system (ie, having 0s vs 1/2/etc on the end of the filename) should help!"
752 | ]
753 | },
754 | {
755 | "cell_type": "markdown",
756 | "metadata": {},
757 | "source": [
758 | "So, let's modify the code that I used earlier.\n",
759 | "\n",
760 | "In particular, I want to add a line that will note which ECCO segment it came from - ECCOI or ECCOII."
761 | ]
762 | },
763 | {
764 | "cell_type": "code",
765 | "execution_count": 27,
766 | "metadata": {
767 | "collapsed": true
768 | },
769 | "outputs": [],
770 | "source": [
771 | "# this list will hold one dict of metadata for each relevant file\n",
772 | "listofdicts = []\n",
773 | "\n",
774 | "# every tag we need text from; locSubject is always included because it drives the filter\n",
775 | "wanted = set(taglist) | {'locSubject'}\n",
776 | "\n",
777 | "for file in shortfilelist:\n",
778 | "\n",
779 | "    # read the file ONCE, collecting the text of every wanted tag along the way,\n",
780 | "    # instead of re-parsing the whole file for the filter and again for each tag\n",
781 | "    tagtexts = {xmltag: [] for xmltag in wanted}\n",
782 | "    for event, elem in etree.iterparse(file, events=('end',)):\n",
783 | "        if elem.tag in wanted:\n",
784 | "            # an empty element has .text == None, which str.join would reject\n",
785 | "            tagtexts[elem.tag].append(elem.text or '')\n",
786 | "\n",
787 | "            # reclaim memory: empty this element and delete the\n",
788 | "            # already-processed siblings that come before it\n",
789 | "            elem.clear()\n",
790 | "            while elem.getprevious() is not None:\n",
791 | "                del elem.getparent()[0]\n",
792 | "\n",
793 | "    # joining the headings into one string allows substring matching;\n",
794 | "    # comparing list items required an exact match, and fuzzier searching\n",
795 | "    # guards against errors in the controlled vocabulary of the lcsh\n",
796 | "    teststring = ', '.join(tagtexts['locSubject'])\n",
797 | "    if any(lcsh in teststring for lcsh in termlist):\n",
798 | "\n",
799 | "        # the file is relevant, so make a dict for its metadata (a new one per file),\n",
800 | "        # starting with an entry to indicate which ECCO release it came from\n",
801 | "        filedict = {'eccorelease': '2'}\n",
802 | "\n",
803 | "        # one comma-joined string per tag, in taglist order\n",
804 | "        for xmltag in taglist:\n",
805 | "            filedict[xmltag] = ', '.join(tagtexts[xmltag])\n",
806 | "\n",
807 | "        listofdicts.append(filedict)"
827 | ]
828 | },
829 | {
830 | "cell_type": "code",
831 | "execution_count": 28,
832 | "metadata": {
833 | "collapsed": true
834 | },
835 | "outputs": [],
836 | "source": [
837 | "dictsdf = pd.DataFrame(listofdicts)"
838 | ]
839 | },
840 | {
841 | "cell_type": "code",
842 | "execution_count": 29,
843 | "metadata": {},
844 | "outputs": [
845 | {
846 | "data": {
847 | "text/html": [
848 | "\n",
849 | "\n",
862 | "
\n",
863 | " \n",
864 | " \n",
865 | " | \n",
866 | " ESTCID | \n",
867 | " currentVolume | \n",
868 | " displayTitle | \n",
869 | " documentID | \n",
870 | " fullTitle | \n",
871 | " imprintCity | \n",
872 | " imprintFull | \n",
873 | " imprintPublisher | \n",
874 | " language | \n",
875 | " locSubject | \n",
876 | " module | \n",
877 | " notes | \n",
878 | " pubDate | \n",
879 | " publicationPlace | \n",
880 | " totalVolumes | \n",
881 | "
\n",
882 | " \n",
883 | " \n",
884 | " \n",
885 | " | 0 | \n",
886 | " N025387 | \n",
887 | " Volume 16 | \n",
888 | " The World displayed; Or, A curious collection ... | \n",
889 | " 1309700116 | \n",
890 | " The World displayed; Or, A curious collection ... | \n",
891 | " London | \n",
892 | " London : Printed for J. Newbery, at the Bible ... | \n",
893 | " Printed for J. Newbery, at the Bible and Sun, ... | \n",
894 | " English | \n",
895 | " Voyages and travels | \n",
896 | " History and Geography | \n",
897 | " | \n",
898 | " 17670101 | \n",
899 | " London | \n",
900 | " 1 | \n",
901 | "
\n",
902 | " \n",
903 | " | 1 | \n",
904 | " N031356 | \n",
905 | " 0 | \n",
906 | " An essay towards a description of the city of ... | \n",
907 | " 1309700600 | \n",
908 | " An essay towards a description of the city of ... | \n",
909 | " [Bath] | \n",
910 | " [Bath] : Printed for W. Frederick, bookseller,... | \n",
911 | " Printed for W. Frederick, bookseller, in Bath | \n",
912 | " English | \n",
913 | " Bath (England), Description and travel, Early ... | \n",
914 | " History and Geography | \n",
915 | " With an additional titlepage for pt. 1: 'An es... | \n",
916 | " 17420101 | \n",
917 | " Bath | \n",
918 | " 0 | \n",
919 | "
\n",
920 | " \n",
921 | " | 2 | \n",
922 | " N029227 | \n",
923 | " 0 | \n",
924 | " Interesting account of the early voyages, made... | \n",
925 | " 1309900200 | \n",
926 | " Interesting account of the early voyages, made... | \n",
927 | " London | \n",
928 | " London : Printed for the proprietors, and sold... | \n",
929 | " Printed for the proprietors, and sold at Stalk... | \n",
930 | " English | \n",
931 | " Explorers, Portugal, Early works to 1800, Expl... | \n",
932 | " History and Geography | \n",
933 | " | \n",
934 | " 17900101 | \n",
935 | " London | \n",
936 | " 0 | \n",
937 | "
\n",
938 | " \n",
939 | " | 3 | \n",
940 | " N031489 | \n",
941 | " Volume 1 | \n",
942 | " An entertaining journey to the Netherlands; co... | \n",
943 | " 1309900301 | \n",
944 | " An entertaining journey to the Netherlands; co... | \n",
945 | " London | \n",
946 | " London : printed for W. Smith, M DCC LXXXII. [... | \n",
947 | " printed for W. Smith | \n",
948 | " English | \n",
949 | " Netherlands, Description and travel, Early wor... | \n",
950 | " History and Geography | \n",
951 | " Coriat Junior = Samuel Paterson?. | \n",
952 | " 17820101 | \n",
953 | " London | \n",
954 | " 3 | \n",
955 | "
\n",
956 | " \n",
957 | " | 4 | \n",
958 | " N031489 | \n",
959 | " Volume 2 | \n",
960 | " An entertaining journey to the Netherlands; co... | \n",
961 | " 1309900302 | \n",
962 | " An entertaining journey to the Netherlands; co... | \n",
963 | " London | \n",
964 | " London : printed for W. Smith, M DCC LXXXII. [... | \n",
965 | " printed for W. Smith | \n",
966 | " English | \n",
967 | " Netherlands, Description and travel, Early wor... | \n",
968 | " History and Geography | \n",
969 | " Coriat Junior = Samuel Paterson?. | \n",
970 | " 17820101 | \n",
971 | " London | \n",
972 | " 3 | \n",
973 | "
\n",
974 | " \n",
975 | " | 5 | \n",
976 | " N031489 | \n",
977 | " Volume 3 | \n",
978 | " An entertaining journey to the Netherlands; co... | \n",
979 | " 1309900303 | \n",
980 | " An entertaining journey to the Netherlands; co... | \n",
981 | " London | \n",
982 | " London : printed for W. Smith, M DCC LXXXII. [... | \n",
983 | " printed for W. Smith | \n",
984 | " English | \n",
985 | " Netherlands, Description and travel, Early wor... | \n",
986 | " History and Geography | \n",
987 | " Coriat Junior = Samuel Paterson?. | \n",
988 | " 17820101 | \n",
989 | " London | \n",
990 | " 3 | \n",
991 | "
\n",
992 | " \n",
993 | " | 6 | \n",
994 | " T188570 | \n",
995 | " 0 | \n",
996 | " A New collection of voyages and travels, dedic... | \n",
997 | " 1309901100 | \n",
998 | " A New collection of voyages and travels, dedic... | \n",
999 | " London | \n",
1000 | " London : Printed for E. Newbery, the corner of... | \n",
1001 | " Printed for E. Newbery, the corner of St. Paul... | \n",
1002 | " English | \n",
1003 | " Voyages and travels, Early works to 1800 | \n",
1004 | " History and Geography | \n",
1005 | " William Mavor is the editor of the 'Historical... | \n",
1006 | " 17960101 | \n",
1007 | " London | \n",
1008 | " 0 | \n",
1009 | "
\n",
1010 | " \n",
1011 | " | 7 | \n",
1012 | " T172346 | \n",
1013 | " 0 | \n",
1014 | " Miscellaneous remarks made on the spot, in a l... | \n",
1015 | " 1310300400 | \n",
1016 | " Miscellaneous remarks made on the spot, in a l... | \n",
1017 | " London | \n",
1018 | " London : Printed for S. Hooper, at Gay's Head,... | \n",
1019 | " Printed for S. Hooper, at Gay's Head, near Bea... | \n",
1020 | " English | \n",
1021 | " Italy, Description and travel, Early works to ... | \n",
1022 | " History and Geography | \n",
1023 | " | \n",
1024 | " 17560101 | \n",
1025 | " London | \n",
1026 | " 0 | \n",
1027 | "
\n",
1028 | " \n",
1029 | "
\n",
1030 | "
"
1031 | ],
1032 | "text/plain": [
1033 | " ESTCID currentVolume displayTitle \\\n",
1034 | "0 N025387 Volume 16 The World displayed; Or, A curious collection ... \n",
1035 | "1 N031356 0 An essay towards a description of the city of ... \n",
1036 | "2 N029227 0 Interesting account of the early voyages, made... \n",
1037 | "3 N031489 Volume 1 An entertaining journey to the Netherlands; co... \n",
1038 | "4 N031489 Volume 2 An entertaining journey to the Netherlands; co... \n",
1039 | "5 N031489 Volume 3 An entertaining journey to the Netherlands; co... \n",
1040 | "6 T188570 0 A New collection of voyages and travels, dedic... \n",
1041 | "7 T172346 0 Miscellaneous remarks made on the spot, in a l... \n",
1042 | "\n",
1043 | " documentID fullTitle imprintCity \\\n",
1044 | "0 1309700116 The World displayed; Or, A curious collection ... London \n",
1045 | "1 1309700600 An essay towards a description of the city of ... [Bath] \n",
1046 | "2 1309900200 Interesting account of the early voyages, made... London \n",
1047 | "3 1309900301 An entertaining journey to the Netherlands; co... London \n",
1048 | "4 1309900302 An entertaining journey to the Netherlands; co... London \n",
1049 | "5 1309900303 An entertaining journey to the Netherlands; co... London \n",
1050 | "6 1309901100 A New collection of voyages and travels, dedic... London \n",
1051 | "7 1310300400 Miscellaneous remarks made on the spot, in a l... London \n",
1052 | "\n",
1053 | " imprintFull \\\n",
1054 | "0 London : Printed for J. Newbery, at the Bible ... \n",
1055 | "1 [Bath] : Printed for W. Frederick, bookseller,... \n",
1056 | "2 London : Printed for the proprietors, and sold... \n",
1057 | "3 London : printed for W. Smith, M DCC LXXXII. [... \n",
1058 | "4 London : printed for W. Smith, M DCC LXXXII. [... \n",
1059 | "5 London : printed for W. Smith, M DCC LXXXII. [... \n",
1060 | "6 London : Printed for E. Newbery, the corner of... \n",
1061 | "7 London : Printed for S. Hooper, at Gay's Head,... \n",
1062 | "\n",
1063 | " imprintPublisher language \\\n",
1064 | "0 Printed for J. Newbery, at the Bible and Sun, ... English \n",
1065 | "1 Printed for W. Frederick, bookseller, in Bath English \n",
1066 | "2 Printed for the proprietors, and sold at Stalk... English \n",
1067 | "3 printed for W. Smith English \n",
1068 | "4 printed for W. Smith English \n",
1069 | "5 printed for W. Smith English \n",
1070 | "6 Printed for E. Newbery, the corner of St. Paul... English \n",
1071 | "7 Printed for S. Hooper, at Gay's Head, near Bea... English \n",
1072 | "\n",
1073 | " locSubject module \\\n",
1074 | "0 Voyages and travels History and Geography \n",
1075 | "1 Bath (England), Description and travel, Early ... History and Geography \n",
1076 | "2 Explorers, Portugal, Early works to 1800, Expl... History and Geography \n",
1077 | "3 Netherlands, Description and travel, Early wor... History and Geography \n",
1078 | "4 Netherlands, Description and travel, Early wor... History and Geography \n",
1079 | "5 Netherlands, Description and travel, Early wor... History and Geography \n",
1080 | "6 Voyages and travels, Early works to 1800 History and Geography \n",
1081 | "7 Italy, Description and travel, Early works to ... History and Geography \n",
1082 | "\n",
1083 | " notes pubDate \\\n",
1084 | "0 17670101 \n",
1085 | "1 With an additional titlepage for pt. 1: 'An es... 17420101 \n",
1086 | "2 17900101 \n",
1087 | "3 Coriat Junior = Samuel Paterson?. 17820101 \n",
1088 | "4 Coriat Junior = Samuel Paterson?. 17820101 \n",
1089 | "5 Coriat Junior = Samuel Paterson?. 17820101 \n",
1090 | "6 William Mavor is the editor of the 'Historical... 17960101 \n",
1091 | "7 17560101 \n",
1092 | "\n",
1093 | " publicationPlace totalVolumes \n",
1094 | "0 London 1 \n",
1095 | "1 Bath 0 \n",
1096 | "2 London 0 \n",
1097 | "3 London 3 \n",
1098 | "4 London 3 \n",
1099 | "5 London 3 \n",
1100 | "6 London 0 \n",
1101 | "7 London 0 "
1102 | ]
1103 | },
1104 | "execution_count": 29,
1105 | "metadata": {},
1106 | "output_type": "execute_result"
1107 | }
1108 | ],
1109 | "source": [
1110 | "dictsdf"
1111 | ]
1112 | },
1113 | {
1114 | "cell_type": "markdown",
1115 | "metadata": {},
1116 | "source": [
1117 | "Hurrah, it worked! Now, to replicate the code and do it for the multiple sections of ECCO II. "
1118 | ]
1119 | },
1120 | {
1121 | "cell_type": "markdown",
1122 | "metadata": {},
1123 | "source": [
1124 | "# ECCO Part 2\n",
1125 | "Let's run our analysis on ECCO pt 2 (there are fewer files here than in ECCO pt 1!)"
1126 | ]
1127 | },
1128 | {
1129 | "cell_type": "code",
1130 | "execution_count": 1,
1131 | "metadata": {
1132 | "collapsed": true
1133 | },
1134 | "outputs": [],
1135 | "source": [
1136 | "from lxml import etree\n",
1137 | "import pandas as pd\n",
1138 | "import glob"
1139 | ]
1140 | },
1141 | {
1142 | "cell_type": "markdown",
1143 | "metadata": {},
1144 | "source": [
1145 | "Because many of the files are "
1146 | ]
1147 | },
1148 | {
1149 | "cell_type": "code",
1150 | "execution_count": 52,
1151 | "metadata": {
1152 | "collapsed": true
1153 | },
1154 | "outputs": [],
1155 | "source": [
1156 | "import os\n",
1157 | "filelist = []\n",
1158 | "for root, dirs, files in os.walk(mypath):\n",
1159 | " for file in files:\n",
1160 | " if file.endswith(\".xml\"):\n",
1161 | " filelist.append(os.path.join(root, file))"
1162 | ]
1163 | },
1164 | {
1165 | "cell_type": "code",
1166 | "execution_count": 53,
1167 | "metadata": {},
1168 | "outputs": [
1169 | {
1170 | "data": {
1171 | "text/plain": [
1172 | "52690"
1173 | ]
1174 | },
1175 | "execution_count": 53,
1176 | "metadata": {},
1177 | "output_type": "execute_result"
1178 | }
1179 | ],
1180 | "source": [
1181 | "len(filelist)"
1182 | ]
1183 | },
1184 | {
1185 | "cell_type": "code",
1186 | "execution_count": 65,
1187 | "metadata": {
1188 | "scrolled": true
1189 | },
1190 | "outputs": [
1191 | {
1192 | "data": {
1193 | "text/plain": [
1194 | "['D:/ECCOII 2001/GenRef\\\\XML\\\\1336600100.xml',\n",
1195 | " 'D:/ECCOII 2001/GenRef\\\\XML\\\\1336600200.xml',\n",
1196 | " 'D:/ECCOII 2001/GenRef\\\\XML\\\\1336600300.xml',\n",
1197 | " 'D:/ECCOII 2001/GenRef\\\\XML\\\\1336600400.xml',\n",
1198 | " 'D:/ECCOII 2001/GenRef\\\\XML\\\\1336600500.xml',\n",
1199 | " 'D:/ECCOII 2001/GenRef\\\\XML\\\\1336600600.xml',\n",
1200 | " 'D:/ECCOII 2001/GenRef\\\\XML\\\\1336600700.xml',\n",
1201 | " 'D:/ECCOII 2001/GenRef\\\\XML\\\\1336600800.xml',\n",
1202 | " 'D:/ECCOII 2001/GenRef\\\\XML\\\\1336600900.xml',\n",
1203 | " 'D:/ECCOII 2001/GenRef\\\\XML\\\\1336601000.xml']"
1204 | ]
1205 | },
1206 | "execution_count": 65,
1207 | "metadata": {},
1208 | "output_type": "execute_result"
1209 | }
1210 | ],
1211 | "source": [
1212 | "filelist[:10]"
1213 | ]
1214 | },
1215 | {
1216 | "cell_type": "markdown",
1217 | "metadata": {},
1218 | "source": [
1219 | "Looks good - there shouldn't be any subfolders or word docs, for example. \n",
1220 | "\n",
1221 | "I am...a little hesitant to run something on such a large number of files. What if it breaks, partway through? What if my laptop panics? I think, however, this is a good opportunity to take a break, make some hot chocolate, watch the snow fall, and let my code run (and then, maybe, return to writing?!)"
1222 | ]
1223 | },
1224 | {
1225 | "cell_type": "markdown",
1226 | "metadata": {},
1227 | "source": [
1228 | "### UH OH.\n",
1229 | "\n",
1230 | "*revised plan* Okay, so I started it running, and then let it be. And, of course, something didn't work and it threw a \"file not found\" error. :( The weird thing is that it didn't happen on the `testparse = etree` line like I would have expected; instead, it happened on the first `for event, elem in testparse: testlist.append(elem.text)`.\n",
1231 | "\n",
1232 | "So, I'm going to return to doing it in batches; at least, then, I will know whether it is happening in a certain folder. Grumble grumble.\n",
1233 | "\n",
1234 | "(side note: I am a little concerned about what happens if the capitalization is different somewhere. But I think I can just run the search again with `locsubject` (without the capital S) to catch any differences...)\n",
1235 | "\n",
1236 | "In order to do a longer test, I'll use the `ECCOII HistAndGeo` subsection as an experiment - there were about 3300 files in there. \n",
1237 | "\n",
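1238 | "_side note_: My laptop went to sleep just after the 2500 mark, so I had to start it back up again, whoops. That is why the cell below resumes from index 2500 (`count = 2500`, `filelistHistAndGeo[2500:]`) and leaves `listofdicts = []` commented out, so the results from the first 2500 files are kept. No duplicates in the total of `312` relevant files below, though I will have to filter out anything that isn't printed in Great Britain - there are some texts printed in New York, Dublin, etc., in there."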
1239 | ]
1240 | },
1241 | {
1242 | "cell_type": "code",
1243 | "execution_count": 2,
1244 | "metadata": {
1245 | "collapsed": true
1246 | },
1247 | "outputs": [],
1248 | "source": [
1249 | "filelistHistAndGeo = glob.glob('D:/ECCOII 2001/HistAndGeo/XML/*.xml')"
1250 | ]
1251 | },
1252 | {
1253 | "cell_type": "code",
1254 | "execution_count": 19,
1255 | "metadata": {},
1256 | "outputs": [
1257 | {
1258 | "name": "stdout",
1259 | "output_type": "stream",
1260 | "text": [
1261 | "3000\n"
1262 | ]
1263 | }
1264 | ],
1265 | "source": [
1266 | "termlist = ['travel', 'discov', 'explor', 'voyage', 'guide', 'antiquit']\n",
1267 | "taglist = ['documentID', 'ESTCID', 'pubDate','ESTCID',\n",
1268 | " 'language','module','locSubject','notes',\n",
1269 | " 'fullTitle','displayTitle','currentVolume', \n",
1270 | " 'totalVolumes', 'imprintPublisher','imprintFull',\n",
1271 | " 'imprintCity', 'publicationPlace']\n",
1272 | "\n",
1273 | "# this list will hold one dict per relevant file,\n",
1274 | "# each dict containing that file's metadata\n",
1275 | "\n",
1276 | "# listofdicts = []\n",
1277 | "\n",
1278 | "# and, a count so that I can track my progress\n",
1279 | "count = 2500\n",
1280 | "\n",
1281 | "for file in filelistHistAndGeo[2500:]:\n",
1282 | " count+=1\n",
1283 | " if (count % 500) == 0:\n",
1284 | " print(count)\n",
1285 | " \n",
1286 | " \n",
1287 | " # the first iterparse will test to see if it has the desired lcsh.\n",
1288 | " testparse = etree.iterparse(file, events=('end',), tag = 'locSubject')\n",
1289 | " testlist = []\n",
1290 | " for event, elem in testparse:\n",
1291 | " testlist.append(elem.text)\n",
1292 | " \n",
1293 | " # the below will reclaim the memory at the end of each loop -\n",
1294 | " # clears unneeded node references\n",
1295 | " elem.clear()\n",
1296 | " while elem.getprevious() is not None:\n",
1297 | " del elem.getparent()[0]\n",
1298 | " \n",
1299 | " # and back to the purpose of our code - \n",
1300 | " # note that putting it in a string makes it easier to search\n",
1301 | " # comparing list items required an exact match,\n",
1302 | " # and I wanted fuzzier searching, \n",
1303 | " # just in case there were any errors in the controlled vocabulary\n",
1304 | " # of the lcsh.\n",
1305 | " teststring = ', '.join(testlist) \n",
1306 | " if any(lcsh in teststring for lcsh in termlist):\n",
1307 | " \n",
1308 | " # if that is true, we want the metadata for that file\n",
1309 | " # so let's make a dict to hold it\n",
1310 | " # this dict will be reset with every file loop\n",
1311 | " filedict = {}\n",
1312 | " \n",
1313 | " # a dict entry to indicate which ECCO release it came from\n",
1314 | " filedict['eccorelease'] = '2'\n",
1315 | " \n",
1316 | " for xmltag in taglist:\n",
1317 | " # make an empty list to hold what is in each tag, \n",
1318 | " # which will be written to our dict in a few steps\n",
1319 | " elementlist = []\n",
1320 | " \n",
1321 | " context = etree.iterparse(file, events=('end',), tag=xmltag)\n",
1322 | " for event, elem in context:\n",
1323 | " elementlist.append(elem.text)\n",
1324 | " \n",
1325 | " # the below should make things faster - I think? \n",
1326 | " # reclaim the memory at the end of each loop -\n",
1327 | " # clears unneeded node references\n",
1328 | " elem.clear()\n",
1329 | " while elem.getprevious() is not None:\n",
1330 | " del elem.getparent()[0]\n",
1331 | " # assign to the dictionary\n",
1332 | " filedict[xmltag] = ', '.join(elementlist)\n",
1333 | " \n",
1334 | " # after it has looped through all the xmltags, \n",
1335 | " # add the filedict to list of dicts \n",
1336 | " listofdicts.append(filedict) \n",
1337 | " \n"
1338 | ]
1339 | },
1340 | {
1341 | "cell_type": "code",
1342 | "execution_count": 20,
1343 | "metadata": {},
1344 | "outputs": [
1345 | {
1346 | "data": {
1347 | "text/plain": [
1348 | "312"
1349 | ]
1350 | },
1351 | "execution_count": 20,
1352 | "metadata": {},
1353 | "output_type": "execute_result"
1354 | }
1355 | ],
1356 | "source": [
1357 | "len(listofdicts)"
1358 | ]
1359 | },
1360 | {
1361 | "cell_type": "code",
1362 | "execution_count": 21,
1363 | "metadata": {},
1364 | "outputs": [
1365 | {
1366 | "data": {
1367 | "text/html": [
1368 | "\n",
1369 | "\n",
1382 | "
\n",
1383 | " \n",
1384 | " \n",
1385 | " | \n",
1386 | " ESTCID | \n",
1387 | " currentVolume | \n",
1388 | " displayTitle | \n",
1389 | " documentID | \n",
1390 | " eccorelease | \n",
1391 | " fullTitle | \n",
1392 | " imprintCity | \n",
1393 | " imprintFull | \n",
1394 | " imprintPublisher | \n",
1395 | " language | \n",
1396 | " locSubject | \n",
1397 | " module | \n",
1398 | " notes | \n",
1399 | " pubDate | \n",
1400 | " publicationPlace | \n",
1401 | " totalVolumes | \n",
1402 | "
\n",
1403 | " \n",
1404 | " \n",
1405 | " \n",
1406 | " | 0 | \n",
1407 | " N025387 | \n",
1408 | " Volume 16 | \n",
1409 | " The World displayed; Or, A curious collection ... | \n",
1410 | " 1309700116 | \n",
1411 | " 2 | \n",
1412 | " The World displayed; Or, A curious collection ... | \n",
1413 | " London | \n",
1414 | " London : Printed for J. Newbery, at the Bible ... | \n",
1415 | " Printed for J. Newbery, at the Bible and Sun, ... | \n",
1416 | " English | \n",
1417 | " Voyages and travels | \n",
1418 | " History and Geography | \n",
1419 | " | \n",
1420 | " 17670101 | \n",
1421 | " London | \n",
1422 | " 1 | \n",
1423 | "
\n",
1424 | " \n",
1425 | " | 1 | \n",
1426 | " N031356 | \n",
1427 | " 0 | \n",
1428 | " An essay towards a description of the city of ... | \n",
1429 | " 1309700600 | \n",
1430 | " 2 | \n",
1431 | " An essay towards a description of the city of ... | \n",
1432 | " [Bath] | \n",
1433 | " [Bath] : Printed for W. Frederick, bookseller,... | \n",
1434 | " Printed for W. Frederick, bookseller, in Bath | \n",
1435 | " English | \n",
1436 | " Bath (England), Description and travel, Early ... | \n",
1437 | " History and Geography | \n",
1438 | " With an additional titlepage for pt. 1: 'An es... | \n",
1439 | " 17420101 | \n",
1440 | " Bath | \n",
1441 | " 0 | \n",
1442 | "
\n",
1443 | " \n",
1444 | " | 2 | \n",
1445 | " N029227 | \n",
1446 | " 0 | \n",
1447 | " Interesting account of the early voyages, made... | \n",
1448 | " 1309900200 | \n",
1449 | " 2 | \n",
1450 | " Interesting account of the early voyages, made... | \n",
1451 | " London | \n",
1452 | " London : Printed for the proprietors, and sold... | \n",
1453 | " Printed for the proprietors, and sold at Stalk... | \n",
1454 | " English | \n",
1455 | " Explorers, Portugal, Early works to 1800, Expl... | \n",
1456 | " History and Geography | \n",
1457 | " | \n",
1458 | " 17900101 | \n",
1459 | " London | \n",
1460 | " 0 | \n",
1461 | "
\n",
1462 | " \n",
1463 | " | 3 | \n",
1464 | " N031489 | \n",
1465 | " Volume 1 | \n",
1466 | " An entertaining journey to the Netherlands; co... | \n",
1467 | " 1309900301 | \n",
1468 | " 2 | \n",
1469 | " An entertaining journey to the Netherlands; co... | \n",
1470 | " London | \n",
1471 | " London : printed for W. Smith, M DCC LXXXII. [... | \n",
1472 | " printed for W. Smith | \n",
1473 | " English | \n",
1474 | " Netherlands, Description and travel, Early wor... | \n",
1475 | " History and Geography | \n",
1476 | " Coriat Junior = Samuel Paterson?. | \n",
1477 | " 17820101 | \n",
1478 | " London | \n",
1479 | " 3 | \n",
1480 | "
\n",
1481 | " \n",
1482 | " | 4 | \n",
1483 | " N031489 | \n",
1484 | " Volume 2 | \n",
1485 | " An entertaining journey to the Netherlands; co... | \n",
1486 | " 1309900302 | \n",
1487 | " 2 | \n",
1488 | " An entertaining journey to the Netherlands; co... | \n",
1489 | " London | \n",
1490 | " London : printed for W. Smith, M DCC LXXXII. [... | \n",
1491 | " printed for W. Smith | \n",
1492 | " English | \n",
1493 | " Netherlands, Description and travel, Early wor... | \n",
1494 | " History and Geography | \n",
1495 | " Coriat Junior = Samuel Paterson?. | \n",
1496 | " 17820101 | \n",
1497 | " London | \n",
1498 | " 3 | \n",
1499 | "
\n",
1500 | " \n",
1501 | " | 5 | \n",
1502 | " N031489 | \n",
1503 | " Volume 3 | \n",
1504 | " An entertaining journey to the Netherlands; co... | \n",
1505 | " 1309900303 | \n",
1506 | " 2 | \n",
1507 | " An entertaining journey to the Netherlands; co... | \n",
1508 | " London | \n",
1509 | " London : printed for W. Smith, M DCC LXXXII. [... | \n",
1510 | " printed for W. Smith | \n",
1511 | " English | \n",
1512 | " Netherlands, Description and travel, Early wor... | \n",
1513 | " History and Geography | \n",
1514 | " Coriat Junior = Samuel Paterson?. | \n",
1515 | " 17820101 | \n",
1516 | " London | \n",
1517 | " 3 | \n",
1518 | "
\n",
1519 | " \n",
1520 | " | 6 | \n",
1521 | " T188570 | \n",
1522 | " 0 | \n",
1523 | " A New collection of voyages and travels, dedic... | \n",
1524 | " 1309901100 | \n",
1525 | " 2 | \n",
1526 | " A New collection of voyages and travels, dedic... | \n",
1527 | " London | \n",
1528 | " London : Printed for E. Newbery, the corner of... | \n",
1529 | " Printed for E. Newbery, the corner of St. Paul... | \n",
1530 | " English | \n",
1531 | " Voyages and travels, Early works to 1800 | \n",
1532 | " History and Geography | \n",
1533 | " William Mavor is the editor of the 'Historical... | \n",
1534 | " 17960101 | \n",
1535 | " London | \n",
1536 | " 0 | \n",
1537 | "
\n",
1538 | " \n",
1539 | " | 7 | \n",
1540 | " T172346 | \n",
1541 | " 0 | \n",
1542 | " Miscellaneous remarks made on the spot, in a l... | \n",
1543 | " 1310300400 | \n",
1544 | " 2 | \n",
1545 | " Miscellaneous remarks made on the spot, in a l... | \n",
1546 | " London | \n",
1547 | " London : Printed for S. Hooper, at Gay's Head,... | \n",
1548 | " Printed for S. Hooper, at Gay's Head, near Bea... | \n",
1549 | " English | \n",
1550 | " Italy, Description and travel, Early works to ... | \n",
1551 | " History and Geography | \n",
1552 | " | \n",
1553 | " 17560101 | \n",
1554 | " London | \n",
1555 | " 0 | \n",
1556 | "
\n",
1557 | " \n",
1558 | " | 8 | \n",
1559 | " T220401 | \n",
1560 | " Volume 1 | \n",
1561 | " The memoirs of Charles-Lewis, Baron de Pollnit... | \n",
1562 | " 1313200301 | \n",
1563 | " 2 | \n",
1564 | " The memoirs of Charles-Lewis, Baron de Pollnit... | \n",
1565 | " London | \n",
1566 | " London : Printed for Daniel Browne, at the Bla... | \n",
1567 | " Printed for Daniel Browne, at the Black Swan, ... | \n",
1568 | " English | \n",
1569 | " Europe, Description and travel | \n",
1570 | " History and Geography | \n",
1571 | " Translated by Stephen Whatley. In this editio... | \n",
1572 | " 17390101 | \n",
1573 | " London | \n",
1574 | " 2 | \n",
1575 | "
\n",
1576 | " \n",
1577 | " | 9 | \n",
1578 | " T220401 | \n",
1579 | " Volume 2 | \n",
1580 | " The memoirs of Charles-Lewis, Baron de Pollnit... | \n",
1581 | " 1313200302 | \n",
1582 | " 2 | \n",
1583 | " The memoirs of Charles-Lewis, Baron de Pollnit... | \n",
1584 | " London | \n",
1585 | " London : Printed for Daniel Browne, at the Bla... | \n",
1586 | " Printed for Daniel Browne, at the Black Swan, ... | \n",
1587 | " English | \n",
1588 | " Europe, Description and travel | \n",
1589 | " History and Geography | \n",
1590 | " Translated by Stephen Whatley. In this editio... | \n",
1591 | " 17390101 | \n",
1592 | " London | \n",
1593 | " 2 | \n",
1594 | "
\n",
1595 | " \n",
1596 | " | 10 | \n",
1597 | " W012701 | \n",
1598 | " 0 | \n",
1599 | " A history of a voyage to the coast of Africa, ... | \n",
1600 | " 1313200400 | \n",
1601 | " 2 | \n",
1602 | " A history of a voyage to the coast of Africa, ... | \n",
1603 | " Philadelphia | \n",
1604 | " Philadelphia : printed for the author, by S. C... | \n",
1605 | " printed for the author, by S. C[.] Ustick, & Co | \n",
1606 | " English | \n",
1607 | " Hawkins, Joseph,, b. 1772, Portraits, Voyages ... | \n",
1608 | " History and Geography | \n",
1609 | " \"The author relating the history of his travel... | \n",
1610 | " 17970101 | \n",
1611 | " Philadelphia | \n",
1612 | " 0 | \n",
1613 | "
\n",
1614 | " \n",
1615 | " | 11 | \n",
1616 | " T170016 | \n",
1617 | " Volume 1 | \n",
1618 | " Modern voyages: Containing a variety of useful... | \n",
1619 | " 1316200401 | \n",
1620 | " 2 | \n",
1621 | " Modern voyages: Containing a variety of useful... | \n",
1622 | " Dublin | \n",
1623 | " Dublin : printed for Chamberlaine and Rice, P.... | \n",
1624 | " printed for Chamberlaine and Rice, P. Wogan, P... | \n",
1625 | " English | \n",
1626 | " Voyages and travels, Early works to 1800 | \n",
1627 | " History and Geography | \n",
1628 | " The imprint in vol. 2 is enlarged by the addit... | \n",
1629 | " 17900101 | \n",
1630 | " Dublin | \n",
1631 | " 2 | \n",
1632 | "
\n",
1633 | " \n",
1634 | " | 12 | \n",
1635 | " T170016 | \n",
1636 | " Volume 2 | \n",
1637 | " Modern voyages: Containing a variety of useful... | \n",
1638 | " 1316200402 | \n",
1639 | " 2 | \n",
1640 | " Modern voyages: Containing a variety of useful... | \n",
1641 | " Dublin | \n",
1642 | " Dublin : printed for Chamberlaine and Rice, P.... | \n",
1643 | " printed for Chamberlaine and Rice, P. Wogan, P... | \n",
1644 | " English | \n",
1645 | " Voyages and travels, Early works to 1800 | \n",
1646 | " History and Geography | \n",
1647 | " The imprint in vol. 2 is enlarged by the addit... | \n",
1648 | " 17900101 | \n",
1649 | " Dublin | \n",
1650 | " 2 | \n",
1651 | "
\n",
1652 | " \n",
1653 | " | 13 | \n",
1654 | " T224686 | \n",
1655 | " 0 | \n",
1656 | " The foreign travels and dangerous voyages of t... | \n",
1657 | " 1316400600 | \n",
1658 | " 2 | \n",
1659 | " The foreign travels and dangerous voyages of t... | \n",
1660 | " London | \n",
1661 | " London : printed for M. Hotham, [1710?] | \n",
1662 | " printed for M. Hotham | \n",
1663 | " English | \n",
1664 | " Voyages and travels, Early works to 1800 | \n",
1665 | " History and Geography | \n",
1666 | " | \n",
1667 | " 17100101 | \n",
1668 | " London | \n",
1669 | " 0 | \n",
1670 | "
\n",
1671 | " \n",
1672 | " | 14 | \n",
1673 | " T164122 | \n",
1674 | " 0 | \n",
1675 | " An accountof the shipwreck and captivity of Mr... | \n",
1676 | " 1321200900 | \n",
1677 | " 2 | \n",
1678 | " An accountof the shipwreck and captivity of Mr... | \n",
1679 | " London | \n",
1680 | " London : Printed for C. Forster, in the Poultr... | \n",
1681 | " Printed for C. Forster, in the Poultry | \n",
1682 | " English | \n",
1683 | " Voyages and travels, Early works to 1800 | \n",
1684 | " History and Geography | \n",
1685 | " Price in square brackets: (Price Two Shillings... | \n",
1686 | " 17890101 | \n",
1687 | " London | \n",
1688 | " 0 | \n",
1689 | "
\n",
1690 | " \n",
1691 | " | 15 | \n",
1692 | " T224703 | \n",
1693 | " 0 | \n",
1694 | " Owen's new book of fairs, published by the Kin... | \n",
1695 | " 1324400100 | \n",
1696 | " 2 | \n",
1697 | " Owen's new book of fairs, published by the Kin... | \n",
1698 | " London | \n",
1699 | " London : Printed (by assignment from W. Owen) ... | \n",
1700 | " Printed (by assignment from W. Owen) for J. Jo... | \n",
1701 | " English | \n",
1702 | " Great Britain, Description and travel, Early w... | \n",
1703 | " History and Geography | \n",
1704 | " With an initial copyright leaf. Also publishe... | \n",
1705 | " 17990101 | \n",
1706 | " London | \n",
1707 | " 0 | \n",
1708 | "
\n",
1709 | " \n",
1710 | " | 16 | \n",
1711 | " T223983 | \n",
1712 | " 0 | \n",
1713 | " Morse's Geography. This day is published, in o... | \n",
1714 | " 1324400900 | \n",
1715 | " 2 | \n",
1716 | " Morse's Geography. This day is published, in o... | \n",
1717 | " London | \n",
1718 | " London : Printed for John Stockdale, Piccadill... | \n",
1719 | " Printed for John Stockdale, Piccadilly and sol... | \n",
1720 | " English | \n",
1721 | " United States, Description and travel, Early w... | \n",
1722 | " History and Geography | \n",
1723 | " Advertisement for new books printed for John S... | \n",
1724 | " 17920101 | \n",
1725 | " London | \n",
1726 | " 0 | \n",
1727 | "
\n",
1728 | " \n",
1729 | " | 17 | \n",
1730 | " N034619 | \n",
1731 | " 0 | \n",
1732 | " The Present state of Sicily and Malta, Extract... | \n",
1733 | " 1331000300 | \n",
1734 | " 2 | \n",
1735 | " The Present state of Sicily and Malta, Extract... | \n",
1736 | " London | \n",
1737 | " London : Printed for G. Kearsley, at Johnson's... | \n",
1738 | " Printed for G. Kearsley, at Johnson's Head, No... | \n",
1739 | " English | \n",
1740 | " Sicily (Italy), Description and travel, Early ... | \n",
1741 | " History and Geography | \n",
1742 | " With a final errata leaf. P. iv misnumbered v... | \n",
1743 | " 17880101 | \n",
1744 | " London | \n",
1745 | " 0 | \n",
1746 | "
\n",
1747 | " \n",
1748 | " | 18 | \n",
1749 | " T165771 | \n",
1750 | " 0 | \n",
1751 | " Holland: a jaunt to the principal places in th... | \n",
1752 | " 1331201100 | \n",
1753 | " 2 | \n",
1754 | " Holland: a jaunt to the principal places in th... | \n",
1755 | " London | \n",
1756 | " London : Printed, and sold by W.Hay, next to t... | \n",
1757 | " Printed, and sold by W.Hay, next to the Societ... | \n",
1758 | " English | \n",
1759 | " Holland (Netherlands : Province), Description ... | \n",
1760 | " History and Geography | \n",
1761 | " Price on title page: (Price 2s.) | \n",
1762 | " 17750101 | \n",
1763 | " London | \n",
1764 | " 0 | \n",
1765 | "
\n",
1766 | " \n",
1767 | " | 19 | \n",
1768 | " T231765 | \n",
1769 | " 0 | \n",
1770 | " A Description of the hermitage of Warkworth. | \n",
1771 | " 1331400300 | \n",
1772 | " 2 | \n",
1773 | " A Description of the hermitage of Warkworth. | \n",
1774 | " [London? ] | \n",
1775 | " [London? : s.n., 1800?] | \n",
1776 | " s.n. | \n",
1777 | " English | \n",
1778 | " Great Britain, Description and travel, Early w... | \n",
1779 | " History and Geography | \n",
1780 | " | \n",
1781 | " 18000101 | \n",
1782 | " London | \n",
1783 | " 0 | \n",
1784 | "
\n",
1785 | " \n",
1786 | " | 20 | \n",
1787 | " N025348 | \n",
1788 | " Volume 1 | \n",
1789 | " The world displayed; or, A curious collection ... | \n",
1790 | " 1334900101 | \n",
1791 | " 2 | \n",
1792 | " The world displayed; or, A curious collection ... | \n",
1793 | " Dublin | \n",
1794 | " Dublin : printed by James Williams, M,DCC,LXXI... | \n",
1795 | " printed by James Williams | \n",
1796 | " English | \n",
1797 | " Voyages and travels | \n",
1798 | " History and Geography | \n",
1799 | " Compiled by Christopher Smart, Oliver Goldsmit... | \n",
1800 | " 17790101 | \n",
1801 | " Dublin | \n",
1802 | " 20 | \n",
1803 | "
\n",
1804 | " \n",
1805 | " | 21 | \n",
1806 | " N025348 | \n",
1807 | " Volume 2 | \n",
1808 | " The world displayed; or, A curious collection ... | \n",
1809 | " 1334900102 | \n",
1810 | " 2 | \n",
1811 | " The world displayed; or, A curious collection ... | \n",
1812 | " Dublin | \n",
1813 | " Dublin : printed by James Williams, M,DCC,LXXI... | \n",
1814 | " printed by James Williams | \n",
1815 | " English | \n",
1816 | " Voyages and travels | \n",
1817 | " History and Geography | \n",
1818 | " Compiled by Christopher Smart, Oliver Goldsmit... | \n",
1819 | " 17790101 | \n",
1820 | " Dublin | \n",
1821 | " 20 | \n",
1822 | "
\n",
1823 | " \n",
1824 | " | 22 | \n",
1825 | " N025348 | \n",
1826 | " Volume 3 | \n",
1827 | " The world displayed; or, A curious collection ... | \n",
1828 | " 1334900103 | \n",
1829 | " 2 | \n",
1830 | " The world displayed; or, A curious collection ... | \n",
1831 | " Dublin | \n",
1832 | " Dublin : printed by James Williams, M,DCC,LXXI... | \n",
1833 | " printed by James Williams | \n",
1834 | " English | \n",
1835 | " Voyages and travels | \n",
1836 | " History and Geography | \n",
1837 | " Compiled by Christopher Smart, Oliver Goldsmit... | \n",
1838 | " 17790101 | \n",
1839 | " Dublin | \n",
1840 | " 20 | \n",
1841 | "
\n",
1842 | " \n",
1843 | " | 23 | \n",
1844 | " N025348 | \n",
1845 | " Volume 4 | \n",
1846 | " The world displayed; or, A curious collection ... | \n",
1847 | " 1334900104 | \n",
1848 | " 2 | \n",
1849 | " The world displayed; or, A curious collection ... | \n",
1850 | " Dublin | \n",
1851 | " Dublin : printed by James Williams, M,DCC,LXXI... | \n",
1852 | " printed by James Williams | \n",
1853 | " English | \n",
1854 | " Voyages and travels | \n",
1855 | " History and Geography | \n",
1856 | " Compiled by Christopher Smart, Oliver Goldsmit... | \n",
1857 | " 17790101 | \n",
1858 | " Dublin | \n",
1859 | " 20 | \n",
1860 | "
\n",
1861 | " \n",
1862 | " | 24 | \n",
1863 | " N025348 | \n",
1864 | " Volume 5 | \n",
1865 | " The world displayed; or, A curious collection ... | \n",
1866 | " 1334900105 | \n",
1867 | " 2 | \n",
1868 | " The world displayed; or, A curious collection ... | \n",
1869 | " Dublin | \n",
1870 | " Dublin : printed by James Williams, M,DCC,LXXI... | \n",
1871 | " printed by James Williams | \n",
1872 | " English | \n",
1873 | " Voyages and travels | \n",
1874 | " History and Geography | \n",
1875 | " Compiled by Christopher Smart, Oliver Goldsmit... | \n",
1876 | " 17790101 | \n",
1877 | " Dublin | \n",
1878 | " 20 | \n",
1879 | "
\n",
1880 | " \n",
1881 | " | 25 | \n",
1882 | " N025348 | \n",
1883 | " Volume 6 | \n",
1884 | " The world displayed; or, A curious collection ... | \n",
1885 | " 1334900106 | \n",
1886 | " 2 | \n",
1887 | " The world displayed; or, A curious collection ... | \n",
1888 | " Dublin | \n",
1889 | " Dublin : printed by James Williams, M,DCC,LXXI... | \n",
1890 | " printed by James Williams | \n",
1891 | " English | \n",
1892 | " Voyages and travels | \n",
1893 | " History and Geography | \n",
1894 | " Compiled by Christopher Smart, Oliver Goldsmit... | \n",
1895 | " 17790101 | \n",
1896 | " Dublin | \n",
1897 | " 20 | \n",
1898 | "
\n",
1899 | " \n",
1900 | " | 26 | \n",
1901 | " N025348 | \n",
1902 | " Volume 7 | \n",
1903 | " The world displayed; or, A curious collection ... | \n",
1904 | " 1334900107 | \n",
1905 | " 2 | \n",
1906 | " The world displayed; or, A curious collection ... | \n",
1907 | " Dublin | \n",
1908 | " Dublin : printed by James Williams, M,DCC,LXXI... | \n",
1909 | " printed by James Williams | \n",
1910 | " English | \n",
1911 | " Voyages and travels | \n",
1912 | " History and Geography | \n",
1913 | " Compiled by Christopher Smart, Oliver Goldsmit... | \n",
1914 | " 17790101 | \n",
1915 | " Dublin | \n",
1916 | " 20 | \n",
1917 | "
\n",
1918 | " \n",
1919 | " | 27 | \n",
1920 | " N025348 | \n",
1921 | " Volume 8 | \n",
1922 | " The world displayed; or, A curious collection ... | \n",
1923 | " 1334900108 | \n",
1924 | " 2 | \n",
1925 | " The world displayed; or, A curious collection ... | \n",
1926 | " Dublin | \n",
1927 | " Dublin : printed by James Williams, M,DCC,LXXI... | \n",
1928 | " printed by James Williams | \n",
1929 | " English | \n",
1930 | " Voyages and travels | \n",
1931 | " History and Geography | \n",
1932 | " Compiled by Christopher Smart, Oliver Goldsmit... | \n",
1933 | " 17790101 | \n",
1934 | " Dublin | \n",
1935 | " 20 | \n",
1936 | "
\n",
1937 | " \n",
1938 | " | 28 | \n",
1939 | " N025348 | \n",
1940 | " Volume 9 | \n",
1941 | " The world displayed; or, A curious collection ... | \n",
1942 | " 1334900109 | \n",
1943 | " 2 | \n",
1944 | " The world displayed; or, A curious collection ... | \n",
1945 | " Dublin | \n",
1946 | " Dublin : printed by James Williams, M,DCC,LXXI... | \n",
1947 | " printed by James Williams | \n",
1948 | " English | \n",
1949 | " Voyages and travels | \n",
1950 | " History and Geography | \n",
1951 | " Compiled by Christopher Smart, Oliver Goldsmit... | \n",
1952 | " 17790101 | \n",
1953 | " Dublin | \n",
1954 | " 20 | \n",
1955 | "
\n",
1956 | " \n",
1957 | " | 29 | \n",
1958 | " N025348 | \n",
1959 | " Volume 10 | \n",
1960 | " The world displayed; or, A curious collection ... | \n",
1961 | " 1335000110 | \n",
1962 | " 2 | \n",
1963 | " The world displayed; or, A curious collection ... | \n",
1964 | " Dublin | \n",
1965 | " Dublin : printed by James Williams, M,DCC,LXXI... | \n",
1966 | " printed by James Williams | \n",
1967 | " English | \n",
1968 | " Voyages and travels | \n",
1969 | " History and Geography | \n",
1970 | " Compiled by Christopher Smart, Oliver Goldsmit... | \n",
1971 | " 17790101 | \n",
1972 | " Dublin | \n",
1973 | " 20 | \n",
1974 | "
\n",
1975 | " \n",
1976 | " | ... | \n",
1977 | " ... | \n",
1978 | " ... | \n",
1979 | " ... | \n",
1980 | " ... | \n",
1981 | " ... | \n",
1982 | " ... | \n",
1983 | " ... | \n",
1984 | " ... | \n",
1985 | " ... | \n",
1986 | " ... | \n",
1987 | " ... | \n",
1988 | " ... | \n",
1989 | " ... | \n",
1990 | " ... | \n",
1991 | " ... | \n",
1992 | " ... | \n",
1993 | "
\n",
1994 | " \n",
1995 | " | 282 | \n",
1996 | " T206354 | \n",
1997 | " 0 | \n",
1998 | " A new and enlarged book of sailing directions ... | \n",
1999 | " 1696000600 | \n",
2000 | " 2 | \n",
2001 | " A new and enlarged book of sailing directions ... | \n",
2002 | " London | \n",
2003 | " London : Printed for Robert Laurie and James W... | \n",
2004 | " Printed for Robert Laurie and James Whittle, N... | \n",
2005 | " English | \n",
2006 | " Pilot guides, Florida, Straits of | \n",
2007 | " History and Geography | \n",
2008 | " At foot of titlepage: \"N.B. These directions a... | \n",
2009 | " 17960101 | \n",
2010 | " London | \n",
2011 | " 0 | \n",
2012 | "
\n",
2013 | " \n",
2014 | " | 283 | \n",
2015 | " T215603 | \n",
2016 | " 0 | \n",
2017 | " The admirable travels of Thomas Jenkins. And D... | \n",
2018 | " 1696001000 | \n",
2019 | " 2 | \n",
2020 | " The admirable travels of Thomas Jenkins. And D... | \n",
2021 | " London | \n",
2022 | " London : Printed for, and sold by W. Clements,... | \n",
2023 | " Printed for, and sold by W. Clements, and J. S... | \n",
2024 | " English | \n",
2025 | " Voyages and travels, England, London, Early wo... | \n",
2026 | " History and Geography | \n",
2027 | " Written in the first person by David Lowellin.... | \n",
2028 | " 17910101 | \n",
2029 | " London | \n",
2030 | " 0 | \n",
2031 | "
\n",
2032 | " \n",
2033 | " | 284 | \n",
2034 | " T187257 | \n",
2035 | " 0 | \n",
2036 | " East-Bourn, being an descriptive account of th... | \n",
2037 | " 1696400300 | \n",
2038 | " 2 | \n",
2039 | " East-Bourn, being an descriptive account of th... | \n",
2040 | " London | \n",
2041 | " London : Printed at the Philanthropic reform, ... | \n",
2042 | " Printed at the Philanthropic reform, for Hookh... | \n",
2043 | " English | \n",
2044 | " Sussex (England), Description and travel, Earl... | \n",
2045 | " History and Geography | \n",
2046 | " | \n",
2047 | " 17990101 | \n",
2048 | " London | \n",
2049 | " 0 | \n",
2050 | "
\n",
2051 | " \n",
2052 | " | 285 | \n",
2053 | " T186693 | \n",
2054 | " Volume 1 | \n",
2055 | " A journey made in the summer of 1794, through ... | \n",
2056 | " 1698000201 | \n",
2057 | " 2 | \n",
2058 | " A journey made in the summer of 1794, through ... | \n",
2059 | " London | \n",
2060 | " London : printed for G.G. and J. Robinson, Pat... | \n",
2061 | " printed for G.G. and J. Robinson, Paternoster-Row | \n",
2062 | " English | \n",
2063 | " Europe, Description and travel | \n",
2064 | " History and Geography | \n",
2065 | " | \n",
2066 | " 17960101 | \n",
2067 | " London | \n",
2068 | " 2 | \n",
2069 | "
\n",
2070 | " \n",
2071 | " | 286 | \n",
2072 | " T186693 | \n",
2073 | " Volume 2 | \n",
2074 | " A journey made in the summer of 1794, through ... | \n",
2075 | " 1698000202 | \n",
2076 | " 2 | \n",
2077 | " A journey made in the summer of 1794, through ... | \n",
2078 | " London | \n",
2079 | " London : printed for G.G. and J. Robinson, Pat... | \n",
2080 | " printed for G.G. and J. Robinson, Paternoster-Row | \n",
2081 | " English | \n",
2082 | " Europe, Description and travel | \n",
2083 | " History and Geography | \n",
2084 | " | \n",
2085 | " 17960101 | \n",
2086 | " London | \n",
2087 | " 2 | \n",
2088 | "
\n",
2089 | " \n",
2090 | " | 287 | \n",
2091 | " T212407 | \n",
2092 | " 0 | \n",
2093 | " The world in miniature; or, The entertaining t... | \n",
2094 | " 1698200100 | \n",
2095 | " 2 | \n",
2096 | " The world in miniature; or, The entertaining t... | \n",
2097 | " Dublin | \n",
2098 | " Dublin : Printed for William Williamson, books... | \n",
2099 | " Printed for William Williamson, bookseller, at... | \n",
2100 | " English | \n",
2101 | " Voyages and travels, Early works to 1800 | \n",
2102 | " History and Geography | \n",
2103 | " With an index. | \n",
2104 | " 17510101 | \n",
2105 | " Dublin | \n",
2106 | " 0 | \n",
2107 | "
\n",
2108 | " \n",
2109 | " | 288 | \n",
2110 | " T212500 | \n",
2111 | " 0 | \n",
2112 | " A tour through Ireland. In several entertainin... | \n",
2113 | " 1698200400 | \n",
2114 | " 2 | \n",
2115 | " A tour through Ireland. In several entertainin... | \n",
2116 | " Dublin | \n",
2117 | " Dublin : Printed for Peter Wilson, bookseller,... | \n",
2118 | " Printed for Peter Wilson, bookseller, in Dame-... | \n",
2119 | " English | \n",
2120 | " Ireland, Description and travel | \n",
2121 | " History and Geography | \n",
2122 | " Anonymous. By William Rufus Chetwood. A reiss... | \n",
2123 | " 17480101 | \n",
2124 | " Dublin | \n",
2125 | " 0 | \n",
2126 | "
\n",
2127 | " \n",
2128 | " | 289 | \n",
2129 | " T188848 | \n",
2130 | " 0 | \n",
2131 | " An authentic narrative of some remarkable and ... | \n",
2132 | " 1698500500 | \n",
2133 | " 2 | \n",
2134 | " An authentic narrative of some remarkable and ... | \n",
2135 | " Dublin | \n",
2136 | " Dublin : Printed by Robert Dapper, for B. Dugd... | \n",
2137 | " Printed by Robert Dapper, for B. Dugdale, No. ... | \n",
2138 | " English | \n",
2139 | " Voyages and travels, Personal narratives | \n",
2140 | " History and Geography | \n",
2141 | " Written by Newton himself. First published as... | \n",
2142 | " 17960101 | \n",
2143 | " Dublin | \n",
2144 | " 0 | \n",
2145 | "
\n",
2146 | " \n",
2147 | " | 290 | \n",
2148 | " T100821 | \n",
2149 | " 0 | \n",
2150 | " The voiage and travaile of Sir John Maundevile... | \n",
2151 | " 1705600300 | \n",
2152 | " 2 | \n",
2153 | " The voiage and travaile of Sir John Maundevile... | \n",
2154 | " London | \n",
2155 | " London : Printed for Woodman, and Lyon, in Rus... | \n",
2156 | " Printed for Woodman, and Lyon, in Russel-Stree... | \n",
2157 | " English | \n",
2158 | " Voyages and travels | \n",
2159 | " History and Geography | \n",
2160 | " Titlepage in red and black, preceding a black ... | \n",
2161 | " 17270101 | \n",
2162 | " London | \n",
2163 | " 0 | \n",
2164 | "
\n",
2165 | " \n",
2166 | " | 291 | \n",
2167 | " T176199 | \n",
2168 | " Volume 1 | \n",
2169 | " Travels in Switzerland, and in the country of ... | \n",
2170 | " 1726100301 | \n",
2171 | " 2 | \n",
2172 | " Travels in Switzerland, and in the country of ... | \n",
2173 | " London | \n",
2174 | " London : printed for T. Cadell, in the Strand,... | \n",
2175 | " printed for T. Cadell, in the Strand | \n",
2176 | " English | \n",
2177 | " Switzerland, Description and travel | \n",
2178 | " History and Geography | \n",
2179 | " | \n",
2180 | " 17910101 | \n",
2181 | " London | \n",
2182 | " 3 | \n",
2183 | "
\n",
2184 | " \n",
2185 | " | 292 | \n",
2186 | " T176199 | \n",
2187 | " Volume 2 | \n",
2188 | " Travels in Switzerland, and in the country of ... | \n",
2189 | " 1726200102 | \n",
2190 | " 2 | \n",
2191 | " Travels in Switzerland, and in the country of ... | \n",
2192 | " London | \n",
2193 | " London : printed for T. Cadell, in the Strand,... | \n",
2194 | " printed for T. Cadell, in the Strand | \n",
2195 | " English | \n",
2196 | " Switzerland, Description and travel | \n",
2197 | " History and Geography | \n",
2198 | " | \n",
2199 | " 17910101 | \n",
2200 | " London | \n",
2201 | " 3 | \n",
2202 | "
\n",
2203 | " \n",
2204 | " | 293 | \n",
2205 | " T176199 | \n",
2206 | " Volume 3 | \n",
2207 | " Travels in Switzerland, and in the country of ... | \n",
2208 | " 1726200103 | \n",
2209 | " 2 | \n",
2210 | " Travels in Switzerland, and in the country of ... | \n",
2211 | " London | \n",
2212 | " London : printed for T. Cadell, in the Strand,... | \n",
2213 | " printed for T. Cadell, in the Strand | \n",
2214 | " English | \n",
2215 | " Switzerland, Description and travel | \n",
2216 | " History and Geography | \n",
2217 | " | \n",
2218 | " 17910101 | \n",
2219 | " London | \n",
2220 | " 3 | \n",
2221 | "
\n",
2222 | " \n",
2223 | " | 294 | \n",
2224 | " T006285 | \n",
2225 | " Volume 1 | \n",
2226 | " An account of the European settlements in Amer... | \n",
2227 | " 1726700501 | \n",
2228 | " 2 | \n",
2229 | " An account of the European settlements in Amer... | \n",
2230 | " London | \n",
2231 | " London : printed for J. Dodsley, in Pall-Mall,... | \n",
2232 | " printed for J. Dodsley, in Pall-Mall | \n",
2233 | " English | \n",
2234 | " United States, Description and travel, Early w... | \n",
2235 | " History and Geography | \n",
2236 | " Anonymous. Probably a collaboration by William... | \n",
2237 | " 17700101 | \n",
2238 | " London | \n",
2239 | " 2 | \n",
2240 | "
\n",
2241 | " \n",
2242 | " | 295 | \n",
2243 | " T006285 | \n",
2244 | " Volume 2 | \n",
2245 | " An account of the European settlements in Amer... | \n",
2246 | " 1726800102 | \n",
2247 | " 2 | \n",
2248 | " An account of the European settlements in Amer... | \n",
2249 | " London | \n",
2250 | " London : printed for J. Dodsley, in Pall-Mall,... | \n",
2251 | " printed for J. Dodsley, in Pall-Mall | \n",
2252 | " English | \n",
2253 | " United States, Description and travel, Early w... | \n",
2254 | " History and Geography | \n",
2255 | " Anonymous. Probably a collaboration by William... | \n",
2256 | " 17700101 | \n",
2257 | " London | \n",
2258 | " 2 | \n",
2259 | "
\n",
2260 | " \n",
2261 | " | 296 | \n",
2262 | " T012218 | \n",
2263 | " Volume 1 | \n",
2264 | " Travels through Syria and Egypt, in the years ... | \n",
2265 | " 1726800201 | \n",
2266 | " 2 | \n",
2267 | " Travels through Syria and Egypt, in the years ... | \n",
2268 | " London | \n",
2269 | " London : printed for G.G.J. and J. Robinson, P... | \n",
2270 | " printed for G.G.J. and J. Robinson, Pater-Nost... | \n",
2271 | " English | \n",
2272 | " Syria, Description and travel, Early works to ... | \n",
2273 | " History and Geography | \n",
2274 | " Largely a reimpression of the 1787 London edit... | \n",
2275 | " 17880101 | \n",
2276 | " London | \n",
2277 | " 2 | \n",
2278 | "
\n",
2279 | " \n",
2280 | " | 297 | \n",
2281 | " T041858 | \n",
2282 | " 0 | \n",
2283 | " Observations on the western parts of England, ... | \n",
2284 | " 1726800400 | \n",
2285 | " 2 | \n",
2286 | " Observations on the western parts of England, ... | \n",
2287 | " London | \n",
2288 | " London : Printed for T. Cadell Jun. and W. Dav... | \n",
2289 | " Printed for T. Cadell Jun. and W. Davies, Strand | \n",
2290 | " English | \n",
2291 | " England, Description and travel, Early works t... | \n",
2292 | " History and Geography | \n",
2293 | " The same setting of type was also printed with... | \n",
2294 | " 17980101 | \n",
2295 | " London | \n",
2296 | " 0 | \n",
2297 | "
\n",
2298 | " \n",
2299 | " | 298 | \n",
2300 | " T059033 | \n",
2301 | " 0 | \n",
2302 | " Travels in several parts of Turkey, Egypt, and... | \n",
2303 | " 1726900100 | \n",
2304 | " 2 | \n",
2305 | " Travels in several parts of Turkey, Egypt, and... | \n",
2306 | " London | \n",
2307 | " London : Printed for the author; and sold by J... | \n",
2308 | " Printed for the author; and sold by J. Axtell,... | \n",
2309 | " English | \n",
2310 | " Middle East, Description and travel | \n",
2311 | " History and Geography | \n",
2312 | " Horizontal chain lines. | \n",
2313 | " 17740101 | \n",
2314 | " London | \n",
2315 | " 0 | \n",
2316 | "
\n",
2317 | " \n",
2318 | " | 299 | \n",
2319 | " T085368 | \n",
2320 | " Volume 1 | \n",
2321 | " A year's journey through France, and part of S... | \n",
2322 | " 1727800301 | \n",
2323 | " 2 | \n",
2324 | " A year's journey through France, and part of S... | \n",
2325 | " Bath | \n",
2326 | " Bath : Printed by R. Cruttwell, for the author... | \n",
2327 | " Printed by R. Cruttwell, for the author; and s... | \n",
2328 | " English | \n",
2329 | " France, Description and travel, Early works to... | \n",
2330 | " History and Geography | \n",
2331 | " With a list of subscribers. | \n",
2332 | " 17770101 | \n",
2333 | " Bath | \n",
2334 | " 2 | \n",
2335 | "
\n",
2336 | " \n",
2337 | " | 300 | \n",
2338 | " T085368 | \n",
2339 | " Volume 2 | \n",
2340 | " A year's journey through France, and part of S... | \n",
2341 | " 1727800302 | \n",
2342 | " 2 | \n",
2343 | " A year's journey through France, and part of S... | \n",
2344 | " Bath | \n",
2345 | " Bath : Printed by R. Cruttwell, for the author... | \n",
2346 | " Printed by R. Cruttwell, for the author; and s... | \n",
2347 | " English | \n",
2348 | " France, Description and travel, Early works to... | \n",
2349 | " History and Geography | \n",
2350 | " With a list of subscribers. | \n",
2351 | " 17770101 | \n",
2352 | " Bath | \n",
2353 | " 2 | \n",
2354 | "
\n",
2355 | " \n",
2356 | " | 301 | \n",
2357 | " T089035 | \n",
2358 | " Volume 1 | \n",
2359 | " Letters from Italy, describing the manners, cu... | \n",
2360 | " 1728000101 | \n",
2361 | " 2 | \n",
2362 | " Letters from Italy, describing the manners, cu... | \n",
2363 | " Dublin | \n",
2364 | " Dublin : printed for W. Watson, D. Chamberlain... | \n",
2365 | " printed for W. Watson, D. Chamberlaine, J. Pot... | \n",
2366 | " English | \n",
2367 | " Italy, Description and travel | \n",
2368 | " History and Geography | \n",
2369 | " An English woman = Anna, wife of Sir John Rigg... | \n",
2370 | " 17760101 | \n",
2371 | " London | \n",
2372 | " 3 | \n",
2373 | "
\n",
2374 | " \n",
2375 | " | 302 | \n",
2376 | " T089035 | \n",
2377 | " Volume 2 | \n",
2378 | " Letters from Italy, describing the manners, cu... | \n",
2379 | " 1728000102 | \n",
2380 | " 2 | \n",
2381 | " Letters from Italy, describing the manners, cu... | \n",
2382 | " Dublin | \n",
2383 | " Dublin : printed for W. Watson, D. Chamberlain... | \n",
2384 | " printed for W. Watson, D. Chamberlaine, J. Pot... | \n",
2385 | " English | \n",
2386 | " Italy, Description and travel | \n",
2387 | " History and Geography | \n",
2388 | " An English woman = Anna, wife of Sir John Rigg... | \n",
2389 | " 17760101 | \n",
2390 | " London | \n",
2391 | " 3 | \n",
2392 | "
\n",
2393 | " \n",
2394 | " | 303 | \n",
2395 | " T089035 | \n",
2396 | " Volume 3 | \n",
2397 | " Letters from Italy, describing the manners, cu... | \n",
2398 | " 1728000103 | \n",
2399 | " 2 | \n",
2400 | " Letters from Italy, describing the manners, cu... | \n",
2401 | " Dublin | \n",
2402 | " Dublin : printed for W. Watson, D. Chamberlain... | \n",
2403 | " printed for W. Watson, D. Chamberlaine, J. Pot... | \n",
2404 | " English | \n",
2405 | " Italy, Description and travel | \n",
2406 | " History and Geography | \n",
2407 | " An English woman = Anna, wife of Sir John Rigg... | \n",
2408 | " 17760101 | \n",
2409 | " London | \n",
2410 | " 3 | \n",
2411 | "
\n",
2412 | " \n",
2413 | " | 304 | \n",
2414 | " T099651 | \n",
2415 | " Volume 1 | \n",
2416 | " Travels from St. Petersburg in Russia, to dive... | \n",
2417 | " 1728900101 | \n",
2418 | " 2 | \n",
2419 | " Travels from St. Petersburg in Russia, to dive... | \n",
2420 | " Glasgow | \n",
2421 | " Glasgow : printed for the author by Robert and... | \n",
2422 | " printed for the author by Robert and Andrew Fo... | \n",
2423 | " English | \n",
2424 | " Asia, Description and travel | \n",
2425 | " History and Geography | \n",
2426 | " With a list of subscribers in vol.1. \"Volume ... | \n",
2427 | " 17630101 | \n",
2428 | " Glasgow | \n",
2429 | " 2 | \n",
2430 | "
\n",
2431 | " \n",
2432 | " | 305 | \n",
2433 | " T099651 | \n",
2434 | " Volume 2 | \n",
2435 | " Travels from St. Petersburg in Russia, to dive... | \n",
2436 | " 1728900102 | \n",
2437 | " 2 | \n",
2438 | " Travels from St. Petersburg in Russia, to dive... | \n",
2439 | " Glasgow | \n",
2440 | " Glasgow : printed for the author by Robert and... | \n",
2441 | " printed for the author by Robert and Andrew Fo... | \n",
2442 | " English | \n",
2443 | " Asia, Description and travel | \n",
2444 | " History and Geography | \n",
2445 | " With a list of subscribers in vol.1. \"Volume ... | \n",
2446 | " 17630101 | \n",
2447 | " Glasgow | \n",
2448 | " 2 | \n",
2449 | "
\n",
2450 | " \n",
2451 | " | 306 | \n",
2452 | " T097845 | \n",
2453 | " Volume 1 | \n",
2454 | " A Collection of voyages and travels, some now ... | \n",
2455 | " 1736200201 | \n",
2456 | " 2 | \n",
2457 | " A Collection of voyages and travels, some now ... | \n",
2458 | " London | \n",
2459 | " London : Printed by assignment from Messrs. Ch... | \n",
2460 | " Printed by assignment from Messrs. Churchill, ... | \n",
2461 | " English | \n",
2462 | " Voyages and travels | \n",
2463 | " History and Geography | \n",
2464 | " Compiled by Awnsham Churchill and John Churchi... | \n",
2465 | " 17440101 | \n",
2466 | " London | \n",
2467 | " 6 | \n",
2468 | "
\n",
2469 | " \n",
2470 | " | 307 | \n",
2471 | " T097845 | \n",
2472 | " Volume 2 | \n",
2473 | " A Collection of voyages and travels, some now ... | \n",
2474 | " 1736200202 | \n",
2475 | " 2 | \n",
2476 | " A Collection of voyages and travels, some now ... | \n",
2477 | " London | \n",
2478 | " London : Printed by assignment from Messrs. Ch... | \n",
2479 | " Printed by assignment from Messrs. Churchill, ... | \n",
2480 | " English | \n",
2481 | " Voyages and travels | \n",
2482 | " History and Geography | \n",
2483 | " Compiled by Awnsham Churchill and John Churchi... | \n",
2484 | " 17440101 | \n",
2485 | " London | \n",
2486 | " 6 | \n",
2487 | "
\n",
2488 | " \n",
2489 | " | 308 | \n",
2490 | " T097845 | \n",
2491 | " Volume 3 | \n",
2492 | " A Collection of voyages and travels, some now ... | \n",
2493 | " 1736300103 | \n",
2494 | " 2 | \n",
2495 | " A Collection of voyages and travels, some now ... | \n",
2496 | " London | \n",
2497 | " London : Printed by assignment from Messrs. Ch... | \n",
2498 | " Printed by assignment from Messrs. Churchill, ... | \n",
2499 | " English | \n",
2500 | " Voyages and travels | \n",
2501 | " History and Geography | \n",
2502 | " Compiled by Awnsham Churchill and John Churchi... | \n",
2503 | " 17440101 | \n",
2504 | " London | \n",
2505 | " 6 | \n",
2506 | "
\n",
2507 | " \n",
2508 | " | 309 | \n",
2509 | " T097845 | \n",
2510 | " Volume 4 | \n",
2511 | " A Collection of voyages and travels, some now ... | \n",
2512 | " 1736300104 | \n",
2513 | " 2 | \n",
2514 | " A Collection of voyages and travels, some now ... | \n",
2515 | " London | \n",
2516 | " London : Printed by assignment from Messrs. Ch... | \n",
2517 | " Printed by assignment from Messrs. Churchill, ... | \n",
2518 | " English | \n",
2519 | " Voyages and travels | \n",
2520 | " History and Geography | \n",
2521 | " Compiled by Awnsham Churchill and John Churchi... | \n",
2522 | " 17440101 | \n",
2523 | " London | \n",
2524 | " 6 | \n",
2525 | "
\n",
2526 | " \n",
2527 | " | 310 | \n",
2528 | " T097845 | \n",
2529 | " Volume 5 | \n",
2530 | " A Collection of voyages and travels, some now ... | \n",
2531 | " 1736400105 | \n",
2532 | " 2 | \n",
2533 | " A Collection of voyages and travels, some now ... | \n",
2534 | " London | \n",
2535 | " London : Printed by assignment from Messrs. Ch... | \n",
2536 | " Printed by assignment from Messrs. Churchill, ... | \n",
2537 | " English | \n",
2538 | " Voyages and travels | \n",
2539 | " History and Geography | \n",
2540 | " Compiled by Awnsham Churchill and John Churchi... | \n",
2541 | " 17440101 | \n",
2542 | " London | \n",
2543 | " 6 | \n",
2544 | "
\n",
2545 | " \n",
2546 | " | 311 | \n",
2547 | " T097845 | \n",
2548 | " Volume 6 | \n",
2549 | " A Collection of voyages and travels, some now ... | \n",
2550 | " 1736400106 | \n",
2551 | " 2 | \n",
2552 | " A Collection of voyages and travels, some now ... | \n",
2553 | " London | \n",
2554 | " London : Printed by assignment from Messrs. Ch... | \n",
2555 | " Printed by assignment from Messrs. Churchill, ... | \n",
2556 | " English | \n",
2557 | " Voyages and travels | \n",
2558 | " History and Geography | \n",
2559 | " Compiled by Awnsham Churchill and John Churchi... | \n",
2560 | " 17440101 | \n",
2561 | " London | \n",
2562 | " 6 | \n",
2563 | "
\n",
2564 | " \n",
2565 | "
\n",
2566 | "
312 rows × 16 columns
\n",
2567 | "
"
2568 | ],
2569 | "text/plain": [
2570 | " ESTCID currentVolume displayTitle \\\n",
2571 | "0 N025387 Volume 16 The World displayed; Or, A curious collection ... \n",
2572 | "1 N031356 0 An essay towards a description of the city of ... \n",
2573 | "2 N029227 0 Interesting account of the early voyages, made... \n",
2574 | "3 N031489 Volume 1 An entertaining journey to the Netherlands; co... \n",
2575 | "4 N031489 Volume 2 An entertaining journey to the Netherlands; co... \n",
2576 | "5 N031489 Volume 3 An entertaining journey to the Netherlands; co... \n",
2577 | "6 T188570 0 A New collection of voyages and travels, dedic... \n",
2578 | "7 T172346 0 Miscellaneous remarks made on the spot, in a l... \n",
2579 | "8 T220401 Volume 1 The memoirs of Charles-Lewis, Baron de Pollnit... \n",
2580 | "9 T220401 Volume 2 The memoirs of Charles-Lewis, Baron de Pollnit... \n",
2581 | "10 W012701 0 A history of a voyage to the coast of Africa, ... \n",
2582 | "11 T170016 Volume 1 Modern voyages: Containing a variety of useful... \n",
2583 | "12 T170016 Volume 2 Modern voyages: Containing a variety of useful... \n",
2584 | "13 T224686 0 The foreign travels and dangerous voyages of t... \n",
2585 | "14 T164122 0 An accountof the shipwreck and captivity of Mr... \n",
2586 | "15 T224703 0 Owen's new book of fairs, published by the Kin... \n",
2587 | "16 T223983 0 Morse's Geography. This day is published, in o... \n",
2588 | "17 N034619 0 The Present state of Sicily and Malta, Extract... \n",
2589 | "18 T165771 0 Holland: a jaunt to the principal places in th... \n",
2590 | "19 T231765 0 A Description of the hermitage of Warkworth. \n",
2591 | "20 N025348 Volume 1 The world displayed; or, A curious collection ... \n",
2592 | "21 N025348 Volume 2 The world displayed; or, A curious collection ... \n",
2593 | "22 N025348 Volume 3 The world displayed; or, A curious collection ... \n",
2594 | "23 N025348 Volume 4 The world displayed; or, A curious collection ... \n",
2595 | "24 N025348 Volume 5 The world displayed; or, A curious collection ... \n",
2596 | "25 N025348 Volume 6 The world displayed; or, A curious collection ... \n",
2597 | "26 N025348 Volume 7 The world displayed; or, A curious collection ... \n",
2598 | "27 N025348 Volume 8 The world displayed; or, A curious collection ... \n",
2599 | "28 N025348 Volume 9 The world displayed; or, A curious collection ... \n",
2600 | "29 N025348 Volume 10 The world displayed; or, A curious collection ... \n",
2601 | ".. ... ... ... \n",
2602 | "282 T206354 0 A new and enlarged book of sailing directions ... \n",
2603 | "283 T215603 0 The admirable travels of Thomas Jenkins. And D... \n",
2604 | "284 T187257 0 East-Bourn, being an descriptive account of th... \n",
2605 | "285 T186693 Volume 1 A journey made in the summer of 1794, through ... \n",
2606 | "286 T186693 Volume 2 A journey made in the summer of 1794, through ... \n",
2607 | "287 T212407 0 The world in miniature; or, The entertaining t... \n",
2608 | "288 T212500 0 A tour through Ireland. In several entertainin... \n",
2609 | "289 T188848 0 An authentic narrative of some remarkable and ... \n",
2610 | "290 T100821 0 The voiage and travaile of Sir John Maundevile... \n",
2611 | "291 T176199 Volume 1 Travels in Switzerland, and in the country of ... \n",
2612 | "292 T176199 Volume 2 Travels in Switzerland, and in the country of ... \n",
2613 | "293 T176199 Volume 3 Travels in Switzerland, and in the country of ... \n",
2614 | "294 T006285 Volume 1 An account of the European settlements in Amer... \n",
2615 | "295 T006285 Volume 2 An account of the European settlements in Amer... \n",
2616 | "296 T012218 Volume 1 Travels through Syria and Egypt, in the years ... \n",
2617 | "297 T041858 0 Observations on the western parts of England, ... \n",
2618 | "298 T059033 0 Travels in several parts of Turkey, Egypt, and... \n",
2619 | "299 T085368 Volume 1 A year's journey through France, and part of S... \n",
2620 | "300 T085368 Volume 2 A year's journey through France, and part of S... \n",
2621 | "301 T089035 Volume 1 Letters from Italy, describing the manners, cu... \n",
2622 | "302 T089035 Volume 2 Letters from Italy, describing the manners, cu... \n",
2623 | "303 T089035 Volume 3 Letters from Italy, describing the manners, cu... \n",
2624 | "304 T099651 Volume 1 Travels from St. Petersburg in Russia, to dive... \n",
2625 | "305 T099651 Volume 2 Travels from St. Petersburg in Russia, to dive... \n",
2626 | "306 T097845 Volume 1 A Collection of voyages and travels, some now ... \n",
2627 | "307 T097845 Volume 2 A Collection of voyages and travels, some now ... \n",
2628 | "308 T097845 Volume 3 A Collection of voyages and travels, some now ... \n",
2629 | "309 T097845 Volume 4 A Collection of voyages and travels, some now ... \n",
2630 | "310 T097845 Volume 5 A Collection of voyages and travels, some now ... \n",
2631 | "311 T097845 Volume 6 A Collection of voyages and travels, some now ... \n",
2632 | "\n",
2633 | " documentID eccorelease \\\n",
2634 | "0 1309700116 2 \n",
2635 | "1 1309700600 2 \n",
2636 | "2 1309900200 2 \n",
2637 | "3 1309900301 2 \n",
2638 | "4 1309900302 2 \n",
2639 | "5 1309900303 2 \n",
2640 | "6 1309901100 2 \n",
2641 | "7 1310300400 2 \n",
2642 | "8 1313200301 2 \n",
2643 | "9 1313200302 2 \n",
2644 | "10 1313200400 2 \n",
2645 | "11 1316200401 2 \n",
2646 | "12 1316200402 2 \n",
2647 | "13 1316400600 2 \n",
2648 | "14 1321200900 2 \n",
2649 | "15 1324400100 2 \n",
2650 | "16 1324400900 2 \n",
2651 | "17 1331000300 2 \n",
2652 | "18 1331201100 2 \n",
2653 | "19 1331400300 2 \n",
2654 | "20 1334900101 2 \n",
2655 | "21 1334900102 2 \n",
2656 | "22 1334900103 2 \n",
2657 | "23 1334900104 2 \n",
2658 | "24 1334900105 2 \n",
2659 | "25 1334900106 2 \n",
2660 | "26 1334900107 2 \n",
2661 | "27 1334900108 2 \n",
2662 | "28 1334900109 2 \n",
2663 | "29 1335000110 2 \n",
2664 | ".. ... ... \n",
2665 | "282 1696000600 2 \n",
2666 | "283 1696001000 2 \n",
2667 | "284 1696400300 2 \n",
2668 | "285 1698000201 2 \n",
2669 | "286 1698000202 2 \n",
2670 | "287 1698200100 2 \n",
2671 | "288 1698200400 2 \n",
2672 | "289 1698500500 2 \n",
2673 | "290 1705600300 2 \n",
2674 | "291 1726100301 2 \n",
2675 | "292 1726200102 2 \n",
2676 | "293 1726200103 2 \n",
2677 | "294 1726700501 2 \n",
2678 | "295 1726800102 2 \n",
2679 | "296 1726800201 2 \n",
2680 | "297 1726800400 2 \n",
2681 | "298 1726900100 2 \n",
2682 | "299 1727800301 2 \n",
2683 | "300 1727800302 2 \n",
2684 | "301 1728000101 2 \n",
2685 | "302 1728000102 2 \n",
2686 | "303 1728000103 2 \n",
2687 | "304 1728900101 2 \n",
2688 | "305 1728900102 2 \n",
2689 | "306 1736200201 2 \n",
2690 | "307 1736200202 2 \n",
2691 | "308 1736300103 2 \n",
2692 | "309 1736300104 2 \n",
2693 | "310 1736400105 2 \n",
2694 | "311 1736400106 2 \n",
2695 | "\n",
2696 | " fullTitle imprintCity \\\n",
2697 | "0 The World displayed; Or, A curious collection ... London \n",
2698 | "1 An essay towards a description of the city of ... [Bath] \n",
2699 | "2 Interesting account of the early voyages, made... London \n",
2700 | "3 An entertaining journey to the Netherlands; co... London \n",
2701 | "4 An entertaining journey to the Netherlands; co... London \n",
2702 | "5 An entertaining journey to the Netherlands; co... London \n",
2703 | "6 A New collection of voyages and travels, dedic... London \n",
2704 | "7 Miscellaneous remarks made on the spot, in a l... London \n",
2705 | "8 The memoirs of Charles-Lewis, Baron de Pollnit... London \n",
2706 | "9 The memoirs of Charles-Lewis, Baron de Pollnit... London \n",
2707 | "10 A history of a voyage to the coast of Africa, ... Philadelphia \n",
2708 | "11 Modern voyages: Containing a variety of useful... Dublin \n",
2709 | "12 Modern voyages: Containing a variety of useful... Dublin \n",
2710 | "13 The foreign travels and dangerous voyages of t... London \n",
2711 | "14 An accountof the shipwreck and captivity of Mr... London \n",
2712 | "15 Owen's new book of fairs, published by the Kin... London \n",
2713 | "16 Morse's Geography. This day is published, in o... London \n",
2714 | "17 The Present state of Sicily and Malta, Extract... London \n",
2715 | "18 Holland: a jaunt to the principal places in th... London \n",
2716 | "19 A Description of the hermitage of Warkworth. [London? ] \n",
2717 | "20 The world displayed; or, A curious collection ... Dublin \n",
2718 | "21 The world displayed; or, A curious collection ... Dublin \n",
2719 | "22 The world displayed; or, A curious collection ... Dublin \n",
2720 | "23 The world displayed; or, A curious collection ... Dublin \n",
2721 | "24 The world displayed; or, A curious collection ... Dublin \n",
2722 | "25 The world displayed; or, A curious collection ... Dublin \n",
2723 | "26 The world displayed; or, A curious collection ... Dublin \n",
2724 | "27 The world displayed; or, A curious collection ... Dublin \n",
2725 | "28 The world displayed; or, A curious collection ... Dublin \n",
2726 | "29 The world displayed; or, A curious collection ... Dublin \n",
2727 | ".. ... ... \n",
2728 | "282 A new and enlarged book of sailing directions ... London \n",
2729 | "283 The admirable travels of Thomas Jenkins. And D... London \n",
2730 | "284 East-Bourn, being an descriptive account of th... London \n",
2731 | "285 A journey made in the summer of 1794, through ... London \n",
2732 | "286 A journey made in the summer of 1794, through ... London \n",
2733 | "287 The world in miniature; or, The entertaining t... Dublin \n",
2734 | "288 A tour through Ireland. In several entertainin... Dublin \n",
2735 | "289 An authentic narrative of some remarkable and ... Dublin \n",
2736 | "290 The voiage and travaile of Sir John Maundevile... London \n",
2737 | "291 Travels in Switzerland, and in the country of ... London \n",
2738 | "292 Travels in Switzerland, and in the country of ... London \n",
2739 | "293 Travels in Switzerland, and in the country of ... London \n",
2740 | "294 An account of the European settlements in Amer... London \n",
2741 | "295 An account of the European settlements in Amer... London \n",
2742 | "296 Travels through Syria and Egypt, in the years ... London \n",
2743 | "297 Observations on the western parts of England, ... London \n",
2744 | "298 Travels in several parts of Turkey, Egypt, and... London \n",
2745 | "299 A year's journey through France, and part of S... Bath \n",
2746 | "300 A year's journey through France, and part of S... Bath \n",
2747 | "301 Letters from Italy, describing the manners, cu... Dublin \n",
2748 | "302 Letters from Italy, describing the manners, cu... Dublin \n",
2749 | "303 Letters from Italy, describing the manners, cu... Dublin \n",
2750 | "304 Travels from St. Petersburg in Russia, to dive... Glasgow \n",
2751 | "305 Travels from St. Petersburg in Russia, to dive... Glasgow \n",
2752 | "306 A Collection of voyages and travels, some now ... London \n",
2753 | "307 A Collection of voyages and travels, some now ... London \n",
2754 | "308 A Collection of voyages and travels, some now ... London \n",
2755 | "309 A Collection of voyages and travels, some now ... London \n",
2756 | "310 A Collection of voyages and travels, some now ... London \n",
2757 | "311 A Collection of voyages and travels, some now ... London \n",
2758 | "\n",
2759 | " imprintFull \\\n",
2760 | "0 London : Printed for J. Newbery, at the Bible ... \n",
2761 | "1 [Bath] : Printed for W. Frederick, bookseller,... \n",
2762 | "2 London : Printed for the proprietors, and sold... \n",
2763 | "3 London : printed for W. Smith, M DCC LXXXII. [... \n",
2764 | "4 London : printed for W. Smith, M DCC LXXXII. [... \n",
2765 | "5 London : printed for W. Smith, M DCC LXXXII. [... \n",
2766 | "6 London : Printed for E. Newbery, the corner of... \n",
2767 | "7 London : Printed for S. Hooper, at Gay's Head,... \n",
2768 | "8 London : Printed for Daniel Browne, at the Bla... \n",
2769 | "9 London : Printed for Daniel Browne, at the Bla... \n",
2770 | "10 Philadelphia : printed for the author, by S. C... \n",
2771 | "11 Dublin : printed for Chamberlaine and Rice, P.... \n",
2772 | "12 Dublin : printed for Chamberlaine and Rice, P.... \n",
2773 | "13 London : printed for M. Hotham, [1710?] \n",
2774 | "14 London : Printed for C. Forster, in the Poultr... \n",
2775 | "15 London : Printed (by assignment from W. Owen) ... \n",
2776 | "16 London : Printed for John Stockdale, Piccadill... \n",
2777 | "17 London : Printed for G. Kearsley, at Johnson's... \n",
2778 | "18 London : Printed, and sold by W.Hay, next to t... \n",
2779 | "19 [London? : s.n., 1800?] \n",
2780 | "20 Dublin : printed by James Williams, M,DCC,LXXI... \n",
2781 | "21 Dublin : printed by James Williams, M,DCC,LXXI... \n",
2782 | "22 Dublin : printed by James Williams, M,DCC,LXXI... \n",
2783 | "23 Dublin : printed by James Williams, M,DCC,LXXI... \n",
2784 | "24 Dublin : printed by James Williams, M,DCC,LXXI... \n",
2785 | "25 Dublin : printed by James Williams, M,DCC,LXXI... \n",
2786 | "26 Dublin : printed by James Williams, M,DCC,LXXI... \n",
2787 | "27 Dublin : printed by James Williams, M,DCC,LXXI... \n",
2788 | "28 Dublin : printed by James Williams, M,DCC,LXXI... \n",
2789 | "29 Dublin : printed by James Williams, M,DCC,LXXI... \n",
2790 | ".. ... \n",
2791 | "282 London : Printed for Robert Laurie and James W... \n",
2792 | "283 London : Printed for, and sold by W. Clements,... \n",
2793 | "284 London : Printed at the Philanthropic reform, ... \n",
2794 | "285 London : printed for G.G. and J. Robinson, Pat... \n",
2795 | "286 London : printed for G.G. and J. Robinson, Pat... \n",
2796 | "287 Dublin : Printed for William Williamson, books... \n",
2797 | "288 Dublin : Printed for Peter Wilson, bookseller,... \n",
2798 | "289 Dublin : Printed by Robert Dapper, for B. Dugd... \n",
2799 | "290 London : Printed for Woodman, and Lyon, in Rus... \n",
2800 | "291 London : printed for T. Cadell, in the Strand,... \n",
2801 | "292 London : printed for T. Cadell, in the Strand,... \n",
2802 | "293 London : printed for T. Cadell, in the Strand,... \n",
2803 | "294 London : printed for J. Dodsley, in Pall-Mall,... \n",
2804 | "295 London : printed for J. Dodsley, in Pall-Mall,... \n",
2805 | "296 London : printed for G.G.J. and J. Robinson, P... \n",
2806 | "297 London : Printed for T. Cadell Jun. and W. Dav... \n",
2807 | "298 London : Printed for the author; and sold by J... \n",
2808 | "299 Bath : Printed by R. Cruttwell, for the author... \n",
2809 | "300 Bath : Printed by R. Cruttwell, for the author... \n",
2810 | "301 Dublin : printed for W. Watson, D. Chamberlain... \n",
2811 | "302 Dublin : printed for W. Watson, D. Chamberlain... \n",
2812 | "303 Dublin : printed for W. Watson, D. Chamberlain... \n",
2813 | "304 Glasgow : printed for the author by Robert and... \n",
2814 | "305 Glasgow : printed for the author by Robert and... \n",
2815 | "306 London : Printed by assignment from Messrs. Ch... \n",
2816 | "307 London : Printed by assignment from Messrs. Ch... \n",
2817 | "308 London : Printed by assignment from Messrs. Ch... \n",
2818 | "309 London : Printed by assignment from Messrs. Ch... \n",
2819 | "310 London : Printed by assignment from Messrs. Ch... \n",
2820 | "311 London : Printed by assignment from Messrs. Ch... \n",
2821 | "\n",
2822 | " imprintPublisher language \\\n",
2823 | "0 Printed for J. Newbery, at the Bible and Sun, ... English \n",
2824 | "1 Printed for W. Frederick, bookseller, in Bath English \n",
2825 | "2 Printed for the proprietors, and sold at Stalk... English \n",
2826 | "3 printed for W. Smith English \n",
2827 | "4 printed for W. Smith English \n",
2828 | "5 printed for W. Smith English \n",
2829 | "6 Printed for E. Newbery, the corner of St. Paul... English \n",
2830 | "7 Printed for S. Hooper, at Gay's Head, near Bea... English \n",
2831 | "8 Printed for Daniel Browne, at the Black Swan, ... English \n",
2832 | "9 Printed for Daniel Browne, at the Black Swan, ... English \n",
2833 | "10 printed for the author, by S. C[.] Ustick, & Co English \n",
2834 | "11 printed for Chamberlaine and Rice, P. Wogan, P... English \n",
2835 | "12 printed for Chamberlaine and Rice, P. Wogan, P... English \n",
2836 | "13 printed for M. Hotham English \n",
2837 | "14 Printed for C. Forster, in the Poultry English \n",
2838 | "15 Printed (by assignment from W. Owen) for J. Jo... English \n",
2839 | "16 Printed for John Stockdale, Piccadilly and sol... English \n",
2840 | "17 Printed for G. Kearsley, at Johnson's Head, No... English \n",
2841 | "18 Printed, and sold by W.Hay, next to the Societ... English \n",
2842 | "19 s.n. English \n",
2843 | "20 printed by James Williams English \n",
2844 | "21 printed by James Williams English \n",
2845 | "22 printed by James Williams English \n",
2846 | "23 printed by James Williams English \n",
2847 | "24 printed by James Williams English \n",
2848 | "25 printed by James Williams English \n",
2849 | "26 printed by James Williams English \n",
2850 | "27 printed by James Williams English \n",
2851 | "28 printed by James Williams English \n",
2852 | "29 printed by James Williams English \n",
2853 | ".. ... ... \n",
2854 | "282 Printed for Robert Laurie and James Whittle, N... English \n",
2855 | "283 Printed for, and sold by W. Clements, and J. S... English \n",
2856 | "284 Printed at the Philanthropic reform, for Hookh... English \n",
2857 | "285 printed for G.G. and J. Robinson, Paternoster-Row English \n",
2858 | "286 printed for G.G. and J. Robinson, Paternoster-Row English \n",
2859 | "287 Printed for William Williamson, bookseller, at... English \n",
2860 | "288 Printed for Peter Wilson, bookseller, in Dame-... English \n",
2861 | "289 Printed by Robert Dapper, for B. Dugdale, No. ... English \n",
2862 | "290 Printed for Woodman, and Lyon, in Russel-Stree... English \n",
2863 | "291 printed for T. Cadell, in the Strand English \n",
2864 | "292 printed for T. Cadell, in the Strand English \n",
2865 | "293 printed for T. Cadell, in the Strand English \n",
2866 | "294 printed for J. Dodsley, in Pall-Mall English \n",
2867 | "295 printed for J. Dodsley, in Pall-Mall English \n",
2868 | "296 printed for G.G.J. and J. Robinson, Pater-Nost... English \n",
2869 | "297 Printed for T. Cadell Jun. and W. Davies, Strand English \n",
2870 | "298 Printed for the author; and sold by J. Axtell,... English \n",
2871 | "299 Printed by R. Cruttwell, for the author; and s... English \n",
2872 | "300 Printed by R. Cruttwell, for the author; and s... English \n",
2873 | "301 printed for W. Watson, D. Chamberlaine, J. Pot... English \n",
2874 | "302 printed for W. Watson, D. Chamberlaine, J. Pot... English \n",
2875 | "303 printed for W. Watson, D. Chamberlaine, J. Pot... English \n",
2876 | "304 printed for the author by Robert and Andrew Fo... English \n",
2877 | "305 printed for the author by Robert and Andrew Fo... English \n",
2878 | "306 Printed by assignment from Messrs. Churchill, ... English \n",
2879 | "307 Printed by assignment from Messrs. Churchill, ... English \n",
2880 | "308 Printed by assignment from Messrs. Churchill, ... English \n",
2881 | "309 Printed by assignment from Messrs. Churchill, ... English \n",
2882 | "310 Printed by assignment from Messrs. Churchill, ... English \n",
2883 | "311 Printed by assignment from Messrs. Churchill, ... English \n",
2884 | "\n",
2885 | " locSubject module \\\n",
2886 | "0 Voyages and travels History and Geography \n",
2887 | "1 Bath (England), Description and travel, Early ... History and Geography \n",
2888 | "2 Explorers, Portugal, Early works to 1800, Expl... History and Geography \n",
2889 | "3 Netherlands, Description and travel, Early wor... History and Geography \n",
2890 | "4 Netherlands, Description and travel, Early wor... History and Geography \n",
2891 | "5 Netherlands, Description and travel, Early wor... History and Geography \n",
2892 | "6 Voyages and travels, Early works to 1800 History and Geography \n",
2893 | "7 Italy, Description and travel, Early works to ... History and Geography \n",
2894 | "8 Europe, Description and travel History and Geography \n",
2895 | "9 Europe, Description and travel History and Geography \n",
2896 | "10 Hawkins, Joseph,, b. 1772, Portraits, Voyages ... History and Geography \n",
2897 | "11 Voyages and travels, Early works to 1800 History and Geography \n",
2898 | "12 Voyages and travels, Early works to 1800 History and Geography \n",
2899 | "13 Voyages and travels, Early works to 1800 History and Geography \n",
2900 | "14 Voyages and travels, Early works to 1800 History and Geography \n",
2901 | "15 Great Britain, Description and travel, Early w... History and Geography \n",
2902 | "16 United States, Description and travel, Early w... History and Geography \n",
2903 | "17 Sicily (Italy), Description and travel, Early ... History and Geography \n",
2904 | "18 Holland (Netherlands : Province), Description ... History and Geography \n",
2905 | "19 Great Britain, Description and travel, Early w... History and Geography \n",
2906 | "20 Voyages and travels History and Geography \n",
2907 | "21 Voyages and travels History and Geography \n",
2908 | "22 Voyages and travels History and Geography \n",
2909 | "23 Voyages and travels History and Geography \n",
2910 | "24 Voyages and travels History and Geography \n",
2911 | "25 Voyages and travels History and Geography \n",
2912 | "26 Voyages and travels History and Geography \n",
2913 | "27 Voyages and travels History and Geography \n",
2914 | "28 Voyages and travels History and Geography \n",
2915 | "29 Voyages and travels History and Geography \n",
2916 | ".. ... ... \n",
2917 | "282 Pilot guides, Florida, Straits of History and Geography \n",
2918 | "283 Voyages and travels, England, London, Early wo... History and Geography \n",
2919 | "284 Sussex (England), Description and travel, Earl... History and Geography \n",
2920 | "285 Europe, Description and travel History and Geography \n",
2921 | "286 Europe, Description and travel History and Geography \n",
2922 | "287 Voyages and travels, Early works to 1800 History and Geography \n",
2923 | "288 Ireland, Description and travel History and Geography \n",
2924 | "289 Voyages and travels, Personal narratives History and Geography \n",
2925 | "290 Voyages and travels History and Geography \n",
2926 | "291 Switzerland, Description and travel History and Geography \n",
2927 | "292 Switzerland, Description and travel History and Geography \n",
2928 | "293 Switzerland, Description and travel History and Geography \n",
2929 | "294 United States, Description and travel, Early w... History and Geography \n",
2930 | "295 United States, Description and travel, Early w... History and Geography \n",
2931 | "296 Syria, Description and travel, Early works to ... History and Geography \n",
2932 | "297 England, Description and travel, Early works t... History and Geography \n",
2933 | "298 Middle East, Description and travel History and Geography \n",
2934 | "299 France, Description and travel, Early works to... History and Geography \n",
2935 | "300 France, Description and travel, Early works to... History and Geography \n",
2936 | "301 Italy, Description and travel History and Geography \n",
2937 | "302 Italy, Description and travel History and Geography \n",
2938 | "303 Italy, Description and travel History and Geography \n",
2939 | "304 Asia, Description and travel History and Geography \n",
2940 | "305 Asia, Description and travel History and Geography \n",
2941 | "306 Voyages and travels History and Geography \n",
2942 | "307 Voyages and travels History and Geography \n",
2943 | "308 Voyages and travels History and Geography \n",
2944 | "309 Voyages and travels History and Geography \n",
2945 | "310 Voyages and travels History and Geography \n",
2946 | "311 Voyages and travels History and Geography \n",
2947 | "\n",
2948 | " notes pubDate \\\n",
2949 | "0 17670101 \n",
2950 | "1 With an additional titlepage for pt. 1: 'An es... 17420101 \n",
2951 | "2 17900101 \n",
2952 | "3 Coriat Junior = Samuel Paterson?. 17820101 \n",
2953 | "4 Coriat Junior = Samuel Paterson?. 17820101 \n",
2954 | "5 Coriat Junior = Samuel Paterson?. 17820101 \n",
2955 | "6 William Mavor is the editor of the 'Historical... 17960101 \n",
2956 | "7 17560101 \n",
2957 | "8 Translated by Stephen Whatley. In this editio... 17390101 \n",
2958 | "9 Translated by Stephen Whatley. In this editio... 17390101 \n",
2959 | "10 \"The author relating the history of his travel... 17970101 \n",
2960 | "11 The imprint in vol. 2 is enlarged by the addit... 17900101 \n",
2961 | "12 The imprint in vol. 2 is enlarged by the addit... 17900101 \n",
2962 | "13 17100101 \n",
2963 | "14 Price in square brackets: (Price Two Shillings... 17890101 \n",
2964 | "15 With an initial copyright leaf. Also publishe... 17990101 \n",
2965 | "16 Advertisement for new books printed for John S... 17920101 \n",
2966 | "17 With a final errata leaf. P. iv misnumbered v... 17880101 \n",
2967 | "18 Price on title page: (Price 2s.) 17750101 \n",
2968 | "19 18000101 \n",
2969 | "20 Compiled by Christopher Smart, Oliver Goldsmit... 17790101 \n",
2970 | "21 Compiled by Christopher Smart, Oliver Goldsmit... 17790101 \n",
2971 | "22 Compiled by Christopher Smart, Oliver Goldsmit... 17790101 \n",
2972 | "23 Compiled by Christopher Smart, Oliver Goldsmit... 17790101 \n",
2973 | "24 Compiled by Christopher Smart, Oliver Goldsmit... 17790101 \n",
2974 | "25 Compiled by Christopher Smart, Oliver Goldsmit... 17790101 \n",
2975 | "26 Compiled by Christopher Smart, Oliver Goldsmit... 17790101 \n",
2976 | "27 Compiled by Christopher Smart, Oliver Goldsmit... 17790101 \n",
2977 | "28 Compiled by Christopher Smart, Oliver Goldsmit... 17790101 \n",
2978 | "29 Compiled by Christopher Smart, Oliver Goldsmit... 17790101 \n",
2979 | ".. ... ... \n",
2980 | "282 At foot of titlepage: \"N.B. These directions a... 17960101 \n",
2981 | "283 Written in the first person by David Lowellin.... 17910101 \n",
2982 | "284 17990101 \n",
2983 | "285 17960101 \n",
2984 | "286 17960101 \n",
2985 | "287 With an index. 17510101 \n",
2986 | "288 Anonymous. By William Rufus Chetwood. A reiss... 17480101 \n",
2987 | "289 Written by Newton himself. First published as... 17960101 \n",
2988 | "290 Titlepage in red and black, preceding a black ... 17270101 \n",
2989 | "291 17910101 \n",
2990 | "292 17910101 \n",
2991 | "293 17910101 \n",
2992 | "294 Anonymous. Probably a collaboration by William... 17700101 \n",
2993 | "295 Anonymous. Probably a collaboration by William... 17700101 \n",
2994 | "296 Largely a reimpression of the 1787 London edit... 17880101 \n",
2995 | "297 The same setting of type was also printed with... 17980101 \n",
2996 | "298 Horizontal chain lines. 17740101 \n",
2997 | "299 With a list of subscribers. 17770101 \n",
2998 | "300 With a list of subscribers. 17770101 \n",
2999 | "301 An English woman = Anna, wife of Sir John Rigg... 17760101 \n",
3000 | "302 An English woman = Anna, wife of Sir John Rigg... 17760101 \n",
3001 | "303 An English woman = Anna, wife of Sir John Rigg... 17760101 \n",
3002 | "304 With a list of subscribers in vol.1. \"Volume ... 17630101 \n",
3003 | "305 With a list of subscribers in vol.1. \"Volume ... 17630101 \n",
3004 | "306 Compiled by Awnsham Churchill and John Churchi... 17440101 \n",
3005 | "307 Compiled by Awnsham Churchill and John Churchi... 17440101 \n",
3006 | "308 Compiled by Awnsham Churchill and John Churchi... 17440101 \n",
3007 | "309 Compiled by Awnsham Churchill and John Churchi... 17440101 \n",
3008 | "310 Compiled by Awnsham Churchill and John Churchi... 17440101 \n",
3009 | "311 Compiled by Awnsham Churchill and John Churchi... 17440101 \n",
3010 | "\n",
3011 | " publicationPlace totalVolumes \n",
3012 | "0 London 1 \n",
3013 | "1 Bath 0 \n",
3014 | "2 London 0 \n",
3015 | "3 London 3 \n",
3016 | "4 London 3 \n",
3017 | "5 London 3 \n",
3018 | "6 London 0 \n",
3019 | "7 London 0 \n",
3020 | "8 London 2 \n",
3021 | "9 London 2 \n",
3022 | "10 Philadelphia 0 \n",
3023 | "11 Dublin 2 \n",
3024 | "12 Dublin 2 \n",
3025 | "13 London 0 \n",
3026 | "14 London 0 \n",
3027 | "15 London 0 \n",
3028 | "16 London 0 \n",
3029 | "17 London 0 \n",
3030 | "18 London 0 \n",
3031 | "19 London 0 \n",
3032 | "20 Dublin 20 \n",
3033 | "21 Dublin 20 \n",
3034 | "22 Dublin 20 \n",
3035 | "23 Dublin 20 \n",
3036 | "24 Dublin 20 \n",
3037 | "25 Dublin 20 \n",
3038 | "26 Dublin 20 \n",
3039 | "27 Dublin 20 \n",
3040 | "28 Dublin 20 \n",
3041 | "29 Dublin 20 \n",
3042 | ".. ... ... \n",
3043 | "282 London 0 \n",
3044 | "283 London 0 \n",
3045 | "284 London 0 \n",
3046 | "285 London 2 \n",
3047 | "286 London 2 \n",
3048 | "287 Dublin 0 \n",
3049 | "288 Dublin 0 \n",
3050 | "289 Dublin 0 \n",
3051 | "290 London 0 \n",
3052 | "291 London 3 \n",
3053 | "292 London 3 \n",
3054 | "293 London 3 \n",
3055 | "294 London 2 \n",
3056 | "295 London 2 \n",
3057 | "296 London 2 \n",
3058 | "297 London 0 \n",
3059 | "298 London 0 \n",
3060 | "299 Bath 2 \n",
3061 | "300 Bath 2 \n",
3062 | "301 London 3 \n",
3063 | "302 London 3 \n",
3064 | "303 London 3 \n",
3065 | "304 Glasgow 2 \n",
3066 | "305 Glasgow 2 \n",
3067 | "306 London 6 \n",
3068 | "307 London 6 \n",
3069 | "308 London 6 \n",
3070 | "309 London 6 \n",
3071 | "310 London 6 \n",
3072 | "311 London 6 \n",
3073 | "\n",
3074 | "[312 rows x 16 columns]"
3075 | ]
3076 | },
3077 | "execution_count": 21,
3078 | "metadata": {},
3079 | "output_type": "execute_result"
3080 | }
3081 | ],
3082 | "source": [
3083 | "dfHistAndGeo = pd.DataFrame(listofdicts)\n",
3084 | "dfHistAndGeo"
3085 | ]
3086 | },
3087 | {
3088 | "cell_type": "code",
3089 | "execution_count": 22,
3090 | "metadata": {
3091 | "collapsed": true
3092 | },
3093 | "outputs": [],
3094 | "source": [
3095 | "dfHistAndGeo.to_csv('files/HistAndGeo.csv')"
3096 | ]
3097 | },
3098 | {
3099 | "cell_type": "markdown",
3100 | "metadata": {},
3101 | "source": [
3102 | "Okay, so, it worked (hurrah!), but it did take quite a while - a few hours. At this rate, I might have to do this in multiple sections in order to gather them all, unless there is a faster method (which there almost undoubtedly is)."
3103 | ]
3104 | },
3105 | {
3106 | "cell_type": "markdown",
3107 | "metadata": {},
3108 | "source": []
3109 | },
3110 | {
3111 | "cell_type": "markdown",
3112 | "metadata": {},
3113 | "source": []
3114 | }
3115 | ],
3116 | "metadata": {
3117 | "kernelspec": {
3118 | "display_name": "Python 3",
3119 | "language": "python",
3120 | "name": "python3"
3121 | },
3122 | "language_info": {
3123 | "codemirror_mode": {
3124 | "name": "ipython",
3125 | "version": 3
3126 | },
3127 | "file_extension": ".py",
3128 | "mimetype": "text/x-python",
3129 | "name": "python",
3130 | "nbconvert_exporter": "python",
3131 | "pygments_lexer": "ipython3",
3132 | "version": "3.6.2"
3133 | }
3134 | },
3135 | "nbformat": 4,
3136 | "nbformat_minor": 2
3137 | }
3138 |
--------------------------------------------------------------------------------