├── .circleci └── config.yml ├── .editorconfig ├── .gitignore ├── .vscode └── settings.json ├── LICENSE ├── README-DEV.md ├── README.md ├── pyproject.toml ├── requirements-test.txt ├── requirements.txt └── src └── mods4pandas ├── .gitignore ├── alto4pandas.py ├── lib.py ├── mods4pandas.py └── tests ├── data ├── alto │ ├── 734008031 │ │ ├── 00000005.xml │ │ ├── 00000026.xml │ │ ├── 00000029.xml │ │ ├── 00000060.xml │ │ └── 00000102.xml │ ├── 749782137 │ │ ├── 00000077.xml │ │ ├── 00000085.xml │ │ ├── 00000464.xml │ │ ├── 00000651.xml │ │ ├── 00000915.xml │ │ └── 00001120.xml │ ├── PPN636777308 │ │ └── 00000002.xml │ ├── PPN640992293 │ │ └── 00000017.xml │ ├── PPN715049151 │ │ └── 00000017.xml │ ├── PPN767883624 │ │ ├── 00000001.xml │ │ └── 00000002.xml │ ├── PPN895016346 │ │ └── 00000022.xml │ ├── alto-ner │ │ ├── 00000046.xml │ │ ├── 00000102.xml │ │ └── 00000217.xml │ └── weird-ns │ │ └── 00000007.xml └── mets-mods │ ├── PPN1678618276.xml │ ├── PPN1727545451.xml │ ├── PPN1737752050.xml │ ├── PPN1769395962.xml │ ├── PPN3348760607-mehrere-shelfLocator.xml │ ├── PPN717884805-multivolume_work-no-structMap-PHYSICAL.xml │ ├── PPN773555676.xml │ └── PPN821507109-1361-pages.xml ├── test_alto.py ├── test_mets.py ├── test_mods4pandas.py └── test_page_info.py /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | 3 | jobs: 4 | test: 5 | parameters: 6 | python-version: 7 | type: string 8 | docker: 9 | - image: cimg/python:<< parameters.python-version >> 10 | steps: 11 | - checkout 12 | - run: pip3 install --upgrade pip 13 | - run: pip3 install -e . 14 | - run: pip3 install -r requirements-test.txt 15 | - run: pytest 16 | 17 | workflows: 18 | all-tests: 19 | jobs: 20 | - test: 21 | matrix: 22 | parameters: 23 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] 24 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | [*] 2 | max_line_length = 120 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # Distribution / packaging 7 | *.egg-info/ 8 | 9 | # Unit test / coverage reports 10 | htmlcov/ 11 | .coverage 12 | .coverage.* 13 | 14 | # Environments 15 | .env 16 | .venv 17 | env/ 18 | venv/ 19 | .python-version 20 | 21 | # mypy 22 | .mypy_cache/ 23 | .dmypy.json 24 | dmypy.json 25 | 26 | # User-specific stuff 27 | .idea 28 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.formatting.provider": "black" 3 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2019 qurator 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README-DEV.md: -------------------------------------------------------------------------------- 1 | ``` 2 | pip install -r requirements-test.txt 3 | ``` 4 | 5 | To run tests: 6 | ``` 7 | pip install -e . 8 | pytest 9 | ``` 10 | 11 | To run a test with profiling: 12 | 13 | 1. Make sure graphviz is installed 14 | 2. Run pytest with profiling enabled: 15 | ``` 16 | pytest --profile-svg -k test_page_info 17 | ``` 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Extract the MODS/ALTO metadata of a bunch of METS/ALTO files into pandas DataFrames. 2 | 3 | [![Build Status](https://circleci.com/gh/qurator-spk/mods4pandas.svg?style=svg)](https://circleci.com/gh/qurator-spk/mods4pandas) 4 | 5 | **mods4pandas** converts the MODS metadata from METS files into a pandas DataFrame. 6 | 7 | Column names are derived from the corresponding MODS elements. Some domain 8 | knowledge is used to convert elements to useful columns, e.g. producing sets 9 | instead of ordered lists for topics. Parts of the tool are specific to 10 | our environment/needs at the State Library Berlin and may need to be changed for 11 | your library. 12 | 13 | Per-page information (e.g. structure information from the METS structMap) can 14 | be converted as well (`--output-page-info`). 15 | 16 | **alto4pandas** converts the metadata from ALTO files into a pandas DataFrame. 17 | 18 | Column names are derived from the corresponding ALTO elements. Some columns 19 | contain descriptive statistics (e.g. counts or means) of the corresponding ALTO 20 | elements or attributes. 21 | 22 | ## Usage 23 | ~~~ 24 | mods4pandas /path/to/a/directory/containing/mets_files 25 | ~~~ 26 | 27 | ~~~ 28 | alto4pandas /path/to/a/directory/full/of/alto_files 29 | ~~~ 30 | 31 | ### Conversion to other formats 32 | 33 | CSV: 34 | ``` 35 | python -c 'import pandas as pd; pd.read_parquet("mods_info_df.parquet").to_csv("mods_info_df.csv")' 36 | ``` 37 | Excel (requires `XlsxWriter`): 38 | ``` 39 | python -c 'import pandas as pd; pd.read_parquet("mods_info_df.parquet").to_excel("mods_info_df.xlsx" 40 | , engine="xlsxwriter")' 41 | ``` 42 | 43 | ## Example 44 | In this example we convert the MODS metadata contained in the METS files in 45 | `/srv/data/digisam_mets-sample-300` to a pandas DataFrame under 46 | `mods_info_df.parquet`. This file can then be read by your data scientist using 47 | `pd.read_parquet()`.
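Reading it back might look like this (a minimal sketch; the exact columns
depend on your MODS metadata):

```
import pandas as pd

mods_info = pd.read_parquet("mods_info_df.parquet")
print(mods_info.shape)          # one row per METS file
print(list(mods_info.columns))  # column names derived from MODS elements
```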
48 | 49 | ``` 50 | % mods4pandas /srv/data/digisam_mets-sample-300 51 | INFO:root:Scanning directory /srv/data/digisam_mets-sample-300 52 | 301it [00:00, 19579.19it/s] 53 | INFO:root:Processing METS files 54 | 100%|████████████████████████████████████████| 301/301 [00:01<00:00, 162.59it/s] 55 | INFO:root:Writing DataFrame to mods_info_df.parquet 56 | ``` 57 | 58 | In the next example we convert the metadata from the ALTO files in the test data 59 | directory: 60 | 61 | ~~~ 62 | % alto4pandas src/mods4pandas/tests/data/alto 63 | Scanning directory src/mods4pandas/tests/data/alto 64 | Scanning directory src/mods4pandas/tests/data/alto/PPN636777308 65 | Scanning directory src/mods4pandas/tests/data/alto/734008031 66 | Scanning directory src/mods4pandas/tests/data/alto/PPN895016346 67 | Scanning directory src/mods4pandas/tests/data/alto/PPN640992293 68 | Scanning directory src/mods4pandas/tests/data/alto/alto-ner 69 | Scanning directory src/mods4pandas/tests/data/alto/PPN767883624 70 | Scanning directory src/mods4pandas/tests/data/alto/PPN715049151 71 | Scanning directory src/mods4pandas/tests/data/alto/749782137 72 | Scanning directory src/mods4pandas/tests/data/alto/weird-ns 73 | INFO:alto4pandas:Processing ALTO files 74 | INFO:alto4pandas:Writing DataFrame to alto_info_df.parquet 75 | ~~~ 76 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61.0.0", "wheel"] 3 | 4 | [project] 5 | name = "mods4pandas" 6 | version = "0.0.0" 7 | authors = [ 8 | {name = "Mike Gerber", email = "mike.gerber@sbb.spk-berlin.de"}, 9 | {name = "The QURATOR SPK Team", email = "qurator@sbb.spk-berlin.de"}, 10 | ] 11 | description = "Convert MODS metadata to a pandas DataFrame" 12 | readme = "README.md" 13 | license.file = "LICENSE" 14 | requires-python = ">=3.8" 15 | keywords = ["qurator", "mets", "mods", "metadata", "library"] 16 | 17 | dynamic = ["dependencies", "optional-dependencies"] 18 | 19 | # https://pypi.org/classifiers/ 20 | classifiers = [ 21 | "Development Status :: 4 - Beta", 22 | "Environment :: Console", 23 | "Intended Audience :: Science/Research", 24 | "Intended Audience :: Other Audience", 25 | "License :: OSI Approved :: Apache Software License", 26 | "Programming Language :: Python :: 3", 27 | "Programming Language :: Python :: 3 :: Only", 28 | "Topic :: Scientific/Engineering :: Information Analysis", 29 | ] 30 | 31 | [project.scripts] 32 | mods4pandas="mods4pandas.mods4pandas:main" 33 | alto4pandas="mods4pandas.alto4pandas:main" 34 | 35 | 36 | [project.urls] 37 | Homepage = "https://github.com/qurator-spk/mods4pandas" 38 | Repository = "https://github.com/qurator-spk/mods4pandas.git" 39 | 40 | 41 | [tool.setuptools.dynamic] 42 | dependencies = {file = ["requirements.txt"]} 43 | optional-dependencies.dev = {file = ["requirements-test.txt"]} 44 | 45 | [tool.setuptools.packages.find] 46 | where = ["src"] 47 | -------------------------------------------------------------------------------- /requirements-test.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | pytest-profiling 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | click 2 | pandas 3 | numpy 4 | tqdm 5 | lxml 6 | pyarrow 7 | XlsxWriter 8 |
-------------------------------------------------------------------------------- /src/mods4pandas/.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | mods_info_df.pkl* 3 | -------------------------------------------------------------------------------- /src/mods4pandas/alto4pandas.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import csv 3 | import logging 4 | import os 5 | import re 6 | import warnings 7 | import sys 8 | 9 | from lxml import etree as ET 10 | from itertools import groupby 11 | from operator import attrgetter 12 | from typing import List 13 | from collections.abc import MutableMapping, Sequence 14 | 15 | import click 16 | import pandas as pd 17 | import numpy as np 18 | from tqdm import tqdm 19 | 20 | from .lib import TagGroup, sorted_groupby, flatten, ns 21 | 22 | 23 | logger = logging.getLogger('alto4pandas') 24 | 25 | 26 | 27 | def alto_to_dict(alto, raise_errors=True): 28 | """Convert ALTO metadata to a nested dictionary""" 29 | 30 | value = {} 31 | 32 | # Iterate through each group of tags 33 | for tag, group in sorted_groupby(alto, key=attrgetter('tag')): 34 | group = list(group) 35 | 36 | localname = ET.QName(tag).localname 37 | alto_namespace = ET.QName(tag).namespace 38 | namespaces = {"alto": alto_namespace} 39 | 40 | if localname == 'Description': 41 | value[localname] = TagGroup(tag, group).is_singleton().has_no_attributes().descend(raise_errors) 42 | elif localname == 'MeasurementUnit': 43 | value[localname] = TagGroup(tag, group).is_singleton().has_no_attributes().text() 44 | elif localname == 'OCRProcessing': 45 | value[localname] = TagGroup(tag, group).is_singleton().descend(raise_errors) 46 | elif localname == 'Processing': 47 | # TODO This enumerated descent is used more than once, DRY!
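            # A shared helper could look like this (sketch, equivalent to the loops below):
            #     value.update({f'{localname}{n}': alto_to_dict(e, raise_errors)
            #                   for n, e in enumerate(group)})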
48 | for n, e in enumerate(group): 49 | value[f'{localname}{n}'] = alto_to_dict(e, raise_errors) 50 | elif localname == 'ocrProcessingStep': 51 | for n, e in enumerate(group): 52 | value[f'{localname}{n}'] = alto_to_dict(e, raise_errors) 53 | elif localname == 'preProcessingStep': 54 | for n, e in enumerate(group): 55 | value[f'{localname}{n}'] = alto_to_dict(e, raise_errors) 56 | elif localname == 'processingDateTime': 57 | value[localname] = TagGroup(tag, group).is_singleton().has_no_attributes().text() 58 | elif localname == 'processingSoftware': 59 | value[localname] = TagGroup(tag, group).is_singleton().descend(raise_errors) 60 | elif localname == 'processingAgency': 61 | value[localname] = TagGroup(tag, group).is_singleton().has_no_attributes().text() 62 | elif localname == 'processingStepDescription': 63 | value[localname] = TagGroup(tag, group).is_singleton().has_no_attributes().text() 64 | elif localname == 'processingStepSettings': 65 | value[localname] = TagGroup(tag, group).is_singleton().has_no_attributes().text() 66 | elif localname == 'softwareCreator': 67 | value[localname] = TagGroup(tag, group).is_singleton().has_no_attributes().text() 68 | elif localname == 'softwareName': 69 | value[localname] = TagGroup(tag, group).is_singleton().has_no_attributes().text() 70 | elif localname == 'softwareVersion': 71 | value[localname] = TagGroup(tag, group).is_singleton().has_no_attributes().text() 72 | 73 | elif localname == 'sourceImageInformation': 74 | value[localname] = TagGroup(tag, group).is_singleton().has_no_attributes().descend(raise_errors) 75 | elif localname == 'fileName': 76 | value[localname] = TagGroup(tag, group).is_singleton().has_no_attributes().text() 77 | 78 | elif localname == 'Layout': 79 | value[localname] = TagGroup(tag, group).is_singleton().has_no_attributes().descend(raise_errors) 80 | elif localname == 'Page': 81 | value[localname] = {} 82 | value[localname].update(TagGroup(tag, group).is_singleton().attributes()) 83 | value[localname].update(TagGroup(tag, group).subelement_counts()) 84 | value[localname].update(TagGroup(tag, group).xpath_statistics("//alto:String/@WC", namespaces)) 85 | 86 | # Count all alto:String elements with TAGREFS attribute 87 | value[localname].update(TagGroup(tag, group).xpath_count("//alto:String[@TAGREFS]", namespaces)) 88 | 89 | elif localname == 'Styles': 90 | pass 91 | elif localname == 'Tags': 92 | value[localname] = {} 93 | value[localname].update(TagGroup(tag, group).subelement_counts()) 94 | else: 95 | if raise_errors: 96 | print(value) 97 | raise ValueError('Unknown tag "{}"'.format(tag)) 98 | else: 99 | pass 100 | 101 | return value 102 | 103 | 104 | 105 | def walk(m): 106 | # XXX do this in mods4pandas, too 107 | if os.path.isdir(m): 108 | tqdm.write(f'Scanning directory {m}') 109 | for f in tqdm(os.scandir(m), leave=False): 110 | if f.is_file() and not f.name.startswith('.'): 111 | yield f.path 112 | elif f.is_dir(): 113 | try: 114 | yield from walk(f.path) 115 | except PermissionError: 116 | warnings.warn(f"Error walking {f.path}") 117 | else: 118 | yield m  # m is a plain path string here, not an os.DirEntry 119 | 120 | 121 | 122 | @click.command() 123 | @click.argument('alto_files', type=click.Path(exists=True), required=True, nargs=-1) 124 | @click.option('--output', '-o', 'output_file', type=click.Path(), help='Output pickle file', 125 | default='alto_info_df.pkl', show_default=True) 126 | @click.option('--output-csv', type=click.Path(), help='Output CSV file') 127 | @click.option('--output-xlsx', type=click.Path(), help='Output Excel .xlsx file') 128 | def 
process(alto_files: List[str], output_file: str, output_csv: str, output_xlsx: str): 129 | """ 130 | A tool to convert the ALTO metadata in INPUT to a pandas DataFrame. 131 | 132 | INPUT is assumed to be an ALTO document. INPUT may optionally be a directory. The tool then reads 133 | all files in the directory. 134 | 135 | alto4pandas writes two output files: A pickled pandas DataFrame and a CSV file with all conversion warnings. 136 | """ 137 | 138 | # Extend file list if directories are given 139 | alto_files_real = [] 140 | for m in alto_files: 141 | for x in walk(m): 142 | alto_files_real.append(x) 143 | 144 | # Process ALTO files 145 | with open(output_file + '.warnings.csv', 'w') as csvfile: 146 | csvwriter = csv.writer(csvfile) 147 | alto_info = [] 148 | logger.info('Processing ALTO files') 149 | for alto_file in tqdm(alto_files_real, leave=False): 150 | try: 151 | root = ET.parse(alto_file).getroot() 152 | alto = root # XXX .find('alto:alto', ns) does not work here 153 | 154 | with warnings.catch_warnings(record=True) as caught_warnings: 155 | warnings.simplefilter('always') # do NOT filter double occurrences 156 | 157 | # ALTO 158 | d = flatten(alto_to_dict(alto, raise_errors=True)) 159 | # "meta" 160 | d['alto_file'] = alto_file 161 | d['alto_xmlns'] = ET.QName(alto).namespace 162 | 163 | alto_info.append(d) 164 | 165 | if caught_warnings: 166 | # PyCharm thinks caught_warnings is not Iterable: 167 | # noinspection PyTypeChecker 168 | for caught_warning in caught_warnings: 169 | csvwriter.writerow([alto_file, caught_warning.message]) 170 | except Exception as e: 171 | logger.error('Exception in {}: {}'.format(alto_file, e)) 172 | import traceback; traceback.print_exc() 173 | 174 | # Convert the alto_info List[Dict] to a pandas DataFrame 175 | columns = [] 176 | for m in alto_info: 177 | for c in m.keys(): 178 | if c not in columns: 179 | columns.append(c) 180 | data = [[m.get(c) for c in columns] for m in alto_info] 181 | index = [m['alto_file'] for m in alto_info] # TODO use ppn + page? 
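    # Note: the columns are the union of all keys seen across documents, in
    # first-seen order; m.get(c) yields None where a document lacks a key.
    # (This mirrors lib.dicts_to_df.)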
182 | alto_info_df = pd.DataFrame(data=data, index=index, columns=columns) 183 | 184 | # Pickle the DataFrame 185 | logger.info('Writing DataFrame to {}'.format(output_file)) 186 | alto_info_df.to_pickle(output_file) 187 | if output_csv: 188 | logger.info('Writing CSV to {}'.format(output_csv)) 189 | alto_info_df.to_csv(output_csv) 190 | if output_xlsx: 191 | logger.info('Writing Excel .xlsx to {}'.format(output_xlsx)) 192 | alto_info_df.to_excel(output_xlsx) 193 | 194 | 195 | def main(): 196 | logging.basicConfig(level=logging.INFO) 197 | 198 | for prefix, uri in ns.items(): 199 | ET.register_namespace(prefix, uri) 200 | 201 | process() 202 | 203 | 204 | if __name__ == '__main__': 205 | main() 206 | -------------------------------------------------------------------------------- /src/mods4pandas/lib.py: -------------------------------------------------------------------------------- 1 | from itertools import groupby 2 | import re 3 | import warnings 4 | from typing import List, Sequence, MutableMapping, Dict 5 | 6 | import pandas as pd 7 | import numpy as np 8 | from lxml import etree as ET 9 | 10 | 11 | __all__ = ["ns"] 12 | 13 | 14 | ns = { 15 | 'mets': 'http://www.loc.gov/METS/', 16 | 'mods': 'http://www.loc.gov/mods/v3', 17 | "alto": "http://www.loc.gov/standards/alto/ns-v2", 18 | "xlink": "http://www.w3.org/1999/xlink", 19 | } 20 | 21 | 22 | 23 | class TagGroup: 24 | """Helper class to simplify the parsing and checking of MODS metadata""" 25 | 26 | def __init__(self, tag, group: List[ET.Element]): 27 | self.tag = tag 28 | self.group = group 29 | 30 | def to_xml(self): 31 | return '\n'.join(str(ET.tostring(e), 'utf-8').strip() for e in self.group) 32 | 33 | def __str__(self): 34 | return f"TagGroup with content:\n{self.to_xml()}" 35 | 36 | def is_singleton(self): 37 | if len(self.group) != 1: 38 | raise ValueError('More than one instance: {}'.format(self)) 39 | return self 40 | 41 | def has_no_attributes(self): 42 | return self.has_attributes({}) 43 | 44 | def has_attributes(self, attrib): 45 | if not isinstance(attrib, Sequence): 46 | attrib = [attrib] 47 | if not all(e.attrib in attrib for e in self.group): 48 | raise ValueError('One or more element has unexpected attributes: {}'.format(self)) 49 | return self 50 | 51 | def ignore_attributes(self): 52 | # This serves as documentation for now. 53 | return self 54 | 55 | def sort(self, key=None, reverse=False): 56 | self.group = sorted(self.group, key=key, reverse=reverse) 57 | return self 58 | 59 | def text(self, separator='\n'): 60 | t = '' 61 | for e in self.group: 62 | if t != '': 63 | t += separator 64 | if e.text: 65 | t += e.text 66 | return t 67 | 68 | def text_set(self): 69 | return {e.text for e in self.group} 70 | 71 | def descend(self, raise_errors): 72 | return _to_dict(self.is_singleton().group[0], raise_errors) 73 | 74 | def filter(self, cond, warn=None): 75 | new_group = [] 76 | for e in self.group: 77 | if cond(e): 78 | new_group.append(e) 79 | else: 80 | if warn: 81 | warnings.warn('Filtered {} element ({})'.format(self.tag, warn)) 82 | return TagGroup(self.tag, new_group) 83 | 84 | def force_singleton(self, warn=True): 85 | if len(self.group) == 1: 86 | return self 87 | else: 88 | if warn: 89 | warnings.warn('Forced single instance of {}'.format(self.tag)) 90 | return TagGroup(self.tag, self.group[:1]) 91 | 92 | RE_ISO8601_DATE = r'^\d{2}(\d{2}|XX)(-\d{2}-\d{2})?$' # Note: Includes non-specific century dates like '18XX' 93 | RE_GERMAN_DATE = r'^(?P
<dd>\d{2})\.(?P<mm>\d{2})\.(?P<yyyy>\d{4})$' 94 | 95 | def fix_date(self): 96 | 97 | for e in self.group: 98 | if e.attrib.get('encoding') == 'w3cdtf': 99 | # This should be 'iso8601' according to MODS-AP 2.3.1 100 | warnings.warn('Changed w3cdtf encoding to iso8601') 101 | e.attrib['encoding'] = 'iso8601' 102 | 103 | new_group = [] 104 | for e in self.group: 105 | if e.attrib.get('encoding') == 'iso8601' and re.match(self.RE_ISO8601_DATE, e.text): 106 | new_group.append(e) 107 | elif re.match(self.RE_ISO8601_DATE, e.text): 108 | warnings.warn('Added iso8601 encoding to date {}'.format(e.text)) 109 | e.attrib['encoding'] = 'iso8601' 110 | new_group.append(e) 111 | elif re.match(self.RE_GERMAN_DATE, e.text): 112 | warnings.warn('Converted date {} to iso8601 encoding'.format(e.text)) 113 | m = re.match(self.RE_GERMAN_DATE, e.text) 114 | e.text = '{}-{}-{}'.format(m.group('yyyy'), m.group('mm'), m.group('dd')) 115 | e.attrib['encoding'] = 'iso8601' 116 | new_group.append(e) 117 | else: 118 | warnings.warn('Not an iso8601 date: "{}"'.format(e.text)) 119 | new_group.append(e) 120 | self.group = new_group 121 | 122 | # Notes: 123 | # - There are dates with the misspelled qualifier 'aproximate' 124 | # - Rough periods are sometimes given either by: 125 | # - years like '19xx' 126 | # - or 'approximate' date ranges with point="start"/"end" attributes set 127 | # (this could be correct according to MODS-AP 2.3.1) 128 | # - Some very specific dates like '06.08.1820' are sometimes given the 'approximate' qualifier 129 | # - Sometimes, approximate date ranges are given in the text "1785-1800 (ca.)" 130 | 131 | return self 132 | 133 | def fix_event_type(self): 134 | # According to MODS-AP 2.3.1, every originInfo should have its eventType set. 135 | # Fix this for special cases. 136 | 137 | for e in self.group: 138 | if e.attrib.get('eventType') is None: 139 | try: 140 | if e.find('mods:publisher', ns).text.startswith('Staatsbibliothek zu Berlin') and \ 141 | e.find('mods:edition', ns).text == '[Electronic ed.]': 142 | e.attrib['eventType'] = 'digitization' 143 | warnings.warn('Fixed eventType for electronic ed.') 144 | continue 145 | except AttributeError: 146 | pass 147 | try: 148 | if e.find('mods:dateIssued', ns) is not None: 149 | e.attrib['eventType'] = 'publication' 150 | warnings.warn('Fixed eventType for an issued origin') 151 | continue 152 | except AttributeError: 153 | pass 154 | try: 155 | if e.find('mods:dateCreated', ns) is not None: 156 | e.attrib['eventType'] = 'production' 157 | warnings.warn('Fixed eventType for a created origin') 158 | continue 159 | except AttributeError: 160 | pass 161 | return self 162 | 163 | def fix_script_term(self): 164 | for e in self.group: 165 | # MODS-AP 2.3.1 is not clear about this, but it looks like this should be lower case. 166 | if e.attrib['authority'] == 'ISO15924': 167 | e.attrib['authority'] = 'iso15924' 168 | warnings.warn('Changed scriptTerm authority to lower case') 169 | return self 170 | 171 | def merge_sub_tags_to_set(self): 172 | from .mods4pandas import mods_to_dict 173 | value = {} 174 | 175 | sub_dicts = [mods_to_dict(e) for e in self.group] 176 | sub_tags = {k for d in sub_dicts for k in d.keys()} 177 | for sub_tag in sub_tags: 178 | s = set() 179 | for d in sub_dicts: 180 | v = d.get(sub_tag) 181 | if v: 182 | # There could be multiple scriptTerms in one language element, e.g. Antiqua and Fraktur in a 183 | # German language document. 
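                        # In that case v is already a set and is merged element-wise
                        # into s via update(); scalar values are added with add().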
184 | if isinstance(v, set): 185 | s.update(v) 186 | else: 187 | s.add(v) 188 | value[sub_tag] = s 189 | return value 190 | 191 | def attributes(self): 192 | """ 193 | Return a merged dict of all attributes of the tag group. 194 | 195 | Probably most useful if used on a singleton, for example: 196 | 197 | value['Page'] = TagGroup(tag, group).is_singleton().attributes() 198 | """ 199 | attrib = {} 200 | for e in self.group: 201 | for a, v in e.attrib.items(): 202 | a_localname = ET.QName(a).localname 203 | attrib[a_localname] = v 204 | return attrib 205 | 206 | def subelement_counts(self): 207 | counts = {} 208 | for e in self.group: 209 | for x in e.iter(): 210 | tag = ET.QName(x.tag).localname 211 | key = f"{tag}-count" 212 | counts[key] = counts.get(key, 0) + 1 213 | return counts 214 | 215 | def xpath_statistics(self, xpath_expr, namespaces): 216 | """ 217 | Extract values and calculate statistics 218 | 219 | Extract values using the given XPath expression, convert them to float and return descriptive 220 | statistics on the values. 221 | """ 222 | values = [] 223 | for e in self.group: 224 | r = e.xpath(xpath_expr, namespaces=namespaces) 225 | values += r 226 | values = np.array([float(v) for v in values]) 227 | 228 | statistics = {} 229 | if values.size > 0: 230 | statistics[f'{xpath_expr}-mean'] = np.mean(values) 231 | statistics[f'{xpath_expr}-median'] = np.median(values) 232 | statistics[f'{xpath_expr}-std'] = np.std(values) 233 | statistics[f'{xpath_expr}-min'] = np.min(values) 234 | statistics[f'{xpath_expr}-max'] = np.max(values) 235 | return statistics 236 | 237 | def xpath_count(self, xpath_expr, namespaces): 238 | """ 239 | Count all elements matching xpath_expr 240 | """ 241 | values = [] 242 | for e in self.group: 243 | r = e.xpath(xpath_expr, namespaces=namespaces) 244 | values += r 245 | 246 | counts = {f'{xpath_expr}-count': len(values)} 247 | return counts 248 | 249 | 250 | 251 | def sorted_groupby(iterable, key=None): 252 | """ 253 | Sort iterable by key and then group by the same key. 254 | 255 | itertools.groupby() assumes that the iterable is already sorted. This function 256 | conveniently sorts the iterable first, and then groups its elements. 257 | """ 258 | return groupby(sorted(iterable, key=key), key=key) 259 | 260 | 261 | def _to_dict(root, raise_errors): 262 | from .mods4pandas import mods_to_dict, mets_to_dict 263 | from .alto4pandas import alto_to_dict 264 | 265 | root_name = ET.QName(root.tag) 266 | if root_name.namespace == "http://www.loc.gov/mods/v3": 267 | return mods_to_dict(root, raise_errors) 268 | elif root_name.namespace == "http://www.loc.gov/METS/": 269 | return mets_to_dict(root, raise_errors) 270 | elif root_name.namespace in [ 271 | "http://schema.ccs-gmbh.com/ALTO", 272 | "http://www.loc.gov/standards/alto/", 273 | "http://www.loc.gov/standards/alto/ns-v2#", 274 | "http://www.loc.gov/standards/alto/ns-v4#", 275 | ]: 276 | return alto_to_dict(root, raise_errors) 277 | else: 278 | raise ValueError(f"Unknown namespace {root_name.namespace}") 279 | 280 | 281 | def flatten(d: MutableMapping, parent='', separator='_'): 282 | """ 283 | Flatten the given nested dict. 284 | 285 | It is assumed that d maps strings to either another dictionary (similarly structured) or some other value. 
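    Example (illustrative):
        flatten({"a": {"b": 1}, "c": 2})  ->  {"a_b": 1, "c": 2}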
286 | """ 287 | items = [] 288 | 289 | for k, v in d.items(): 290 | if parent: 291 | new_key = parent + separator + k 292 | else: 293 | new_key = k 294 | 295 | if isinstance(v, MutableMapping): 296 | items.extend(flatten(v, new_key, separator=separator).items()) 297 | else: 298 | items.append((new_key, v)) 299 | 300 | return dict(items) 301 | 302 | 303 | def dicts_to_df(data_list: List[Dict], *, index_column) -> pd.DataFrame: 304 | """ 305 | Convert the given list of dicts to a Pandas DataFrame. 306 | 307 | The keys of the dicts make the columns. 308 | """ 309 | 310 | # Build columns from keys 311 | columns = [] 312 | for m in data_list: 313 | for c in m.keys(): 314 | if c not in columns: 315 | columns.append(c) 316 | 317 | # Build data table 318 | data = [[m.get(c) for c in columns] for m in data_list] 319 | 320 | # Build index 321 | if isinstance(index_column, str): 322 | index = [m[index_column] for m in data_list] 323 | elif isinstance(index_column, tuple): 324 | index = [[m[c] for m in data_list] for c in index_column] 325 | index = pd.MultiIndex.from_arrays(index, names=index_column) 326 | else: 327 | raise ValueError(f"index_column must") 328 | 329 | df = pd.DataFrame(data=data, index=index, columns=columns) 330 | return df 331 | -------------------------------------------------------------------------------- /src/mods4pandas/mods4pandas.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import csv 3 | import logging 4 | import os 5 | import re 6 | import warnings 7 | from lxml import etree as ET 8 | from itertools import groupby 9 | from operator import attrgetter 10 | from typing import Dict, List 11 | from collections.abc import MutableMapping, Sequence 12 | 13 | import click 14 | import pandas as pd 15 | from tqdm import tqdm 16 | 17 | from .lib import sorted_groupby, TagGroup, ns, flatten, dicts_to_df 18 | 19 | 20 | 21 | logger = logging.getLogger('mods4pandas') 22 | 23 | def mods_to_dict(mods, raise_errors=True): 24 | """Convert MODS metadata to a nested dictionary""" 25 | 26 | # The approach taken here is to handle each element explicitly. This also means that ignored elements are ignored 27 | # explicitly. 28 | 29 | value = {} 30 | 31 | # Iterate through each group of tags 32 | for tag, group in sorted_groupby(mods, key=attrgetter('tag')): 33 | group = list(group) 34 | if tag == '{http://www.loc.gov/mods/v3}location': 35 | def only_current_location(location): 36 | return location.get('type') != 'former' 37 | value['location'] = TagGroup(tag, group) \ 38 | .filter(only_current_location) \ 39 | .has_attributes([{}, {'type': 'current'}]) \ 40 | .is_singleton().descend(raise_errors) 41 | elif tag == '{http://www.loc.gov/mods/v3}physicalLocation': 42 | def no_display_label(physical_location): 43 | return physical_location.get('displayLabel') is None 44 | value['physicalLocation'] = TagGroup(tag, group).filter(no_display_label).text() 45 | elif tag == '{http://www.loc.gov/mods/v3}shelfLocator': 46 | # This element should not be repeated according to MODS-AP 2.3.1, however a few of the files contain 47 | # a second element with empty text and a "displayLabel" attribute set. 
48 | def no_display_label(shelf_locator): 49 | return shelf_locator.get('displayLabel') is None 50 | value['shelfLocator'] = TagGroup(tag, group) \ 51 | .filter(no_display_label) \ 52 | .force_singleton() \ 53 | .has_no_attributes() \ 54 | .text() 55 | elif tag == '{http://www.loc.gov/mods/v3}originInfo': 56 | def has_event_type(origin_info): 57 | # According to MODS-AP 2.3.1, every originInfo should have its eventType set. However, some 58 | # are empty and not fixable. 59 | return origin_info.attrib.get('eventType') is not None 60 | tag_group = TagGroup(tag, group).fix_event_type().filter(has_event_type, warn="has no eventType") 61 | for event_type, grouped_group in sorted_groupby(tag_group.group, key=lambda g: g.attrib['eventType']): 62 | for n, e in enumerate(grouped_group): 63 | value['originInfo-{}{}'.format(event_type, n)] = mods_to_dict(e, raise_errors) 64 | elif tag == '{http://www.loc.gov/mods/v3}place': 65 | value['place'] = TagGroup(tag, group).force_singleton(warn=False).has_no_attributes().descend(raise_errors) 66 | elif tag == '{http://www.loc.gov/mods/v3}placeTerm': 67 | value['placeTerm'] = TagGroup(tag, group).is_singleton().has_attributes({'type': 'text'}).text() 68 | elif tag == '{http://www.loc.gov/mods/v3}dateIssued': 69 | value['dateIssued'] = TagGroup(tag, group) \ 70 | .fix_date() \ 71 | .sort(key=lambda d: d.attrib.get('keyDate') == 'yes', reverse=True) \ 72 | .ignore_attributes() \ 73 | .force_singleton() \ 74 | .text() 75 | elif tag == '{http://www.loc.gov/mods/v3}dateCreated': 76 | value['dateCreated'] = TagGroup(tag, group) \ 77 | .fix_date() \ 78 | .sort(key=lambda d: d.attrib.get('keyDate') == 'yes', reverse=True) \ 79 | .ignore_attributes() \ 80 | .force_singleton() \ 81 | .text() 82 | elif tag == '{http://www.loc.gov/mods/v3}dateCaptured': 83 | value['dateCaptured'] = TagGroup(tag, group).fix_date().ignore_attributes().is_singleton().text() 84 | elif tag == '{http://www.loc.gov/mods/v3}dateOther': 85 | value['dateOther'] = TagGroup(tag, group).fix_date().ignore_attributes().is_singleton().text() 86 | elif tag == '{http://www.loc.gov/mods/v3}publisher': 87 | value['publisher'] = TagGroup(tag, group).force_singleton(warn=False).has_no_attributes().text() 88 | elif tag == '{http://www.loc.gov/mods/v3}edition': 89 | value['edition'] = TagGroup(tag, group).force_singleton().has_no_attributes().text() 90 | elif tag == '{http://www.loc.gov/mods/v3}classification': 91 | authorities = {e.attrib['authority'] for e in group} 92 | for authority in authorities: 93 | sub_group = [e for e in group if e.attrib.get('authority') == authority] 94 | value['classification-{}'.format(authority)] = TagGroup(tag, sub_group).text_set() 95 | elif tag == '{http://www.loc.gov/mods/v3}recordInfo': 96 | value['recordInfo'] = TagGroup(tag, group).is_singleton().has_no_attributes().descend(raise_errors) 97 | elif tag == '{http://www.loc.gov/mods/v3}recordIdentifier': 98 | # By default we assume source="gbv-ppn" mods:recordIdentifiers (= PPNs), 99 | # however, in mods:relatedItems, there may be source="dnb-ppns", 100 | # which we need to distinguish by using a separate field name. 
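            # i.e. the resulting column is "recordIdentifier" for source="gbv-ppn"
            # and "recordIdentifier-dnb-ppn" for source="dnb-ppn".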
101 | try: 102 | value['recordIdentifier'] = TagGroup(tag, group).is_singleton().has_attributes({'source': 'gbv-ppn'}).text() 103 | except ValueError: 104 | value['recordIdentifier-dnb-ppn'] = TagGroup(tag, group).is_singleton().has_attributes({'source': 'dnb-ppn'}).text() 105 | elif tag == '{http://www.loc.gov/mods/v3}identifier': 106 | for e in group: 107 | if len(e.attrib) != 1: 108 | raise ValueError('Unknown attributes for identifier {}'.format(e.attrib)) 109 | value['identifier-{}'.format(e.attrib['type'])] = e.text 110 | elif tag == '{http://www.loc.gov/mods/v3}titleInfo': 111 | def only_standard_title(title_info): 112 | return title_info.attrib.get('type') is None 113 | value['titleInfo'] = TagGroup(tag, group) \ 114 | .filter(only_standard_title) \ 115 | .is_singleton().has_no_attributes().descend(raise_errors) 116 | elif tag == '{http://www.loc.gov/mods/v3}title': 117 | value['title'] = TagGroup(tag, group).is_singleton().has_no_attributes().text() 118 | elif tag == '{http://www.loc.gov/mods/v3}partName': 119 | value['partName'] = TagGroup(tag, group).is_singleton().has_no_attributes().text() 120 | elif tag == '{http://www.loc.gov/mods/v3}subTitle': 121 | value['subTitle'] = TagGroup(tag, group).force_singleton().has_no_attributes().text() 122 | elif tag == '{http://www.loc.gov/mods/v3}note': 123 | # This could be useful if distinguished by type attribute. 124 | pass 125 | elif tag == '{http://www.loc.gov/mods/v3}part': 126 | pass 127 | elif tag == '{http://www.loc.gov/mods/v3}abstract': 128 | value['abstract'] = TagGroup(tag, group).has_no_attributes().text() 129 | elif tag == '{http://www.loc.gov/mods/v3}subject': 130 | authorities = {e.attrib.get('authority') for e in group} 131 | for authority in authorities: 132 | k = 'subject-{}'.format(authority) if authority is not None else 'subject' 133 | sub_group = [e for e in group if e.attrib.get('authority') == authority] 134 | value[k] = TagGroup(tag, sub_group).force_singleton().descend(raise_errors) 135 | elif tag == '{http://www.loc.gov/mods/v3}topic': 136 | TagGroup(tag, group).text_set() 137 | elif tag == '{http://www.loc.gov/mods/v3}cartographics': 138 | pass 139 | elif tag == '{http://www.loc.gov/mods/v3}geographic': 140 | TagGroup(tag, group).text_set() 141 | elif tag == '{http://www.loc.gov/mods/v3}temporal': 142 | TagGroup(tag, group).text_set() 143 | elif tag == '{http://www.loc.gov/mods/v3}genre': 144 | authorities = {e.attrib.get('authority') for e in group} 145 | for authority in authorities: 146 | k = 'genre-{}'.format(authority) if authority is not None else 'genre' 147 | value[k] = {e.text for e in group if e.attrib.get('authority') == authority} 148 | elif tag == '{http://www.loc.gov/mods/v3}language': 149 | value["language"] = TagGroup(tag, group) \ 150 | .merge_sub_tags_to_set() 151 | elif tag == '{http://www.loc.gov/mods/v3}languageTerm': 152 | value['languageTerm'] = TagGroup(tag, group) \ 153 | .has_attributes({'authority': 'iso639-2b', 'type': 'code'}) \ 154 | .text_set() 155 | elif tag == '{http://www.loc.gov/mods/v3}scriptTerm': 156 | value['scriptTerm'] = TagGroup(tag, group) \ 157 | .fix_script_term() \ 158 | .has_attributes({'authority': 'iso15924', 'type': 'code'}) \ 159 | .text_set() 160 | elif tag == '{http://www.loc.gov/mods/v3}relatedItem': 161 | tag_group = TagGroup(tag, group) 162 | for type_, grouped_group in sorted_groupby(tag_group.group, key=lambda g: g.attrib['type']): 163 | sub_tag = 'relatedItem-{}'.format(type_) 164 | grouped_group = list(grouped_group) 165 | if type_ in ["original", 
"host"]: 166 | value[sub_tag] = TagGroup(sub_tag, grouped_group).is_singleton().descend(raise_errors) 167 | else: 168 | # TODO type="series" 169 | pass 170 | elif tag == '{http://www.loc.gov/mods/v3}name': 171 | for n, e in enumerate(group): 172 | value['name{}'.format(n)] = mods_to_dict(e, raise_errors) 173 | elif tag == '{http://www.loc.gov/mods/v3}role': 174 | value["role"] = TagGroup(tag, group) \ 175 | .has_no_attributes() \ 176 | .merge_sub_tags_to_set() 177 | elif tag == '{http://www.loc.gov/mods/v3}roleTerm': 178 | value['roleTerm'] = TagGroup(tag, group) \ 179 | .has_attributes({'authority': 'marcrelator', 'type': 'code'}) \ 180 | .text_set() 181 | elif tag == '{http://www.loc.gov/mods/v3}namePart': 182 | for e in group: 183 | if not e.attrib.get('type'): 184 | value['namePart'] = e.text 185 | else: 186 | value['namePart-{}'.format(e.attrib['type'])] = e.text 187 | elif tag == '{http://www.loc.gov/mods/v3}nameIdentifier': 188 | # TODO Use this (e.g. 106168096) or the 189 | # mods:name@valueURI to disambiguate 190 | pass 191 | elif tag == '{http://www.loc.gov/mods/v3}displayForm': 192 | value['displayForm'] = TagGroup(tag, group).is_singleton().has_no_attributes().text() 193 | elif tag == '{http://www.loc.gov/mods/v3}physicalDescription': 194 | pass 195 | elif tag == '{http://www.loc.gov/mods/v3}extension': 196 | pass 197 | elif tag == '{http://www.loc.gov/mods/v3}accessCondition': 198 | for e in group: 199 | if not e.attrib.get('type'): 200 | raise ValueError('Unknown attributes for accessCondition {}'.format(e.attrib)) 201 | value['accessCondition-{}'.format(e.attrib['type'])] = e.text 202 | elif tag == '{http://www.loc.gov/mods/v3}typeOfResource': 203 | value['typeOfResource'] = TagGroup(tag, group).is_singleton().has_no_attributes().text() 204 | elif tag == '{http://www.loc.gov/mods/v3}mods': 205 | # XXX Ignore nested mods:mods for now (used in mods:subject) 206 | pass 207 | else: 208 | if raise_errors: 209 | raise ValueError('Unknown tag "{}"'.format(tag)) 210 | else: 211 | pass 212 | 213 | return value 214 | 215 | 216 | def mets_to_dict(mets, raise_errors=True): 217 | """Convert METS metadata to a nested dictionary""" 218 | 219 | # The approach taken here is to handle each element explicitly. This also means that ignored elements are ignored 220 | # explicitly. 221 | 222 | value = {} 223 | 224 | # Iterate through each group of tags 225 | for tag, group in sorted_groupby(mets, key=attrgetter('tag')): 226 | group = list(group) 227 | 228 | # XXX Namespaces seem to use a trailing / sometimes, sometimes not. 229 | # (e.g. 
{http://www.loc.gov/METS/} vs {http://www.loc.gov/METS}) 230 | if tag == '{http://www.loc.gov/METS/}amdSec': 231 | pass # TODO 232 | elif tag == '{http://www.loc.gov/METS/}dmdSec': 233 | pass # TODO 234 | elif tag == '{http://www.loc.gov/METS/}metsHdr': 235 | pass # TODO 236 | elif tag == '{http://www.loc.gov/METS/}structLink': 237 | pass # TODO 238 | elif tag == '{http://www.loc.gov/METS/}structMap': 239 | pass # TODO 240 | elif tag == '{http://www.loc.gov/METS/}fileSec': 241 | value['fileSec'] = TagGroup(tag, group) \ 242 | .is_singleton().descend(raise_errors) 243 | elif tag == '{http://www.loc.gov/METS/}fileGrp': 244 | for e in group: 245 | use = e.attrib.get('USE') 246 | if not use: 247 | raise ValueError('No USE attribute for fileGrp {}'.format(e)) 248 | value[f'fileGrp-{use}-count'] = len(e) 249 | else: 250 | if raise_errors: 251 | print(value) 252 | raise ValueError('Unknown tag "{}"'.format(tag)) 253 | else: 254 | pass 255 | return value 256 | 257 | def pages_to_dict(mets, raise_errors=True) -> List[Dict]: 258 | # TODO replace asserts by ValueError 259 | 260 | result = [] 261 | 262 | # PPN 263 | def get_mets_recordIdentifier(*, source="gbv-ppn"): 264 | return (mets.xpath(f'//mets:dmdSec[1]//mods:mods/mods:recordInfo/mods:recordIdentifier[@source="{source}"]', 265 | namespaces=ns) or [None])[0].text 266 | ppn = get_mets_recordIdentifier() 267 | 268 | # Getting per-page/structure information is a bit different 269 | structMap_PHYSICAL = mets.find('./mets:structMap[@TYPE="PHYSICAL"]', ns) 270 | structMap_LOGICAL = mets.find('./mets:structMap[@TYPE="LOGICAL"]', ns) 271 | fileSec = mets.find('./mets:fileSec', ns) 272 | if structMap_PHYSICAL is None: 273 | # This is expected in a multivolume work or periodical! 274 | if any( 275 | structMap_LOGICAL.find(f'./mets:div[@TYPE="{t}"]', ns) is not None 276 | for t in ["multivolume_work", "MultivolumeWork", "periodical"] 277 | ): 278 | return [] 279 | else: 280 | raise ValueError("No structMap[@TYPE='PHYSICAL'] found (but not a multivolume work)") 281 | if structMap_LOGICAL is None: 282 | raise ValueError("No structMap[@TYPE='LOGICAL'] found") 283 | if fileSec is None: 284 | raise ValueError("No fileSec found") 285 | 286 | div_physSequence = structMap_PHYSICAL[0] 287 | assert div_physSequence.attrib.get("TYPE") == "physSequence" 288 | 289 | 290 | # Build a look-up table to get mets:file by @ID 291 | # This cuts retrieving the mets:file down to half the time. 292 | mets_file_by_ID = {} 293 | def _init_mets_file_by_ID(): 294 | for f in fileSec.iterfind('./mets:fileGrp/mets:file', ns): 295 | mets_file_by_ID[f.attrib.get("ID")] = f 296 | _init_mets_file_by_ID() 297 | 298 | def get_mets_file(*, ID): 299 | if ID: 300 | return mets_file_by_ID[ID] 301 | 302 | def get_mets_div(*, ID): 303 | if ID: 304 | return structMap_LOGICAL.findall(f'.//mets:div[@ID="{ID}"]', ns) 305 | 306 | for page in div_physSequence: 307 | 308 | # TODO sort by ORDER? 
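        # Each page div of the physical sequence becomes one row, keyed by (ppn, ID);
        # the mets:fptr elements below contribute one fileGrp_{USE}_file_FLocat_href
        # column per fileGrp.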
309 | assert page.attrib.get("TYPE") == "page" 310 | page_dict = {} 311 | page_dict["ppn"] = ppn 312 | page_dict["ID"] = page.attrib.get("ID") 313 | for fptr in page: 314 | assert fptr.tag == "{http://www.loc.gov/METS/}fptr" 315 | file_id = fptr.attrib.get("FILEID") 316 | assert file_id 317 | 318 | file_ = get_mets_file(ID=file_id) 319 | assert file_ is not None 320 | fileGrp_USE = file_.getparent().attrib.get("USE") 321 | file_FLocat_href = (file_.xpath('mets:FLocat/@xlink:href', namespaces=ns) or [None])[0] 322 | page_dict[f"fileGrp_{fileGrp_USE}_file_FLocat_href"] = file_FLocat_href 323 | 324 | def get_struct_log(*, to_phys): 325 | """ 326 | Get the logical structMap elements that link to the given physical page. 327 | 328 | Keyword arguments: 329 | to_phys -- ID of the page, as per structMap[@TYPE="PHYSICAL"] 330 | """ 331 | 332 | # This is all XLink, there might be a more generic way to traverse the links. However, currently, 333 | # it suffices to do this the old-fashioned way. 334 | 335 | sm_links = mets.findall( 336 | f'./mets:structLink/mets:smLink[@xlink:to="{to_phys}"]', ns 337 | ) 338 | 339 | targets = [] 340 | for sm_link in sm_links: 341 | xlink_from = sm_link.attrib.get(f"{{{ns['xlink']}}}from") 342 | targets.extend(get_mets_div(ID=xlink_from)) 343 | return targets 344 | 345 | struct_divs = set(get_struct_log(to_phys=page_dict["ID"])) 346 | 347 | # In our documents, there are already links to parent elements, but we want to make 348 | # sure and add them. 349 | def get_struct_log_parents(div): 350 | cursor = div 351 | while (cursor := cursor.getparent()).tag == f"{{{ns['mets']}}}div": 352 | yield cursor 353 | 354 | struct_divs_to_add = set() 355 | for struct_div in struct_divs: 356 | struct_divs_to_add.update(get_struct_log_parents(struct_div)) 357 | struct_divs.update(struct_divs_to_add) 358 | 359 | # Populate structure type indicator variables 360 | for struct_div in struct_divs: 361 | type_ = struct_div.attrib.get("TYPE") 362 | assert type_ 363 | page_dict[f"structMap-LOGICAL_TYPE_{type_}"] = 1 364 | 365 | result.append(page_dict) 366 | 367 | return result 368 | 369 | 370 | @click.command() 371 | @click.argument('mets_files', type=click.Path(exists=True), required=True, nargs=-1) 372 | @click.option('--output', '-o', 'output_file', type=click.Path(), help='Output Parquet file', 373 | default='mods_info_df.parquet', show_default=True) 374 | @click.option('--output-page-info', type=click.Path(), help='Output page info Parquet file') 375 | def process(mets_files: List[str], output_file: str, output_page_info: str): 376 | """ 377 | A tool to convert the MODS metadata in INPUT to a pandas DataFrame. 378 | 379 | INPUT is assumed to be a METS document with MODS metadata. INPUT may optionally be a directory. The tool then reads 380 | all files in the directory. 381 | 382 | mods4pandas writes two output files: A pandas DataFrame (as Parquet) and a CSV file with all conversion warnings. 383 | 384 | Per-page information (e.g. structure information) can be output to a separate Parquet file. 
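    Example (illustrative paths):

        mods4pandas --output-page-info page_info.parquet /path/to/mets-files/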
385 | """ 386 | 387 | # Extend file list if directories are given 388 | mets_files_real = [] 389 | for m in mets_files: 390 | if os.path.isdir(m): 391 | logger.info('Scanning directory {}'.format(m)) 392 | mets_files_real.extend(f.path for f in tqdm(os.scandir(m), leave=False) 393 | if f.is_file() and not f.name.startswith('.')) 394 | else: 395 | mets_files_real.append(m) 396 | 397 | # Process METS files 398 | with open(output_file + '.warnings.csv', 'w') as csvfile: 399 | csvwriter = csv.writer(csvfile) 400 | mods_info = [] 401 | page_info = [] 402 | logger.info('Processing METS files') 403 | for mets_file in tqdm(mets_files_real, leave=False): 404 | try: 405 | root = ET.parse(mets_file).getroot() 406 | mets = root # XXX .find('mets:mets', ns) does not work here 407 | mods = root.find('mets:dmdSec//mods:mods', ns) 408 | 409 | with warnings.catch_warnings(record=True) as caught_warnings: 410 | warnings.simplefilter('always') # do NOT filter double occurrences 411 | 412 | # MODS 413 | d = flatten(mods_to_dict(mods, raise_errors=True)) 414 | 415 | # METS 416 | d_mets = flatten(mets_to_dict(mets, raise_errors=True)) 417 | for k, v in d_mets.items(): 418 | d[f"mets_{k}"] = v 419 | # "meta" 420 | d['mets_file'] = mets_file 421 | 422 | # METS - per-page 423 | if output_page_info: 424 | page_info_doc: list[dict] = pages_to_dict(mets, raise_errors=True) 425 | 426 | mods_info.append(d) 427 | if output_page_info: 428 | page_info.extend(page_info_doc) 429 | 430 | if caught_warnings: 431 | # PyCharm thinks caught_warnings is not Iterable: 432 | # noinspection PyTypeChecker 433 | for caught_warning in caught_warnings: 434 | csvwriter.writerow([mets_file, caught_warning.message]) 435 | except Exception as e: 436 | logger.error('Exception in {}: {}'.format(mets_file, e)) 437 | #import traceback; traceback.print_exc() 438 | 439 | # Convert the mods_info List[Dict] to a pandas DataFrame 440 | mods_info_df = dicts_to_df(mods_info, index_column="recordInfo_recordIdentifier") 441 | 442 | # Save the DataFrame 443 | logger.info('Writing DataFrame to {}'.format(output_file)) 444 | mods_info_df.to_parquet(output_file) 445 | 446 | # Convert page_info 447 | if output_page_info: 448 | page_info_df = dicts_to_df(page_info, index_column=("ppn", "ID")) 449 | # Save the DataFrame 450 | logger.info('Writing DataFrame to {}'.format(output_page_info)) 451 | page_info_df.to_parquet(output_page_info) 452 | 453 | 454 | def main(): 455 | logging.basicConfig(level=logging.INFO) 456 | 457 | for prefix, uri in ns.items(): 458 | ET.register_namespace(prefix, uri) 459 | 460 | process() 461 | 462 | 463 | if __name__ == '__main__': 464 | main() 465 | -------------------------------------------------------------------------------- /src/mods4pandas/tests/data/alto/734008031/00000005.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | pixel 5 | 6 | 7 | 2016-08-07 8 | 9 | ABBYY 10 | ABBYY FineReader Engine 11 | 11 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 
--------------------------------------------------------------------------------
/src/mods4pandas/tests/data/alto/PPN636777308/00000002.xml:
--------------------------------------------------------------------------------
[ALTO test fixture; XML markup lost in extraction. Recoverable content: MeasurementUnit "mm10"; source image F:\Batch SBB\dachklag_635359391_orig\00000003.tif; a chain of processing steps dated 2011-06-29, attributed to "Staatsbibliothek zu Berlin – PK" (Color Enhancement, Rotation, Binarisation, Cleaning, Remove Dots, Blackborder elimination, Detection of horizontal lines, Detection of vertical lines, Segmentation, Region identification, and Optical Character Recognition marked "not implemented."), each performed with BIT-Alpha 2.0.38.595 (Rel. 38) by B.I.T. Bureau Ingénieur Tomasi.]
--------------------------------------------------------------------------------
/src/mods4pandas/tests/data/alto/PPN767883624/00000001.xml:
--------------------------------------------------------------------------------
[ALTO test fixture; XML markup lost in extraction. Recoverable content: MeasurementUnit "pixel"; an OCR processing step dated 2014-05-21, software ABBYY / ABBYY FineReader Engine, version 11.]
--------------------------------------------------------------------------------
/src/mods4pandas/tests/data/alto/PPN767883624/00000002.xml:
--------------------------------------------------------------------------------
[ALTO test fixture; same recoverable content as 00000001.xml above: MeasurementUnit "pixel", processing dated 2014-05-21, ABBYY FineReader Engine 11.]
--------------------------------------------------------------------------------
/src/mods4pandas/tests/data/alto/weird-ns/00000007.xml:
--------------------------------------------------------------------------------
[ALTO test fixture exercising a non-standard ALTO namespace (per the directory name); XML markup lost in extraction. Recoverable content: source image file name 00000007_FR.xml; a processing step dated 2013-12-18 described as "OCR Average Character Confidence 89.97%".]
--------------------------------------------------------------------------------
/src/mods4pandas/tests/data/mets-mods/PPN1678618276.xml:
--------------------------------------------------------------------------------
[METS/MODS test fixture; XML markup lost in extraction. Recoverable content: created by Kitodo (kitodo-ugh-2.1.3, 30−July−2019); location Staatsbibliothek zu Berlin - Preußischer Kulturbesitz, shelfLocator "55 Nachl 100/B,25431"; origin Wertheim, 1825 (1825-07-30); electronic edition Berlin 2019; classifications Musik, Nachlässe und Autographe, Schott-Archiv; record identifier PPN1678618276 with PURL http://resolver.staatsbibliothek-berlin.de/SBB0002A14000000000; title "Brief an B. Schott's Söhne : 30.07.1825"; names Goebel (given name recorded as "..." in the source; role aut) and B. Schott's Söhne (GND 106168096, role oth); extent "1 Br., 1 S."; collections "Nachlässe und Autographe digital" and "Schott-Archiv digital"; access condition CC BY-NC-SA 4.0 International.]
--------------------------------------------------------------------------------
/src/mods4pandas/tests/data/mets-mods/PPN1769395962.xml:
--------------------------------------------------------------------------------
[METS/MODS test fixture; XML markup lost in extraction. Recoverable content: created by Kitodo; shelfLocator "DMS 22613"; origin Berlin / [Deutschland?], 1890, publishers Georg Plothow and Pantheon-Verlag Bruno C.L. Plothow; electronic edition Berlin 2021; classifications Musiknoten, Musikdrucke; record identifier PPN1769395962 (related records PPN1769395032 and PPN1769388664) with PURL http://resolver.staatsbibliothek-berlin.de/SBB000309C200060000; title "Kinderlied", "Op. 25 No. 6", part "No. 6"; languages ger (script 215) and eng; names Wurm, Mary (GND 078789583; roles cmp, aut) and Marshall, Florence (GND 705064530; role trl); extent "1 Partitur (3 Seiten), 1 Stimme (1 Seite)"; access Public Domain Mark 1.0, open access. The same name/role data reappears in the test_mods4pandas.py fixtures below.]
--------------------------------------------------------------------------------
/src/mods4pandas/tests/data/mets-mods/PPN3348760607-mehrere-shelfLocator.xml:
--------------------------------------------------------------------------------
[METS/MODS test fixture for the multiple-shelfLocator case ("mehrere shelfLocator"); XML markup lost in extraction. Recoverable content: created by Kitodo; shelfLocators "Libri sin. 21c", "Ms sin. 21", "Libri sin. 21", "Libri sin. 21c"; note 刻本; electronic edition Berlin 2014; classifications Historische Drucke, Ostasiatica, Sinica; record identifier PPN3348760607; title 赤道南北兩總星圖 8幅 (殘, 存4幅), transliteration "chi dao nan bei liang zong xing tu", simplified form 赤道南北两总星图; project "SSG 6,25 Digital"; authors Schall von Bell, Johann Adam, and 徐光啓; funder Deutsche Forschungsgemeinschaft; extent "Online-Ressource (4 幅)"; logical subsections for Blatt V through VIII, each split into upper and lower parts.]
--------------------------------------------------------------------------------
/src/mods4pandas/tests/data/mets-mods/PPN717884805-multivolume_work-no-structMap-PHYSICAL.xml:
--------------------------------------------------------------------------------
[METS/MODS test fixture for a multivolume work without a structMap TYPE="PHYSICAL" (per the file name); XML markup lost in extraction. Recoverable content: created by Goobi (UGH-1.11.1, 16−November−2015); origin Herborn, Buchhandlung des Nassauischen Colportagevereins, 1916; classifications Krieg 1914-1918, Historische Drucke; record identifier PPN717884805 (host record PPN242046452); title "Die Predigt des Evangeliums in der Zeitenwende" with subtitle "Erläuterungen und Dispositionen zu den altkirchlichen und den Eisenacher Perikopen und zu freien Texten unter besonderer Berücksichtigung der Kriegszeit"; genre book; shelfLocator "Weltkr. 625"; language ger; project "Europeana Collections 1914-1918"; author Dunkmann, Karl; access UNKNOWN.]
--------------------------------------------------------------------------------
/src/mods4pandas/tests/data/mets-mods/PPN773555676.xml:
--------------------------------------------------------------------------------
[METS/MODS test fixture; XML markup lost in extraction. Recoverable content: created by Goobi (UGH-1.11.1), with OCR-D processing agents recorded in the METS header (ocrd-sbb-binarize v0.0.8, ocrd-eynollah-segment v0.0.7, ocrd-calamari-recognize v1.0.3 with calamari 1.0.5 and tensorflow 2.5.0); location DE-1, shelfLocator "Ye 6081"; origin [S.l.], 1619; electronic edition Berlin 2014; classifications Historische Drucke, Sprachen / Literaturen, Musik; record identifier PPN773555676, VD17 1:692277T; title "Zwey Böhmische Lieder verdeutscht" with a long subtitle describing the two songs; genres Lied, Flugschrift; projects "VD Lied digital - Berliner Liedflugschriften" and "VD17 digital"; funder Deutsche Forschungsgemeinschaft; extent "[4] Bl"; catalog and presentation links at stabikat.de and digital.staatsbibliothek-berlin.de; access Public Domain Mark 1.0. Two contained songs have their own descriptive sections: PPN777148331, "Wjllkommen/ König Friederich: || Jn Jesu namen grüssen dich ||" (72 stanzas of 4 lines, S. [2 - 5]), and PPN777148463, "Das Ander Lied." ("Laßt hoch uns halten was ich sag/ große Freud ist in ganz Prag", 69 stanzas of 4 lines, S. [5 - 8]). No further text content of the record is recoverable.]
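Before the test modules, a note on the structure linking these fixtures exercise: the `get_struct_log` helper in `mods4pandas.py` above resolves, for a given physical page, the logical structMap divs pointing at it through `mets:structLink/mets:smLink` (`xlink:to` names the page, `xlink:from` the logical div). A self-contained sketch of that traversal — the two-div METS fixture and all IDs here are invented for illustration, not taken from the test data:

from lxml import etree as ET

NS = {"mets": "http://www.loc.gov/METS/",
      "xlink": "http://www.w3.org/1999/xlink"}

mets = ET.fromstring(b"""
<mets:mets xmlns:mets="http://www.loc.gov/METS/"
           xmlns:xlink="http://www.w3.org/1999/xlink">
  <mets:structMap TYPE="LOGICAL">
    <mets:div ID="LOG_0000" TYPE="monograph">
      <mets:div ID="LOG_0001" TYPE="title_page"/>
    </mets:div>
  </mets:structMap>
  <mets:structMap TYPE="PHYSICAL">
    <mets:div ID="PHYS_0000" TYPE="physSequence">
      <mets:div ID="PHYS_0001" TYPE="page"/>
    </mets:div>
  </mets:structMap>
  <mets:structLink>
    <mets:smLink xlink:from="LOG_0001" xlink:to="PHYS_0001"/>
  </mets:structLink>
</mets:mets>
""")

# Find the smLinks targeting the page, then look up the logical div behind each xlink:from
page_id = "PHYS_0001"
for sm_link in mets.findall(f'./mets:structLink/mets:smLink[@xlink:to="{page_id}"]', NS):
    log_id = sm_link.attrib[f"{{{NS['xlink']}}}from"]
    div = mets.xpath(f'//mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="{log_id}"]',
                     namespaces=NS)[0]
    print(log_id, "->", div.attrib["TYPE"])  # LOG_0001 -> title_page

The parent walk in `get_struct_log_parents` would additionally pull in LOG_0000 here, which is how a page ends up flagged with both its section type ("title_page") and the enclosing work type ("monograph") — exactly what `test_page_info` below asserts.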
--------------------------------------------------------------------------------
/src/mods4pandas/tests/test_alto.py:
--------------------------------------------------------------------------------
1 | from lxml import etree as ET
2 | 
3 | 
4 | from mods4pandas.alto4pandas import alto_to_dict
5 | from mods4pandas.lib import flatten
6 | 
7 | 
8 | def dict_fromstring(x):
9 |     return flatten(alto_to_dict(ET.fromstring(x)))
10 | 
11 | def test_Page_counts():
12 |     """
13 |     Elements below Layout/Page should be counted
14 |     """
15 |     d = dict_fromstring("""
16 | <alto xmlns="http://www.loc.gov/standards/alto/ns-v2#">
17 |     <Layout>
18 |         <Page>
19 |             <TextBlock>
20 |                 <TextLine>
21 |                     <String CONTENT="Foo"/>
22 |                     <String CONTENT="Bar"/>
23 |                 </TextLine>
24 |                 <TextLine>
25 |                     <String CONTENT="Foo"/>
26 |                     <String CONTENT="Bar"/>
27 |                 </TextLine>
28 |                 <TextLine>
29 |                     <String CONTENT="Foo"/>
30 |                     <String CONTENT="Bar"/>
31 |                 </TextLine>
32 |             </TextBlock>
33 |         </Page>
34 |     </Layout>
35 | </alto>
36 | """)
37 |     assert d['Layout_Page_TextBlock-count'] == 1
38 |     assert d['Layout_Page_TextLine-count'] == 3
39 |     assert d['Layout_Page_String-count'] == 6
40 | 
41 | def test_Tags_counts():
42 |     d = dict_fromstring("""
43 | <alto xmlns="http://www.loc.gov/standards/alto/ns-v2#">
44 |     <Tags>
45 |         <NamedEntityTag ID="Tag0" LABEL="Berlin"/>
46 |         <NamedEntityTag ID="Tag1" LABEL="Hamburg"/>
47 |         <NamedEntityTag ID="Tag2" LABEL="München"/>
48 |         <NamedEntityTag ID="Tag3" LABEL="Köln"/>
49 |         <NamedEntityTag ID="Tag4" LABEL="Frankfurt"/>
50 |         <NamedEntityTag ID="Tag5" LABEL="Stuttgart"/>
51 |         <NamedEntityTag ID="Tag6" LABEL="Düsseldorf"/>
52 |         <NamedEntityTag ID="Tag7" LABEL="Leipzig"/>
53 |         <NamedEntityTag ID="Tag8" LABEL="Dresden"/>
54 |     </Tags>
55 | </alto>
56 | """)
57 |     assert d['Tags_NamedEntityTag-count'] == 9
58 | 
59 | def test_String_TAGREF_counts():
60 |     d = dict_fromstring("""
61 | <alto xmlns="http://www.loc.gov/standards/alto/ns-v2#">
62 |     <Layout>
63 |         <Page>
64 |             <TextBlock>
65 |                 <TextLine>
66 |                     <String CONTENT="Foo" TAGREFS="Tag0"/>
67 |                     <String CONTENT="Bar" TAGREFS="Tag1"/>
68 |                     <String CONTENT="Baz" TAGREFS="Tag2"/>
69 |                     <String CONTENT="Qux"/>
70 |                 </TextLine>
71 |             </TextBlock>
72 |         </Page>
73 |     </Layout>
74 | </alto>
75 | """)
76 |     assert d['Layout_Page_//alto:String[@TAGREFS]-count'] == 3
77 |     assert d['Layout_Page_String-count'] == 4
--------------------------------------------------------------------------------
/src/mods4pandas/tests/test_mets.py:
--------------------------------------------------------------------------------
1 | from lxml import etree as ET
2 | 
3 | 
4 | from mods4pandas.mods4pandas import mets_to_dict
5 | from mods4pandas.lib import flatten
6 | 
7 | 
8 | def dict_fromstring(x):
9 |     """Helper function to parse a METS/MODS XML string to a flattened dict"""
10 |     return flatten(mets_to_dict(ET.fromstring(x)))
11 |     # XXX move to test lib
12 | 
13 | def test_fileGrp():
14 |     """
15 |     Elements of mets:fileGrp should be counted
16 |     """
17 |     d = dict_fromstring("""
18 | <mets:mets xmlns:mets="http://www.loc.gov/METS/">
19 |     <mets:fileSec>
20 |         <mets:fileGrp USE="PRESENTATION">
21 |             <mets:file ID="FILE_0001_PRESENTATION"/>
22 |             <mets:file ID="FILE_0002_PRESENTATION"/>
23 |             <mets:file ID="FILE_0003_PRESENTATION"/>
24 |         </mets:fileGrp>
25 |     </mets:fileSec>
26 | </mets:mets>
27 | """)
28 |     assert d['fileSec_fileGrp-PRESENTATION-count'] == 3
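The `fileSec_fileGrp-<USE>-count` key that this test pins down can also be computed with plain lxml; the following stand-alone sketch derives the same quantity independently of `mets_to_dict` (fixture and IDs invented for illustration, not the library's implementation):

from lxml import etree as ET

NS = {"mets": "http://www.loc.gov/METS/"}

mets = ET.fromstring(b"""
<mets:mets xmlns:mets="http://www.loc.gov/METS/">
  <mets:fileSec>
    <mets:fileGrp USE="PRESENTATION">
      <mets:file ID="FILE_0001"/><mets:file ID="FILE_0002"/><mets:file ID="FILE_0003"/>
    </mets:fileGrp>
  </mets:fileSec>
</mets:mets>
""")

# One counter column per fileGrp, keyed by its USE attribute
counts = {}
for file_grp in mets.findall("./mets:fileSec/mets:fileGrp", NS):
    use = file_grp.attrib.get("USE")
    counts[f"fileSec_fileGrp-{use}-count"] = len(file_grp.findall("mets:file", NS))
print(counts)  # {'fileSec_fileGrp-PRESENTATION-count': 3}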
80 | """ 81 | d = dict_fromstring(""" 82 | 83 | 84 | ger 85 | 215 86 | 217 87 | 88 | 89 | lat 90 | 216 91 | 92 | 93 | """) 94 | assert d['language_scriptTerm'] == {'215', '216', '217'} 95 | 96 | def test_recordInfo(): 97 | d = dict_fromstring(""" 98 | 99 | 100 | PPN610714341 101 | 102 | 103 | """) 104 | assert d['recordInfo_recordIdentifier'] == 'PPN610714341' 105 | 106 | def test_accessCondition(): 107 | d = dict_fromstring(""" 108 | 109 | UNKNOWN 110 | 111 | """) 112 | assert d['accessCondition-use and reproduction'] == 'UNKNOWN' 113 | 114 | def test_originInfo_no_event_type(): 115 | with pytest.warns(UserWarning) as ws: 116 | d = dict_fromstring(""" 117 | 118 | 119 | Berlin 120 | 121 | 122 | """) 123 | 124 | assert d == {} # empty 125 | 126 | assert len(ws) == 1 127 | assert ws[0].message.args[0] == 'Filtered {http://www.loc.gov/mods/v3}originInfo element (has no eventType)' 128 | 129 | def test_relatedItem(): 130 | d = dict_fromstring(""" 131 | 132 | 133 | 134 | PPN167755803 135 | 136 | 137 | 138 | """) 139 | 140 | assert d['relatedItem-original_recordInfo_recordIdentifier'] == 'PPN167755803' 141 | 142 | # mods:relatedItem may also have source="dnb-ppn" recordIdentifiers: 143 | d = dict_fromstring(""" 144 | 145 | 146 | 147 | 1236513355 148 | 149 | 150 | 151 | """) 152 | 153 | assert d['relatedItem-original_recordInfo_recordIdentifier-dnb-ppn'] == '1236513355' 154 | -------------------------------------------------------------------------------- /src/mods4pandas/tests/test_page_info.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | 4 | from lxml import etree as ET 5 | 6 | from mods4pandas.mods4pandas import pages_to_dict 7 | 8 | 9 | TESTS_DATA_DIR = Path(__file__).parent / "data" 10 | 11 | 12 | def removeprefix(s, prefix): 13 | if sys.version_info < (3,9): 14 | return s[len(prefix):] if s.startswith(prefix) else s 15 | else: 16 | return s.removeprefix(prefix) 17 | 18 | 19 | def test_page_info(): 20 | """Test creation of page_info""" 21 | mets = ET.parse(TESTS_DATA_DIR / "mets-mods" / "PPN821507109-1361-pages.xml") 22 | page_info = pages_to_dict(mets) 23 | 24 | # We have 1361 pages for this one work. 25 | assert len(page_info) == 1361 26 | assert all(p["ppn"] == "PPN821507109" for p in page_info) 27 | 28 | # Look closer at an interesting page 29 | from pprint import pprint; pprint(page_info[0]) 30 | page_info_page = next(p for p in page_info if p["ID"] == "PHYS_0005") 31 | 32 | assert page_info_page["fileGrp_PRESENTATION_file_FLocat_href"] == "file:///goobi/tiff001/sbb/PPN821507109/00000005.tif" 33 | 34 | # This is a title page with an illustration, check that we correctly got this info from the 35 | # structMap. 36 | struct_types = sorted(removeprefix(k, "structMap-LOGICAL_TYPE_") for k, v in page_info_page.items() if k.startswith("structMap-LOGICAL_TYPE_") and v == 1) 37 | assert struct_types == ["illustration", "monograph", "title_page"] 38 | 39 | 40 | def test_page_info_multivolume_work(): 41 | """Test creation of page_info for multivolume_work""" 42 | mets = ET.parse(TESTS_DATA_DIR / "mets-mods" / "PPN717884805-multivolume_work-no-structMap-PHYSICAL.xml") 43 | page_info = pages_to_dict(mets) 44 | assert page_info == [] 45 | 46 | --------------------------------------------------------------------------------