├── src
    └── oaipmh
    │   ├── __init__.py
    │   ├── tests
    │       ├── __init__.py
    │       ├── fake4
    │       │   ├── mapping.txt
    │       │   └── 00001.xml
    │       ├── fake5
    │       │   ├── mapping.txt
    │       │   └── 00001.xml
    │       ├── fake3
    │       │   ├── mapping.txt
    │       │   └── 00001.xml
    │       ├── runtests.sh
    │       ├── fake2
    │       │   ├── mapping.txt
    │       │   ├── 00000.xml
    │       │   └── 00001.xml
    │       ├── fake1
    │       │   ├── mapping.txt
    │       │   ├── 00002.xml
    │       │   ├── 00000.xml
    │       │   ├── 00001.xml
    │       │   ├── 00003.xml
    │       │   ├── 00006.xml
    │       │   └── 00004.xml
    │       ├── test_broken.py
    │       ├── createdata_deleted_records.py
    │       ├── test_deleted_records.py
    │       ├── createdata.py
    │       ├── test_validation.py
    │       ├── createbrokendata.py
    │       ├── test_datestamp.py
    │       ├── fakeclient.py
    │       ├── fakeserver.py
    │       ├── test_client.py
    │       ├── OAI-PMH.xsd
    │       └── test_server.py
    │   ├── error.py
    │   ├── validation.py
    │   ├── datestamp.py
    │   ├── metadata.py
    │   ├── interfaces.py
    │   ├── common.py
    │   ├── client.py
    │   └── server.py
├── MANIFEST.in
├── .hgignore
├── .gitignore
├── tox.ini
├── .github
    └── workflows
    │   └── run_tests.yml
├── INSTALL.txt
├── CREDITS.txt
├── README.rst
├── setup.py
├── .hgtags
├── doc
    ├── oaiclient.py
    ├── oai.css
    ├── API.txt
    └── API.html
├── LICENSE.txt
└── HISTORY.txt


/src/oaipmh/__init__.py:
--------------------------------------------------------------------------------
1 | # 
2 | 


--------------------------------------------------------------------------------
/src/oaipmh/tests/__init__.py:
--------------------------------------------------------------------------------
1 | # this is a package
2 | 


--------------------------------------------------------------------------------
/src/oaipmh/tests/fake4/mapping.txt:
--------------------------------------------------------------------------------
1 | verb=Identify
2 | 00001.xml


--------------------------------------------------------------------------------
/src/oaipmh/tests/fake5/mapping.txt:
--------------------------------------------------------------------------------
1 | verb=Identify
2 | 00001.xml


--------------------------------------------------------------------------------
/src/oaipmh/tests/fake3/mapping.txt:
--------------------------------------------------------------------------------
1 | verb=Identify
2 | 00001.xml
3 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include src *
2 | recursive-include doc *
3 | include *
4 | 


--------------------------------------------------------------------------------
/.hgignore:
--------------------------------------------------------------------------------
1 | src/pyoai.egg-info
2 | bin
3 | parts
4 | .installed.cfg
5 | develop-eggs
6 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | .installed.cfg
3 | bin
4 | develop-eggs
5 | parts
6 | src/pyoai.egg-info
7 | .tox
8 | 


--------------------------------------------------------------------------------
/src/oaipmh/tests/runtests.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Run from this directory: the test modules are named without a package path.
3 | python -m unittest test_broken test_client test_datestamp test_deleted_records test_server test_validation


--------------------------------------------------------------------------------
/src/oaipmh/tests/fake2/mapping.txt:
--------------------------------------------------------------------------------
1 | identifier=hdl%3A1765%2F1160&metadataPrefix=oai_dc&verb=GetRecord
2 | 00000.xml
3 | identifier=hdl%3A1765%2F1162&metadataPrefix=oai_dc&verb=GetRecord
4 | 00001.xml
5 | from=2004-01-01T00%3A00%3A00Z&metadataPrefix=oai_dc&verb=ListRecords
6 | 00002.xml
7 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
 1 | [tox]
 2 | envlist = py{27,35,36,37,38}
 3 | 
 4 | [gh-actions]
 5 | python =
 6 |     2.7: py27
 7 |     3.5: py35
 8 |     3.6: py36
 9 |     3.7: py37
10 |     3.8: py38
11 | 
12 | [testenv]
13 | changedir = src/oaipmh/tests
14 | commands = ./runtests.sh
15 | 
16 | [testenv:py27]
17 | deps =
18 |     mock
19 | 


--------------------------------------------------------------------------------
/src/oaipmh/tests/fake1/mapping.txt:
--------------------------------------------------------------------------------
 1 | verb=ListSets
 2 | 00000.xml
 3 | verb=Identify
 4 | 00001.xml
 5 | verb=ListMetadataFormats
 6 | 00002.xml
 7 | from=2003-04-10T00%3A00%3A00Z&metadataPrefix=oai_dc&verb=ListIdentifiers
 8 | 00003.xml
 9 | identifier=hdl%3A1765%2F315&metadataPrefix=oai_dc&verb=GetRecord
10 | 00004.xml
11 | from=2003-04-10T00%3A00%3A00Z&metadataPrefix=oai_dc&verb=ListRecords
12 | 00005.xml
13 | identifier=hdl%3A1765%2F315&metadataPrefix=oai_dc&verb=GetMetadata
14 | 00006.xml
15 | 


--------------------------------------------------------------------------------
/src/oaipmh/tests/fake2/00000.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" ?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2004-02-17T13:44:55Z</responseDate><request metadataPrefix="oai_dc" verb="GetRecord" identifier="hdl:1765/1160">http://dspace.ubib.eur.nl/oai/</request><GetRecord><record><header status="deleted"><identifier>hdl:1765/1160</identifier><datestamp>2004-02-16T13:29:54Z</datestamp><setSpec>1:1</setSpec><setSpec>1:1</setSpec></header></record></GetRecord></OAI-PMH>


--------------------------------------------------------------------------------
/src/oaipmh/tests/fake1/00002.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" ?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2003-04-30T16:08:02Z</responseDate><request verb="ListMetadataFormats">http://dspace.ubib.eur.nl/oai/</request><ListMetadataFormats><metadataFormat><metadataPrefix>oai_dc</metadataPrefix><schema>http://www.openarchives.org/OAI/2.0/oai_dc.xsd</schema><metadataNamespace>http://www.openarchives.org/OAI/2.0/oai_dc/</metadataNamespace></metadataFormat></ListMetadataFormats></OAI-PMH>


--------------------------------------------------------------------------------
/.github/workflows/run_tests.yml:
--------------------------------------------------------------------------------
 1 | name: Run tests
 2 | on: [push, pull_request]
 3 | 
 4 | jobs:
 5 |   build:
 6 |     runs-on: ubuntu-latest
 7 |     strategy:
 8 |       max-parallel: 4
 9 |       matrix:
10 |         python-version: [2.7, 3.6, 3.7, 3.8]
11 | 
12 |     steps:
13 |     - uses: actions/checkout@v1
14 |     - name: Set up Python ${{ matrix.python-version }}
15 |       uses: actions/setup-python@v1
16 |       with:
17 |         python-version: ${{ matrix.python-version }}
18 |     - name: Install dependencies
19 |       run: |
20 |         python -m pip install --upgrade pip
21 |         pip install tox tox-gh-actions
22 |     - name: Test with tox
23 |       run: tox
24 | 


--------------------------------------------------------------------------------
/INSTALL.txt:
--------------------------------------------------------------------------------
 1 | Installation
 2 | ============
 3 | 
 4 | python setup.py install
 5 | 
 6 | will install the oaipmh module in your Python's site-packages.
 7 | 
 8 | Python version
 9 | ==============
10 | 
11 | The module should work for Python versions 2.3 and up.
12 | 
13 | Dependencies
14 | ============
15 | 
16 | The oaipmh module needs the lxml python bindings for
17 | libxml2/libxslt. You can find lxml here:
18 | 
19 | http://codespeak.net/lxml
20 | 
21 | lxml needs libxml2 and libxslt (though not their Python bindings;
22 | installing those is optional). libxml2 can be found here:
23 | 
24 | http://xmlsoft.org/
25 | 
26 | and libxslt can be found here:
27 | 
28 | http://xmlsoft.org/XSLT
29 | 


--------------------------------------------------------------------------------
/CREDITS.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2003 - 2006 Infrae. All rights reserved.
 2 | See also LICENSE.txt
 3 | 
 4 | The oaipmh package for Python is developed by Infrae
 5 | (http://www.infrae.com). Initial development was for the Erasmus
 6 | University of Rotterdam library (http://www.eur.nl,
 7 | http://eps.eur.nl).
 8 | 
 9 | Infrae oaipmh module developers
10 | ===============================
11 | 
12 | Martijn Faassen <faassen@infrae.com>
13 | Eric Casteleijn <eric@infrae.com>
14 | Jasper Op de Coul <jasper@infrae.com>
15 | 
16 | Thanks to
17 | =========
18 | 
19 | 
20 | Uli Köhler and Michał Pasternak for Python3 compatibility. 
21 | 
22 | Jan-Wijbrand Kolman for API feedback.
23 | 
24 | Thom Hickey for critical discussion of the source code.
25 | 
26 | Thijs Janssen for a bug report.
27 | 
28 | Stefan Oderbolz (http_get client patch)
29 | 
30 | Many thanks go to Henk Ellermann at the library of Erasmus University
31 | Rotterdam for making this project possible.
32 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | ======
 2 | OAIPMH
 3 | ======
 4 | 
 5 | 
 6 | .. image:: https://github.com/infrae/pyoai/workflows/Run%20tests/badge.svg
 7 |     :target: https://github.com/infrae/pyoai/actions?query=workflow%3A%22Run+tests%22
 8 |     
 9 | The oaipmh module is a Python implementation of an "Open Archives
10 | Initiative Protocol for Metadata Harvesting" (version 2) client and
11 | server. The protocol is described here:
12 | 
13 | http://www.openarchives.org/OAI/openarchivesprotocol.html
14 | 
15 | Below is a simple implementation of an OAIPMH client:
16 | 
17 | >>> from oaipmh.client import Client
18 | >>> from oaipmh.metadata import MetadataRegistry, oai_dc_reader
19 | 
20 | >>> URL = 'http://uni.edu/ir/oaipmh'
21 | 
22 | >>> registry = MetadataRegistry()
23 | >>> registry.registerReader('oai_dc', oai_dc_reader)
24 | >>> client = Client(URL, registry)
25 | 
26 | >>> for record in client.listRecords(metadataPrefix='oai_dc'):
27 | ...     print(record)
28 | 
29 | 
30 | The pyoai package also contains a generic server implementation of the
31 | OAIPMH protocol, which is used as the foundation of the `MOAI Server Platform`_
32 | 
33 | .. _MOAI Server Platform: http://pypi.python.org/pypi/MOAI
34 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, find_packages
 2 | from os.path import join, dirname
 3 | 
 4 | setup(
 5 |     name='pyoai',
 6 |     version='2.5.2pre',
 7 |     author='Infrae',
 8 |     author_email='info@infrae.com',
 9 |     url='http://www.infrae.com/download/oaipmh',
10 |     classifiers=["Development Status :: 4 - Beta",
11 |                  "Programming Language :: Python",
12 |                  "License :: OSI Approved :: BSD License",
13 |                  "Topic :: Software Development :: Libraries :: Python Modules",
14 |                  "Environment :: Web Environment"],
15 |     description="""The oaipmh module is a Python implementation of an "Open Archives Initiative Protocol for Metadata Harvesting" (version 2) client and server.""",
16 |     long_description=(open(join(dirname(__file__), 'README.rst')).read()+
17 |         '\n\n'+
18 |         open(join(dirname(__file__), 'HISTORY.txt')).read()),
19 |     long_description_content_type='text/x-rst',
20 |     packages=find_packages('src'),
21 |     package_dir = {'': 'src'},
22 |     zip_safe=False,
23 |     license='BSD',
24 |     keywords='OAI-PMH xml archive',
25 |     install_requires=['lxml', 'six'],
26 | )
27 | 


--------------------------------------------------------------------------------
/.hgtags:
--------------------------------------------------------------------------------
 1 | d5a3ef73faa2d52ee05571021fccabd3967312b6 pyoai-2_0b1
 2 | 6adf7a5390092088c5ed121965caa185fae4767e pyoai-2_0
 3 | 89abb3fc4659a08a4b232298d9848b0c7d7bd0ea eepi-2.1-prerelease
 4 | 2531c56e02c0828b26b707d59540a56cb6afc3da pyoai-2.1.2
 5 | 191ae315d02db00c42822dc6a1b6f08f181bbe3a pyoai-2.1.3
 6 | 64b86a11ecf6107316baa836480e8a4a619eb44e pyoai-2.1.4
 7 | 3754c3f119fa72b5174c5358048c1fd644f983d0 pyoai-2.1.6
 8 | 0000000000000000000000000000000000000000 pyoai-2.1.6
 9 | 65d0f7bdee6a5b5ff386d153dfb4ebdd458a3fab pyoai-2.1.5
10 | fffb45120065457f6ac2d397b97c8c1069ff1697 pyoai-2.2.1
11 | c3ae70b661a8bec2273432f2d540fe963c7d32c0 pyoai-2.3
12 | 63ad54d4a44a623786cc123f76b2cfa59edb1ebe pyoai-2.3.1
13 | 9a9e75ac23adbe19bb015a29faf464c882057378 pyoai-2.4
14 | 77c9da2756cc17de4ea226de7d04737daed0e7e8 pyoai-2.4.1
15 | e659e2a4e8d7a07cebf58b6838b7738a0f8a306b pyoai-2.4.2
16 | 0000000000000000000000000000000000000000 pyoai-2.4.2
17 | 712f939900749717ecabddbb39f2a716bf8838a4 pyoai-2.4.2
18 | 0000000000000000000000000000000000000000 pyoai-2.4.2
19 | 88386ea25a94fae2815f1f364394c389ecd98351 pyoai-2.4.2
20 | 780e7c76d845999d8b2797ff2a43a1e17bb268e9 pyoai-2.4.3
21 | 570b3c00bbfff2341bae2c69ec12a1529624ea91 2.4.4
22 | 


--------------------------------------------------------------------------------
/doc/oaiclient.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | from oaipmh.client import Client
 4 | from oaipmh.metadata import MetadataRegistry, oai_dc_reader
 5 | 
 6 | URL = sys.argv[1]
 7 | METADATA_PREFIX = sys.argv[2]
 8 | if len(sys.argv) == 4:
 9 |     SETSPEC = sys.argv[3]
10 | else:
11 |     SETSPEC = None
12 |                        
13 | 
14 | 
15 | registry = MetadataRegistry()
16 | registry.registerReader('oai_dc', oai_dc_reader)
17 | registry.registerReader(METADATA_PREFIX, oai_dc_reader)
18 | 
19 | client = Client(URL, registry)
20 | 
21 | record_count = 0
22 | deleted_count = 0
23 | 
24 | if SETSPEC:
25 |     records = client.listRecords(metadataPrefix=METADATA_PREFIX, set=SETSPEC)
26 | else:
27 |     records = client.listRecords(metadataPrefix=METADATA_PREFIX)
28 | 
29 | for num, record in enumerate(records):
30 |     record_count += 1
31 |     delinfo = ''
32 |     if record[0].isDeleted():
33 |         deleted_count += 1
34 |         delinfo = '(deleted)'
35 |     print('%0.6d %s %s' % (num, record[0].identifier(), delinfo))
36 |     print('       %s' % ';'.join(record[0].setSpec()))
37 | 
38 | print('Harvested %s records, of which %s were deleted' % (record_count,
39 |                                                           deleted_count))
40 |     
41 | 
42 | 
43 | 
44 | 
45 | 
46 | 


--------------------------------------------------------------------------------
/src/oaipmh/tests/test_broken.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from datetime import datetime
 3 | from unittest import TestCase, TestSuite, makeSuite, main
 4 | 
 5 | from fakeclient import FakeClient
 6 | from oaipmh import metadata, error
 7 | 
 8 | test_directory = os.path.dirname(__file__)
 9 | 
10 | class BrokenDataTestCase(TestCase):
11 |     def createFakeClient(self, directory):
12 |         fake = os.path.join(test_directory, directory)
13 |         fakeclient = FakeClient(fake)
14 |         fakeclient.getMetadataRegistry().registerReader(
15 |             'oai_dc', metadata.oai_dc_reader)
16 |         return fakeclient
17 |     
18 |     def test_notwellformed(self):
19 |         fakeclient = self.createFakeClient('fake3')
20 |         self.assertRaises(error.XMLSyntaxError, fakeclient.identify)
21 | 
22 |     def test_unknown_entities(self):
23 |         fakeclient = self.createFakeClient('fake4')
24 |         self.assertRaises(error.XMLSyntaxError, fakeclient.identify)
25 | 
26 |     def test_broken_datestamp(self):
27 |         fakeclient = self.createFakeClient('fake5')
28 |         self.assertRaises(error.DatestampError, fakeclient.identify)
29 | 
30 | def test_suite():  # suite with every test method of BrokenDataTestCase
31 |     return TestSuite((makeSuite(BrokenDataTestCase), ))
32 | 
33 | if __name__ == '__main__':
34 |     main()
35 | 


--------------------------------------------------------------------------------
/src/oaipmh/tests/fake5/00001.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" ?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2006-02-10T13:21:17Z</responseDate><request verb="Identify">http://ep.eur.nl/oai/request</request><Identify><repositoryName>DSpace at Erasmus</repositoryName><baseURL>http://ep.eur.nl/oai/request</baseURL><protocolVersion>2.0</protocolVersion><adminEmail>eepi@ubib.eur.nl</adminEmail><earliestDatestamp>aaaa-bb-cc</earliestDatestamp><deletedRecord>persistent</deletedRecord><granularity>YYYY-MM-DDThh:mm:ssZ</granularity><compression>gzip</compression><compression>deflate</compression><description><toolkit xsi:schemaLocation="http://oai.dlib.vt.edu/OAI/metadata/toolkit http://oai.dlib.vt.edu/OAI/metadata/toolkit.xsd" xmlns="http://oai.dlib.vt.edu/OAI/metadata/toolkit"><title>OCLC's OAICat Repository Framework</title><author><name>Jeffrey A. Young</name><email>jyoung@oclc.org</email><institution>OCLC</institution></author><version>1.5.26</version><toolkitIcon>http://alcme.oclc.org/oaicat/oaicat_icon.gif</toolkitIcon><URL>http://www.oclc.org/research/software/oai/cat.shtm</URL></toolkit></description></Identify></OAI-PMH>


--------------------------------------------------------------------------------
/src/oaipmh/tests/fake1/00000.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" ?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2003-04-30T16:08:03Z</responseDate><request verb="ListSets">http://dspace.ubib.eur.nl/oai/</request><ListSets><set><setSpec>3</setSpec><setName>Erasmus MC (University Medical Center Rotterdam)</setName></set><set><setSpec>3:5</setSpec><setName>EUR Medical Dissertations</setName></set><set><setSpec>1</setSpec><setName>Erasmus Research Institute of Management (ERIM)</setName></set><set><setSpec>1:2</setSpec><setName>ERIM Inaugural Addresses Research in Management Series</setName></set><set><setSpec>1:4</setSpec><setName>ERIM Ph.D. Series Research in Management</setName></set><set><setSpec>1:1</setSpec><setName>ERIM Report Series Research in Management </setName></set><set><setSpec>2</setSpec><setName>Faculty of Social Sciences (FSW)</setName></set><set><setSpec>2:6</setSpec><setName>Centre for Public Management</setName></set><set><setSpec>2:7</setSpec><setName>Research Group on Public Governance</setName></set><set><setSpec>2:3</setSpec><setName>World Database of Happiness -  Summary reports</setName></set></ListSets></OAI-PMH>


--------------------------------------------------------------------------------
/src/oaipmh/tests/fake3/00001.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" ?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><bogus><responseDate>2006-02-10T13:21:17Z</responseDate><request verb="Identify">http://ep.eur.nl/oai/request</request><Identify><repositoryName>DSpace at Erasmus</repositoryName><baseURL>http://ep.eur.nl/oai/request</baseURL><protocolVersion>2.0</protocolVersion><adminEmail>eepi@ubib.eur.nl</adminEmail><earliestDatestamp>2001-01-01T00:00:00Z</earliestDatestamp><deletedRecord>persistent</deletedRecord><granularity>YYYY-MM-DDThh:mm:ssZ</granularity><compression>gzip</compression><compression>deflate</compression><description><toolkit xsi:schemaLocation="http://oai.dlib.vt.edu/OAI/metadata/toolkit http://oai.dlib.vt.edu/OAI/metadata/toolkit.xsd" xmlns="http://oai.dlib.vt.edu/OAI/metadata/toolkit"><title>OCLC's OAICat Repository Framework</title><author><name>Jeffrey A. Young</name><email>jyoung@oclc.org</email><institution>OCLC</institution></author><version>1.5.26</version><toolkitIcon>http://alcme.oclc.org/oaicat/oaicat_icon.gif</toolkitIcon><URL>http://www.oclc.org/research/software/oai/cat.shtm</URL></toolkit></description></Identify></OAI-PMH>


--------------------------------------------------------------------------------
/src/oaipmh/tests/fake4/00001.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" ?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2006-02-10T13:21:17Z&bogus;</responseDate><request verb="Identify">http://ep.eur.nl/oai/request</request><Identify><repositoryName>DSpace at Erasmus</repositoryName><baseURL>http://ep.eur.nl/oai/request</baseURL><protocolVersion>2.0</protocolVersion><adminEmail>eepi@ubib.eur.nl</adminEmail><earliestDatestamp>2001-01-01T00:00:00Z</earliestDatestamp><deletedRecord>persistent</deletedRecord><granularity>YYYY-MM-DDThh:mm:ssZ</granularity><compression>gzip</compression><compression>deflate</compression><description><toolkit xsi:schemaLocation="http://oai.dlib.vt.edu/OAI/metadata/toolkit http://oai.dlib.vt.edu/OAI/metadata/toolkit.xsd" xmlns="http://oai.dlib.vt.edu/OAI/metadata/toolkit"><title>OCLC's OAICat Repository Framework</title><author><name>Jeffrey A. Young</name><email>jyoung@oclc.org</email><institution>OCLC</institution></author><version>1.5.26</version><toolkitIcon>http://alcme.oclc.org/oaicat/oaicat_icon.gif</toolkitIcon><URL>http://www.oclc.org/research/software/oai/cat.shtm</URL></toolkit></description></Identify></OAI-PMH>


--------------------------------------------------------------------------------
/src/oaipmh/tests/fake1/00001.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" ?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2003-04-30T16:08:01Z</responseDate><request verb="Identify">http://dspace.ubib.eur.nl/oai/</request><Identify><repositoryName>Erasmus University : Research Online</repositoryName><baseURL>http://dspace.ubib.eur.nl/oai/</baseURL><protocolVersion>2.0</protocolVersion><adminEmail>service@ubib.eur.nl</adminEmail><earliestDatestamp>2001-01-01T00:00:00Z</earliestDatestamp><deletedRecord>no</deletedRecord><granularity>YYYY-MM-DDThh:mm:ssZ</granularity><compression>gzip</compression><compression>compress</compression><compression>deflate</compression><description><toolkit xsi:schemaLocation="http://oai.dlib.vt.edu/OAI/metadata/toolkit http://oai.dlib.vt.edu/OAI/metadata/toolkit.xsd" xmlns="http://oai.dlib.vt.edu/OAI/metadata/toolkit"><title>OCLC's OAICat Repository Framework</title><toolkitIcon>http://alcme.oclc.org/oaicat/oaicat_icon.gif</toolkitIcon><author><name>Jeffrey A. Young</name><email>jyoung@oclc.org</email><institution>OCLC</institution></author><version>1.5.2</version><URL>http://www.oclc.org/research/software/oai/cat.shtm</URL></toolkit></description></Identify></OAI-PMH>


--------------------------------------------------------------------------------
/src/oaipmh/tests/createdata_deleted_records.py:
--------------------------------------------------------------------------------
 1 | from fakeserver import FakeCreaterServerProxy
 2 | 
 3 | # tied to the server at EUR..
 4 | server = FakeCreaterServerProxy(
 5 |     'http://dspace.ubib.eur.nl/oai/',
 6 |     '/home/eric/CVS_checkouts/oai/tests/fake2')
 7 | 
 8 | #deleted record
 9 | print "GetRecord"
10 | header, metadata, about = server.getRecord(
11 |     metadataPrefix='oai_dc', identifier='hdl:1765/1160')
12 | print "identifier:", header.identifier()
13 | print "datestamp:", header.datestamp()
14 | print "setSpec:", header.setSpec()
15 | print "isDeleted:", header.isDeleted()
16 | print
17 | 
18 | #normal record
19 | print "GetRecord"
20 | header, metadata, about = server.getRecord(
21 |     metadataPrefix='oai_dc', identifier='hdl:1765/1162')
22 | print "identifier:", header.identifier()
23 | print "datestamp:", header.datestamp()
24 | print "setSpec:", header.setSpec()
25 | print "isDeleted:", header.isDeleted()
26 | print
27 | 
28 | print "ListRecords"
29 | for header, metadata, about in server.listRecords(
30 |     from_=datetime(2004, 01, 01), until=datetime(2004, 02, 01),
31 |     metadataPrefix='oai_dc'):
32 |     print "header"
33 |     print "identifier:", header.identifier()
34 |     print "datestamp:", header.datestamp()
35 |     print "setSpec:", header.setSpec()
36 |     print "isDeleted:", header.isDeleted()
37 |     print "metadata"
38 |     if metadata is not None:
39 |         for fieldname in metadata.getMap().keys():
40 |             print "%s:" % fieldname, metadata.getField(fieldname)
41 |     print "about"
42 |     print about
43 | print
44 | 
45 | server.save()
46 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2003-2006 Infrae. All rights reserved.
 2 | 
 3 | Redistribution and use in source and binary forms, with or without
 4 | modification, are permitted provided that the following conditions are
 5 | met:
 6 | 
 7 |   1. Redistributions of source code must retain the above copyright
 8 |      notice, this list of conditions and the following disclaimer.
 9 |    
10 |   2. Redistributions in binary form must reproduce the above copyright
11 |      notice, this list of conditions and the following disclaimer in
12 |      the documentation and/or other materials provided with the
13 |      distribution.
14 | 
15 |   3. Neither the name of Infrae nor the names of its contributors may
16 |      be used to endorse or promote products derived from this software
17 |      without specific prior written permission.
18 | 
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INFRAE OR
23 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 


--------------------------------------------------------------------------------
/src/oaipmh/tests/test_deleted_records.py:
--------------------------------------------------------------------------------
 1 | from unittest import TestCase, TestSuite, main, makeSuite
 2 | from fakeclient import FakeClient
 3 | import os
 4 | from oaipmh import metadata
 5 | from datetime import datetime
 6 | 
 7 | directory = os.path.dirname(__file__)
 8 | fake2 = os.path.join(directory, 'fake2')
 9 | fakeclient = FakeClient(fake2)  # one client shared by all tests below
10 | # the tests request oai_dc metadata; register a reader for that prefix
11 | fakeclient.getMetadataRegistry().registerReader(
12 |     'oai_dc', metadata.oai_dc_reader)
13 | 
14 | class DeletedRecordsTestCase(TestCase):
15 |     def test_getRecord_deleted(self):
16 |         header, metadata, about = fakeclient.getRecord(
17 |             metadataPrefix='oai_dc', identifier='hdl:1765/1160')
18 |         self.assert_(metadata is None)
19 |         self.assert_(header.isDeleted())
20 | 
21 |     def test_getRecord_not_deleted(self):
22 |         header, metadata, about = fakeclient.getRecord(
23 |             metadataPrefix='oai_dc', identifier='hdl:1765/1162')
24 |         self.assert_(metadata is not None)
25 |         self.assert_(not header.isDeleted())
26 | 
27 |     def test_listRecords(self):
28 |         records = fakeclient.listRecords(from_=datetime(2004, 1, 1),
29 |                                          metadataPrefix='oai_dc')
30 |         # lazy, just test first one
31 |         for header, metadata, about in records:
32 |             if header.isDeleted():
33 |                 self.assert_(metadata is None)
34 |             else:
35 |                 self.assert_(metadata is not None)
36 |     
37 | def test_suite():  # suite with every test method of DeletedRecordsTestCase
38 |     return TestSuite((makeSuite(DeletedRecordsTestCase), ))
39 | 
40 | if __name__=='__main__':
41 |     main(defaultTest='test_suite')
42 | 


--------------------------------------------------------------------------------
/src/oaipmh/error.py:
--------------------------------------------------------------------------------
 1 | 
 2 | class ErrorBase(Exception):
 3 |     def oainame(self):
 4 |         name = self.__class__.__name__
 5 |         # strip off 'Error' part
 6 |         name = name[:-5]
 7 |         # lowercase error name
 8 |         name = name[0].lower() + name[1:]
 9 |         return name
10 | 
11 | class BadArgumentError(ErrorBase):
12 |     pass
13 | 
14 | class BadVerbError(ErrorBase):
15 |     pass
16 | 
17 | class BadResumptionTokenError(ErrorBase):
18 |     pass
19 | 
20 | class CannotDisseminateFormatError(ErrorBase):
21 |     pass
22 | 
23 | class IdDoesNotExistError(ErrorBase):
24 |     pass
25 | 
26 | class NoRecordsMatchError(ErrorBase):
27 |     pass
28 | 
29 | class NoMetadataFormatsError(ErrorBase):
30 |     pass
31 | 
32 | class NoSetHierarchyError(ErrorBase):
33 |     pass
34 | 
35 | class UnknownError(ErrorBase):
36 |     pass
37 | 
38 | # errors not defined by OAI-PMH but which can occur in a client when
39 | # the server is somehow misbehaving
40 | class ClientError(Exception):
41 |     """Base class of the client-side errors; details() is left to subclasses."""
42 |     def details(self):
43 |         """Error details in human readable text."""
44 |         raise NotImplementedError
45 | 
46 | class XMLSyntaxError(ClientError):
47 |     """The OAI-PMH XML can not be parsed as it is not well-formed."""
48 |     def details(self):
49 |         """Return a fixed, human readable explanation of the problem."""
50 |         return ("The data delivered by the server could not be parsed, as it "
51 |                 "is not well-formed XML.")
52 |     
class DatestampError(ClientError):
    """The OAI-PMH datestamps were not proper UTC datestamps as by spec.

    The offending value is kept on the ``datestamp`` attribute and is also
    passed on to the base exception, so that ``str(error)``, ``error.args``
    and tracebacks show it instead of an empty message.
    """

    def __init__(self, datestamp):
        # hand the value to Exception so the error is not message-less
        super(DatestampError, self).__init__(datestamp)
        self.datestamp = datestamp

    def details(self):
        """Return a human readable message naming the bad datestamp."""
        # wrap in a tuple so a tuple-valued datestamp cannot break formatting
        return "An illegal datestamp was encountered: %s" % (self.datestamp,)
61 |     
62 | 


--------------------------------------------------------------------------------
/src/oaipmh/tests/fake2/00001.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" ?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2004-02-17T13:44:55Z</responseDate><request identifier="hdl:1765/1162" verb="GetRecord" metadataPrefix="oai_dc">http://dspace.ubib.eur.nl/oai/</request><GetRecord><record><header><identifier>hdl:1765/1162</identifier><datestamp>2004-02-17T10:30:46Z</datestamp><setSpec>6:20</setSpec></header><metadata><oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd"><dc:creator>Cavelaars, P.A.D.</dc:creator><dc:contributor>Cavelaars, P.A.D.</dc:contributor><dc:date>2004-02-16T12:15:34Z</dc:date><dc:date>2004-02-16T12:15:34Z</dc:date><dc:date>2004-02-16T12:15:34Z</dc:date><dc:identifier>http://hdl.handle.net/1765/1162</dc:identifier><dc:description>Policymakers’ efforts to boost trend output growth may be hampered by the presence of a tradeoff between productivity gains and job creation. This paper presents empirical evidence that the negative relationship between productivity growth and employment growth that prevailed in the 1960s and 1970s has disappeared since then. This finding is robust to using alternative measures and including other explanatory variables. 
The improved tradeoff may be good news for policymakers who aim at raising the ‘speed limit’ of the economy.</dc:description><dc:language>en</dc:language><dc:relation>OCFEB Research Memoranda;RM 0403</dc:relation><dc:subject>Productivity</dc:subject><dc:subject>employment</dc:subject><dc:subject>cross-country analysis</dc:subject><dc:title>Has the tradeoff between productivity gains and job growth disappeared?</dc:title><dc:type>Working Paper</dc:type><dc:subject>O400; O570</dc:subject><dc:format>application/pdf https://ep.eur.nl/retrieve/2566/rm0403.pdf</dc:format></oai_dc:dc></metadata></record></GetRecord></OAI-PMH>


--------------------------------------------------------------------------------
/src/oaipmh/tests/fake1/00003.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" ?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2003-04-30T16:08:01Z</responseDate><request metadataPrefix="oai_dc" verb="ListIdentifiers" from="2003-04-10">http://dspace.ubib.eur.nl/oai/</request><ListIdentifiers><header><identifier>hdl:1765/308</identifier><datestamp>2003-04-15T10:18:51Z</datestamp><setSpec>1:2</setSpec></header><header><identifier>hdl:1765/309</identifier><datestamp>2003-04-15T15:53:12Z</datestamp><setSpec>1:2</setSpec></header><header><identifier>hdl:1765/311</identifier><datestamp>2003-04-22T12:49:53Z</datestamp><setSpec>2:6</setSpec></header><header><identifier>hdl:1765/312</identifier><datestamp>2003-04-22T12:52:59Z</datestamp><setSpec>2:6</setSpec></header><header><identifier>hdl:1765/313</identifier><datestamp>2003-04-22T12:59:14Z</datestamp><setSpec>2:6</setSpec></header><header><identifier>hdl:1765/315</identifier><datestamp>2003-04-22T13:13:44Z</datestamp><setSpec>2:7</setSpec></header><header><identifier>hdl:1765/316</identifier><datestamp>2003-04-22T14:05:54Z</datestamp><setSpec>1:1</setSpec></header><header><identifier>hdl:1765/317</identifier><datestamp>2003-04-28T10:07:59Z</datestamp><setSpec>1:1</setSpec></header><header><identifier>hdl:1765/318</identifier><datestamp>2003-04-28T10:15:57Z</datestamp><setSpec>1:1</setSpec></header><header><identifier>hdl:1765/319</identifier><datestamp>2003-04-29T10:29:32Z</datestamp><setSpec>1:1</setSpec></header><header><identifier>hdl:1765/320</identifier><datestamp>2003-04-29T10:49:16Z</datestamp><setSpec>1:1</setSpec></header><header><identifier>hdl:1765/321</identifier><datestamp>2003-04-29T13:59:06Z</datestamp><setSpec>1:1</setSpec></header><header><identifier>hdl:1765/322</identifier><datestamp>2003-04-29T14:16:48Z</datestamp><setSpec>1:1</setSpec></header><header><identif
ier>hdl:1765/323</identifier><datestamp>2003-04-29T15:15:11Z</datestamp><setSpec>1:1</setSpec></header><header><identifier>hdl:1765/324</identifier><datestamp>2003-04-29T15:33:57Z</datestamp><setSpec>1:1</setSpec></header><header><identifier>hdl:1765/325</identifier><datestamp>2003-04-29T15:57:01Z</datestamp><setSpec>1:1</setSpec></header></ListIdentifiers></OAI-PMH>


--------------------------------------------------------------------------------
/src/oaipmh/tests/createdata.py:
--------------------------------------------------------------------------------
 1 | from fakeclient import FakeCreaterClient
 2 | 
 3 | # tied to the server at EUR..
 4 | client = FakeCreaterClient(
 5 |     'http://dspace.ubib.eur.nl/oai/',
 6 |     '/home/faassen/py/oai/tests/fake2')
 7 | 
 8 | print "GetRecord"
 9 | header, metadata, about = client.getRecord(
10 |     metadataPrefix='oai_dc', identifier='hdl:1765/315')
11 | print "identifier:", header.identifier()
12 | print "datestamp:", header.datestamp()
13 | print "setSpec:", header.setSpec()
14 | print "isDeleted:", header.isDeleted()
15 | print
16 | 
17 | print "Identify"
18 | identify = client.identify()
19 | print "repositoryName:", identify.repositoryName()
20 | print "baseURL:", identify.baseURL()
21 | print "protocolVerson:", identify.protocolVersion()
22 | print "adminEmails:", identify.adminEmails()
23 | print "earliestDatestamp:", identify.earliestDatestamp()
24 | print "deletedRecords:", identify.deletedRecord()
25 | print "granularity:", identify.granularity()
26 | print "compression:", identify.compression()
27 | print
28 | 
29 | print "ListIdentifiers"
30 | headers = client.listIdentifiers(from_=datetime(2003, 04, 10),
31 |                                  metadataPrefix='oai_dc')
32 | for header in headers:
33 |     print "identifier:", header.identifier()
34 |     print "datestamp:", header.datestamp()
35 |     print "setSpec:", header.setSpec()
36 |     print "isDeleted:", header.isDeleted()
37 | print
38 | 
39 | print "ListMetadataFormats"
40 | for prefix, schema, ns in client.listMetadataFormats():
41 |     print "metadataPrefix:", prefix
42 |     print "schema:", schema
43 |     print "metadataNamespace:", ns
44 | print
45 | 
46 | print "ListRecords"
47 | for header, metadata, about in client.listRecords(
48 |     from_=datetime(2003, 04, 10), metadataPrefix='oai_dc'):
49 |     print "header"
50 |     print "identifier:", header.identifier()
51 |     print "datestamp:", header.datestamp()
52 |     print "setSpec:", header.setSpec()
53 |     print "isDeleted:", header.isDeleted()
54 |     #print "metadata"
55 |     #for fieldname in fieldnames:
56 |     #    print "%s:" % fieldname, metadata.getField(fieldname)
57 |     print "about"
58 |     print about
59 | print
60 | 
61 | print "ListSets"
62 | for setSpec, setName, setDescription in client.listSets():
63 |     print "setSpec:", setSpec
64 |     print "setName:", setName
65 |     print "setDescription:", setDescription
66 | print
67 | 
68 | client.save()
69 | 


--------------------------------------------------------------------------------
/src/oaipmh/tests/test_validation.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from oaipmh import validation
 3 | 
 4 | class ArgumentValidatorTestCase(unittest.TestCase):
 5 |     def test_optional(self):
 6 |         spec = {
 7 |             'foo': 'optional',
 8 |             'bar': 'optional'
 9 |             }
10 |         self.assertEquals(
11 |             None,
12 |             validation.validate(spec, {'foo': 'Foo', 'bar': 'Bar'}))
13 |         # an extra argument gives an error
14 |         self.assertRaises(
15 |             validation.BadArgumentError,
16 |             validation.validate,
17 |             spec, {'hoi': 'Hoi', 'foo': 'Foo', 'bar': 'Bar'})
18 |         # a missing optional argument is fine
19 |         self.assertEquals(
20 |             None,
21 |             validation.validate(spec, {'foo': 'Foo'}))
22 |         self.assertEquals(
23 |             None,
24 |             validation.validate(spec, {}))
25 | 
26 |     def test_required(self):
27 |         spec = {
28 |             'foo': 'required',
29 |             'bar': 'optional'}
30 |         self.assertEquals(
31 |             None,
32 |             validation.validate(spec, {'foo': 'Foo', 'bar': 'Bar'}))
33 |         self.assertEquals(
34 |             None,
35 |             validation.validate(spec, {'foo': 'Foo'}))
36 |         self.assertRaises(
37 |             validation.BadArgumentError,
38 |             validation.validate, spec, {'bar': 'Bar'})
39 | 
40 |     def test_exclusive(self):
41 |         spec = {
42 |             'foo': 'required',
43 |             'bar': 'required',
44 |             'hoi': 'exclusive'}
45 |         self.assertEquals(
46 |             None,
47 |             validation.validate(spec, {'foo': 'Foo', 'bar': 'Bar'}))
48 |         self.assertRaises(
49 |             validation.BadArgumentError,
50 |             validation.validate, spec, {'foo': 'Foo'})
51 |         self.assertRaises(
52 |             validation.BadArgumentError,
53 |             validation.validate, spec, {'bar': 'Bar'})
54 |         # or a single exclusive argument
55 |         self.assertEquals(
56 |             None,
57 |             validation.validate(spec, {'hoi': 'Hoi'}))
58 |         self.assertRaises(
59 |             validation.BadArgumentError,
60 |             validation.validate, spec, {'foo': 'Foo', 'hoi': 'Hoi'})
61 |         
def test_suite():
    """Return a TestSuite with all ArgumentValidatorTestCase tests.

    Uses the default test loader rather than ``unittest.makeSuite``,
    which no longer exists in recent Python versions.
    """
    loader = unittest.defaultTestLoader
    return unittest.TestSuite(
        [loader.loadTestsFromTestCase(ArgumentValidatorTestCase)])
64 | 
if __name__ == '__main__':
    # only the ``unittest`` module is imported in this file, so the runner
    # has to be reached through it (a bare ``main`` is undefined here)
    unittest.main(defaultTest='test_suite')
67 | 


--------------------------------------------------------------------------------
/src/oaipmh/tests/createbrokendata.py:
--------------------------------------------------------------------------------
 1 | from fakeclient import FakeCreaterClient
 2 | from datetime import datetime
 3 | from oaipmh import metadata
 4 | 
 5 | registry = metadata.MetadataRegistry()
 6 | registry.registerReader('oai_dc', metadata.oai_dc_reader)
 7 | # tied to the server at EUR..
 8 | client = FakeCreaterClient(
 9 |     'http://ep.eur.nl/oai/request',
10 |     '/home/faassen/tmp/fake3',
11 |     registry
12 |     )
13 | 
14 | #print "GetRecord"
15 | #header, metadata, about = client.getRecord(
16 | #    metadataPrefix='oai_dc', identifier='hdl:1765/315')
17 | #print "identifier:", header.identifier()
18 | #print "datestamp:", header.datestamp()
19 | #print "setSpec:", header.setSpec()
20 | #print "isDeleted:", header.isDeleted()
21 | #print
22 | 
23 | print "Identify"
24 | identify = client.identify()
25 | print "repositoryName:", identify.repositoryName()
26 | print "baseURL:", identify.baseURL()
27 | print "protocolVerson:", identify.protocolVersion()
28 | print "adminEmails:", identify.adminEmails()
29 | print "earliestDatestamp:", identify.earliestDatestamp()
30 | print "deletedRecords:", identify.deletedRecord()
31 | print "granularity:", identify.granularity()
32 | print "compression:", identify.compression()
33 | print
34 | 
35 | print "ListIdentifiers"
36 | headers = client.listIdentifiers(from_=datetime(2006, 02, 8),
37 |                                  metadataPrefix='oai_dc')
38 | for header in headers:
39 |     print "identifier:", header.identifier()
40 |     print "datestamp:", header.datestamp()
41 |     print "setSpec:", header.setSpec()
42 |     print "isDeleted:", header.isDeleted()
43 | print
44 | 
45 | print "ListMetadataFormats"
46 | for prefix, schema, ns in client.listMetadataFormats():
47 |     print "metadataPrefix:", prefix
48 |     print "schema:", schema
49 |     print "metadataNamespace:", ns
50 | print
51 | 
52 | print "ListRecords"
53 | for header, metadata, about in client.listRecords(
54 |     from_=datetime(2006, 02, 8), metadataPrefix='oai_dc'):
55 |     print "header"
56 |     print "identifier:", header.identifier()
57 |     print "datestamp:", header.datestamp()
58 |     print "setSpec:", header.setSpec()
59 |     print "isDeleted:", header.isDeleted()
60 |     #print "metadata"
61 |     #for fieldname in fieldnames:
62 |     #    print "%s:" % fieldname, metadata.getField(fieldname)
63 |     print "about"
64 |     print about
65 | print
66 | 
67 | print "ListSets"
68 | for setSpec, setName, setDescription in client.listSets():
69 |     print "setSpec:", setSpec
70 |     print "setName:", setName
71 |     print "setDescription:", setDescription
72 | print
73 | 
74 | client.save()
75 | 


--------------------------------------------------------------------------------
/src/oaipmh/tests/fake1/00006.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" ?><oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd"><dc:contributor>Edwards, A.R.</dc:contributor><dc:date>2003-04-22T13:13:44Z</dc:date><dc:date>2003-04-22T13:13:44Z</dc:date><dc:date>2003-04-22T13:13:44Z</dc:date><dc:identifier>90-9014980-5</dc:identifier><dc:identifier>http://hdl.handle.net/1765/315</dc:identifier><dc:description>THE WOMEN'S MOVEMENT ONLINE. A study into the uses of Internet by women's organizations in the Netherlands Arthur Edwards, Erasmus University Rotterdam Edwards@fsw.eur.nl Summary. This is an in-depth study of 12 organizations: six grass-roots organizations, three umbrella organizations and three service organizations within the Dutch women's movement. Also, six 'virtual organizations' (three portal sites, a platform site and two web organizations) were investigated. Apart from the service organizations, the uses of the Internet are almost limited to three communicative functions: information dissemi-nation and retrieval, recruitment and communication between the leaderships of organizations. Most organizations are leaving the 'homepage phase' of site development, but their current new ambitions seem to be more directed at applying network technology for purposes of internal communication than at interaction with the organization's environment. Until now, Internet uses had indeed some effects on the mobilization of resources, the relations with the environment and the 'management of frames', but these effects are almost limited to greater effectiveness and efficiency of existing action patterns. 
All organizations are now facing a situation in which the internal communication has to proceed along two speeds: only a part of the membership (individual members or member organizations) is online. The virtual organizations are more representative for the innovative potential of Internet. Together, they shape the contours of an information- and communication infrastructure for the women's movement in the information age.</dc:description><dc:format>151500</dc:format><dc:format>application/pdf</dc:format><dc:language>nl</dc:language><dc:subject>social movement internet</dc:subject><dc:subject>uses of internet</dc:subject><dc:subject>effects of virtual organizations</dc:subject><dc:subject>information-and communication infrastructure</dc:subject><dc:title>De vrouwenbeweging online. Een onderzoek naar het gebruik van Internet door vrouwenorganisaties in Nederland .</dc:title><dc:type>Technical Report</dc:type></oai_dc:dc>


--------------------------------------------------------------------------------
/src/oaipmh/validation.py:
--------------------------------------------------------------------------------
 1 | 
 2 | #
 3 | class BadArgumentError(Exception):
 4 |     pass
 5 | 
 6 | def validate(argspec, dictionary):
 7 |     exclusive = None
 8 |     for arg_name, arg_type in list(argspec.items()):
 9 |         if arg_type == 'exclusive':
10 |             exclusive = arg_name
11 |     # check if we have unknown arguments
12 |     for key, value in list(dictionary.items()):
13 |         if not key in argspec:
14 |             msg = "Unknown argument: %s" % key
15 |             raise BadArgumentError(msg)
16 |     # first investigate if we have exclusive argument
17 |     if exclusive in dictionary:
18 |         if len(dictionary) > 1:
19 |             msg = ("Exclusive argument %s is used but other "
20 |                    "arguments found." % exclusive)
21 |             raise BadArgumentError(msg)
22 |         return
23 |     # if not exclusive, check for required
24 |     for arg_name, arg_type in list(argspec.items()): 
25 |         if arg_type == 'required':
26 |             msg = "Argument required but not found: %s" % arg_name
27 |             if not arg_name in dictionary:
28 |                 raise BadArgumentError(msg)
29 |     return
30 |         
class ValidationSpec(object):
    """Argument specs, one class attribute per verb.

    Each attribute maps an argument name to 'required' or 'optional' and
    is looked up by verb name in validateArguments().
    """

    # verbs that take no arguments at all
    Identify = {}
    ListSets = {}

    # verbs addressing a single record
    GetRecord = {
        'identifier': 'required',
        'metadataPrefix': 'required',
    }
    GetMetadata = {
        'identifier': 'required',
        'metadataPrefix': 'required',
    }

    ListMetadataFormats = {
        'identifier': 'optional',
    }

    # selective harvesting verbs
    ListIdentifiers = {
        'metadataPrefix': 'required',
        'from_': 'optional',
        'until': 'optional',
        'set': 'optional',
    }
    ListRecords = {
        'metadataPrefix': 'required',
        'from_': 'optional',
        'until': 'optional',
        'set': 'optional',
    }
64 | 
class ResumptionValidationSpec(ValidationSpec):
    """ValidationSpec variant that also allows a resumptionToken.

    The list verbs below additionally accept 'resumptionToken' as an
    exclusive argument; all other verbs are inherited unchanged.
    """

    ListSets = {
        'resumptionToken': 'exclusive',
    }

    ListIdentifiers = {
        'metadataPrefix': 'required',
        'from_': 'optional',
        'until': 'optional',
        'set': 'optional',
        'resumptionToken': 'exclusive',
    }

    ListRecords = {
        'metadataPrefix': 'required',
        'from_': 'optional',
        'until': 'optional',
        'set': 'optional',
        'resumptionToken': 'exclusive',
    }
86 | 
def validateArguments(verb, kw):
    """Validate `kw` against the ValidationSpec entry named `verb`."""
    spec = getattr(ValidationSpec, verb)
    validate(spec, kw)
89 | 
def validateResumptionArguments(verb, kw):
    """Validate `kw` against the ResumptionValidationSpec entry for `verb`."""
    spec = getattr(ResumptionValidationSpec, verb)
    validate(spec, kw)
92 |     
93 | 


--------------------------------------------------------------------------------
/src/oaipmh/tests/test_datestamp.py:
--------------------------------------------------------------------------------
 1 | from datetime import datetime
 2 | from unittest import TestCase, TestSuite, makeSuite
 3 | from oaipmh.datestamp import datestamp_to_datetime,\
 4 |      tolerant_datestamp_to_datetime
 5 | from oaipmh.error import DatestampError
 6 | 
 7 | class DatestampTestCase(TestCase):
 8 |     def test_strict_datestamp_to_datetime(self):
 9 |         self.assertEquals(
10 |             datetime(2005, 7, 4, 14, 35, 10),
11 |             datestamp_to_datetime('2005-07-04T14:35:10Z'))
12 |         self.assertEquals(
13 |             datetime(2005, 1, 24, 14, 34, 2),
14 |             datestamp_to_datetime('2005-01-24T14:34:02Z'))
15 |         self.assertEquals(
16 |             datetime(2005, 7, 4),
17 |             datestamp_to_datetime('2005-07-04'))
18 |         self.assertRaises(DatestampError,
19 |                           datestamp_to_datetime, '2005')
20 |         self.assertRaises(DatestampError,
21 |                           datestamp_to_datetime, '2005-07-04Z')
22 |         self.assertRaises(DatestampError,
23 |                           datestamp_to_datetime, '2005-07')
24 |         self.assertRaises(DatestampError,
25 |                           datestamp_to_datetime, '2005-07-04T')
26 |         self.assertRaises(DatestampError,
27 |                           datestamp_to_datetime, '2005-07-04T14:00Z')
28 |         self.assertRaises(DatestampError,
29 |                           datestamp_to_datetime, '2005-07-04T14:00:00')
30 |         self.assertRaises(DatestampError,
31 |                           datestamp_to_datetime, 'aaaa-bb-cc')
32 |         self.assertRaises(DatestampError,
33 |                           datestamp_to_datetime, 'foo')
34 |         try:
35 |             datestamp_to_datetime('foo')
36 |         except DatestampError as e:
37 |             self.assertEquals('foo', e.datestamp)
38 | 
39 |     def test_strict_datestamp_to_datetime_inclusive(self):
40 |         # passing inclusive=True to datestamp_to_datetime
41 |         # should default the time to 23:59:59 instead of 00:00:00
42 |         # when only a date is supplied
43 | 
44 |         self.assertEquals(datetime(2009, 11, 16, 23, 59, 59),
45 |                           datestamp_to_datetime('2009-11-16',
46 |                                                 inclusive=True))
47 |         
48 |     def test_tolerant_datestamp_to_datetime(self):
49 |         f = tolerant_datestamp_to_datetime
50 |         self.assertEquals(
51 |             datetime(2005, 7, 4, 14, 35, 10),
52 |             f('2005-07-04T14:35:10Z'))
53 |         self.assertEquals(
54 |             datetime(2005, 1, 24, 14, 34, 2),
55 |             f('2005-01-24T14:34:02Z'))
56 |         self.assertEquals(
57 |             datetime(2005, 7, 4),
58 |             f('2005-07-04'))
59 |         self.assertEquals(
60 |             datetime(2005, 1, 1),
61 |             f('2005'))
62 |         self.assertEquals(
63 |             datetime(2005, 2, 1),
64 |             f('2005-02'))
65 |         
def test_suite():
    """Return a TestSuite holding every DatestampTestCase test."""
    suite = TestSuite()
    suite.addTest(makeSuite(DatestampTestCase))
    return suite
68 | 


--------------------------------------------------------------------------------
/src/oaipmh/datestamp.py:
--------------------------------------------------------------------------------
 1 | import datetime
 2 | from oaipmh.error import DatestampError
 3 | 
 4 | def datetime_to_datestamp(dt, day_granularity=False):
 5 |     assert dt.tzinfo is None # only accept timezone naive datetimes
 6 |     # ignore microseconds
 7 |     dt = dt.replace(microsecond=0)
 8 |     result = dt.isoformat() + 'Z'
 9 |     if day_granularity:
10 |         result = result[:-10]
11 |     return result
12 | 
13 | # handy utility function not used by pyoai itself yet
def date_to_datestamp(d, day_granularity=False):
    """Format date `d` as a datestamp, taking the time to be midnight."""
    midnight = datetime.time(0)
    as_datetime = datetime.datetime.combine(d, midnight)
    return datetime_to_datestamp(as_datetime, day_granularity)
17 | 
def datestamp_to_datetime(datestamp, inclusive=False):
    """Parse a datestamp string into a datetime.

    With `inclusive` a date-only datestamp gets the time 23:59:59
    instead of 00:00:00.  Raises DatestampError when the datestamp
    cannot be parsed.
    """
    try:
        parsed = _datestamp_to_datetime(datestamp, inclusive)
    except ValueError:
        # report parse failures uniformly, naming the bad datestamp
        raise DatestampError(datestamp)
    return parsed
23 |     
24 | def _datestamp_to_datetime(datestamp, inclusive=False):
25 |     splitted = datestamp.split('T')
26 |     if len(splitted) == 2:
27 |         d, t = splitted
28 |         if not t or t[-1] != 'Z':
29 |             raise DatestampError(datestamp)
30 |         # strip off 'Z'
31 |         t = t[:-1]
32 |     else:
33 |         d = splitted[0]
34 |         if inclusive:
35 |             # used when a date was specified as ?until parameter
36 |             t = '23:59:59'
37 |         else:
38 |             t = '00:00:00'
39 |     YYYY, MM, DD = d.split('-')
40 |     hh, mm, ss = t.split(':') # this assumes there's no timezone info
41 |     # Some Dspace implementations are returning the in the YYYY-MM-DDThh:mm:ss.sssZ format 
42 |     # instead of YYYY-MM-DDThh:mm:ssZ as specified in the AOI-PMH protocol
43 |     # This resolves that
44 |     ss = ss.split('.')[0]
45 |     return datetime.datetime(
46 |         int(YYYY), int(MM), int(DD), int(hh), int(mm), int(ss))
47 | 
def tolerant_datestamp_to_datetime(datestamp):
    """A datestamp to datetime that's more tolerant of diverse inputs.

    Not used inside pyoai itself right now, but can be used when defining
    your own metadata schema if that has a broader variety of datetimes
    in there.

    Besides full datestamps it accepts ``YYYY`` and ``YYYY-MM`` (missing
    month and day default to 01) and, like the strict parser, discards
    fractional seconds.

    Raises DatestampError when a time part lacks the trailing 'Z', when
    there is more than one 'T', or when the date or time has the wrong
    number of fields.  Non-numeric or out-of-range values surface as
    ValueError.
    """
    splitted = datestamp.split('T')
    if len(splitted) == 2:
        d, t = splitted
        # if no Z is present (or the time part is empty), raise error
        if not t.endswith('Z'):
            raise DatestampError(datestamp)
        # split off Z at the end
        t = t[:-1]
    elif len(splitted) == 1:
        d = splitted[0]
        t = '00:00:00'
    else:
        # more than one 'T': do not silently ignore what follows the date
        raise DatestampError(datestamp)

    d_splitted = d.split('-')
    if len(d_splitted) > 3:
        raise DatestampError(datestamp)
    # fill in a missing month and/or day with '01'
    YYYY, MM, DD = d_splitted + ['01'] * (3 - len(d_splitted))

    t_splitted = t.split(':')
    if len(t_splitted) != 3:
        raise DatestampError(datestamp)
    hh, mm, ss = t_splitted
    # tolerate YYYY-MM-DDThh:mm:ss.sssZ by dropping the fraction
    ss = ss.split('.')[0]
    return datetime.datetime(
        int(YYYY), int(MM), int(DD), int(hh), int(mm), int(ss))
86 | 


--------------------------------------------------------------------------------
/src/oaipmh/tests/fake1/00004.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" ?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2003-04-30T16:08:01Z</responseDate><request identifier="hdl:1765/315" metadataPrefix="oai_dc" verb="GetRecord">http://dspace.ubib.eur.nl/oai/</request><GetRecord><record><header><identifier>hdl:1765/315</identifier><datestamp>2003-04-22T13:13:44Z</datestamp><setSpec>2:7</setSpec></header><metadata><oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd"><dc:contributor>Edwards, A.R.</dc:contributor><dc:date>2003-04-22T13:13:44Z</dc:date><dc:date>2003-04-22T13:13:44Z</dc:date><dc:date>2003-04-22T13:13:44Z</dc:date><dc:identifier>90-9014980-5</dc:identifier><dc:identifier>http://hdl.handle.net/1765/315</dc:identifier><dc:description>THE WOMEN'S MOVEMENT ONLINE. A study into the uses of Internet by women's organizations in the Netherlands Arthur Edwards, Erasmus University Rotterdam Edwards@fsw.eur.nl Summary. This is an in-depth study of 12 organizations: six grass-roots organizations, three umbrella organizations and three service organizations within the Dutch women's movement. Also, six 'virtual organizations' (three portal sites, a platform site and two web organizations) were investigated. Apart from the service organizations, the uses of the Internet are almost limited to three communicative functions: information dissemi-nation and retrieval, recruitment and communication between the leaderships of organizations. 
Most organizations are leaving the 'homepage phase' of site development, but their current new ambitions seem to be more directed at applying network technology for purposes of internal communication than at interaction with the organization's environment. Until now, Internet uses had indeed some effects on the mobilization of resources, the relations with the environment and the 'management of frames', but these effects are almost limited to greater effectiveness and efficiency of existing action patterns. All organizations are now facing a situation in which the internal communication has to proceed along two speeds: only a part of the membership (individual members or member organizations) is online. The virtual organizations are more representative for the innovative potential of Internet. Together, they shape the contours of an information- and communication infrastructure for the women's movement in the information age.</dc:description><dc:format>151500</dc:format><dc:format>application/pdf</dc:format><dc:language>nl</dc:language><dc:subject>social movement internet</dc:subject><dc:subject>uses of internet</dc:subject><dc:subject>effects of virtual organizations</dc:subject><dc:subject>information-and communication infrastructure</dc:subject><dc:title>De vrouwenbeweging online. Een onderzoek naar het gebruik van Internet door vrouwenorganisaties in Nederland .</dc:title><dc:type>Technical Report</dc:type></oai_dc:dc></metadata></record></GetRecord></OAI-PMH>


--------------------------------------------------------------------------------
/src/oaipmh/tests/fakeclient.py:
--------------------------------------------------------------------------------
 1 | from oaipmh import client, common
 2 | import os.path
 3 | from datetime import datetime
 4 | try:
 5 |     from urllib.parse import urlencode
 6 | except ImportError:
 7 |     from urllib import urlencode
 8 | 
 9 | 
class FakeClient(client.BaseClient):
    """Client that answers requests from files listed in a mapping.

    The mapping is loaded once, at construction time, by createMapping().
    """

    def __init__(self, mapping_path, custom_retry_policy=None):
        client.BaseClient.__init__(
            self, custom_retry_policy=custom_retry_policy)
        self._mapping = createMapping(mapping_path)

    def makeRequest(self, **kw):
        """Return the stored response text for the request `kw`.

        This is a complete fake: only requests present in the mapping
        can be answered, anything else raises KeyError.
        """
        # getRequestKey sorts the arguments, so the key is stable
        request_key = getRequestKey(kw)
        return self._mapping[request_key]
20 | 
class TestError(Exception):
    """Exception carrying the keyword arguments of a request.

    The arguments are kept on the ``kw`` attribute and also passed on to
    the base exception, so that the error message is not empty.
    """

    # The name starts with "Test" but this is not a test case; tell
    # pytest not to try and collect it.
    __test__ = False

    def __init__(self, kw):
        super(TestError, self).__init__(kw)
        self.kw = kw
24 | 
class GranularityFakeClient(client.BaseClient):
    """Fake client for inspecting the requests made at a given granularity.

    identify() reports the granularity passed to the constructor, and
    every request is turned into a TestError instead of being sent.
    """

    def __init__(self, granularity):
        client.BaseClient.__init__(self)
        self._granularity = granularity

    def makeRequest(self, **kw):
        """Raise TestError carrying the request arguments `kw`."""
        # even more fake: nothing is requested at all.  A test can catch
        # the exception and inspect the request, e.g. to see whether it
        # uses day granularity.
        raise TestError(kw)

    def identify(self):
        """Return a canned Identify using the configured granularity."""
        earliest_datestamp = datetime(2005, 1, 1)
        return common.Identify(
            'Foo', 'http://test.info', '2.0', ['foo@bar.com'],
            earliest_datestamp, 'no', self._granularity,
            None)
41 | 
def getRequestKey(kw):
    """Return a stable, url-encoded key for the request dictionary `kw`.

    The items are sorted first, so equal dictionaries always produce the
    same key.
    """
    return urlencode(sorted(kw.items()))
48 | 
def createMapping(mapping_path):
    """Load the request-to-response mapping stored under `mapping_path`.

    ``mapping.txt`` in that directory holds pairs of lines: a request key
    followed by the name of the file containing the response.  Reading
    stops at the first empty line or at the end of the file.

    Returns a dictionary from request key to the response file's text.
    """
    result = {}
    # the context managers close the files even when reading fails
    with open(os.path.join(mapping_path, 'mapping.txt'), 'r') as f:
        while True:
            request = f.readline().strip()
            response = f.readline().strip()
            if not request or not response:
                break
            with open(os.path.join(mapping_path, response), 'r') as xml_f:
                result[request] = xml_f.read()
    return result
64 | 
class FakeCreaterClient(client.Client):
    """Client that records the responses it receives.

    Every makeRequest() call is delegated to client.Client and its result
    is remembered under the request's key; save() then writes everything
    to mapping_path in the layout createMapping() reads back.
    """
    def __init__(self, base_url, mapping_path, metadata_registry):
        """Set up the real client and an empty recording.

        base_url, metadata_registry - passed on to client.Client.__init__
        mapping_path - directory that save() writes into
        """
        client.Client.__init__(self, base_url, metadata_registry)
        self._mapping = {}
        self._mapping_path = mapping_path

    def makeRequest(self, **kw):
        """Perform the request via client.Client and record the response.

        Returns the response unchanged.
        """
        text = client.Client.makeRequest(self, **kw)
        self._mapping[getRequestKey(kw)] = text
        return text

    def save(self):
        """Write the recorded requests and responses to the mapping path.

        'mapping.txt' receives two lines per request (the request key,
        then the response file name); each response is written to its
        own zero-padded, numbered '.xml' file.
        """
        mapping_path = self._mapping_path
        # 'with' closes both files even if one of the writes fails.
        with open(os.path.join(mapping_path, 'mapping.txt'), 'w') as mapping_file:
            for i, (request, response) in enumerate(self._mapping.items()):
                filename = str(i).zfill(5) + ".xml"
                mapping_file.write(request)
                mapping_file.write('\n')
                mapping_file.write(filename)
                mapping_file.write('\n')
                with open(os.path.join(mapping_path, filename), 'w') as response_file:
                    response_file.write(response)
91 | 


--------------------------------------------------------------------------------
/src/oaipmh/metadata.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | 
  3 | from lxml import etree
  4 | from lxml.etree import SubElement
  5 | from oaipmh import common
  6 | 
  7 | if sys.version_info[0] == 3:
  8 |     text_type = str
  9 | else:
 10 |     text_type = unicode
 11 | 
 12 | class MetadataRegistry(object):
 13 |     """A registry that contains readers and writers of metadata.
 14 | 
 15 |     a reader is a function that takes a chunk of (parsed) XML and
 16 |     returns a metadata object.
 17 | 
 18 |     a writer is a function that takes a takes a metadata object and
 19 |     produces a chunk of XML in the right format for this metadata.
 20 |     """
 21 |     def __init__(self):
 22 |         self._readers = {}
 23 |         self._writers = {}
 24 |         
 25 |     def registerReader(self, metadata_prefix, reader):
 26 |         self._readers[metadata_prefix] = reader
 27 | 
 28 |     def registerWriter(self, metadata_prefix, writer):
 29 |         self._writers[metadata_prefix] = writer
 30 | 
 31 |     def hasReader(self, metadata_prefix):
 32 |         return metadata_prefix in self._readers
 33 |     
 34 |     def hasWriter(self, metadata_prefix):
 35 |         return metadata_prefix in self._writers
 36 |     
 37 |     def readMetadata(self, metadata_prefix, element):
 38 |         """Turn XML into metadata object.
 39 | 
 40 |         element - element to read in
 41 | 
 42 |         returns - metadata object
 43 |         """
 44 |         return self._readers[metadata_prefix](element)
 45 | 
 46 |     def writeMetadata(self, metadata_prefix, element, metadata):
 47 |         """Write metadata as XML.
 48 |         
 49 |         element - ElementTree element to write under
 50 |         metadata - metadata object to write
 51 |         """
 52 |         self._writers[metadata_prefix](element, metadata)
 53 | 
# Module-level registry instance, created empty (no readers or writers
# registered) when this module is imported.
global_metadata_registry = MetadataRegistry()
 55 | 
 56 | class Error(Exception):
 57 |     pass
 58 | 
 59 | class MetadataReader(object):
 60 |     """A default implementation of a reader based on fields.
 61 |     """
 62 |     def __init__(self, fields, namespaces=None):
 63 |         self._fields = fields
 64 |         self._namespaces = namespaces or {}
 65 | 
 66 |     def __call__(self, element):
 67 |         map = {}
 68 |         # create XPathEvaluator for this element
 69 |         xpath_evaluator = etree.XPathEvaluator(element, 
 70 |                                                namespaces=self._namespaces)
 71 |         
 72 |         e = xpath_evaluator.evaluate
 73 |         # now extra field info according to xpath expr
 74 |         for field_name, (field_type, expr) in list(self._fields.items()):
 75 |             if field_type == 'bytes':
 76 |                 value = str(e(expr))
 77 |             elif field_type == 'bytesList':
 78 |                 value = [str(item) for item in e(expr)]
 79 |             elif field_type == 'text':
 80 |                 # make sure we get back unicode strings instead
 81 |                 # of lxml.etree._ElementUnicodeResult objects.
 82 |                 value = text_type(e(expr))
 83 |             elif field_type == 'textList':
 84 |                 # make sure we get back unicode strings instead
 85 |                 # of lxml.etree._ElementUnicodeResult objects.
 86 |                 value = [text_type(v) for v in e(expr)]
 87 |             else:
 88 |                 raise Error("Unknown field type: %s" % field_type)
 89 |             map[field_name] = value
 90 |         return common.Metadata(element, map)
 91 | 
# Ready-made reader for the fifteen dc:* elements found below oai_dc:dc.
# Every field is a 'textList', so each value read is a list of text
# strings (empty when the element is absent).
oai_dc_reader = MetadataReader(
    fields={
    'title':       ('textList', 'oai_dc:dc/dc:title/text()'),
    'creator':     ('textList', 'oai_dc:dc/dc:creator/text()'),
    'subject':     ('textList', 'oai_dc:dc/dc:subject/text()'),
    'description': ('textList', 'oai_dc:dc/dc:description/text()'),
    'publisher':   ('textList', 'oai_dc:dc/dc:publisher/text()'),
    'contributor': ('textList', 'oai_dc:dc/dc:contributor/text()'),
    'date':        ('textList', 'oai_dc:dc/dc:date/text()'),
    'type':        ('textList', 'oai_dc:dc/dc:type/text()'),
    'format':      ('textList', 'oai_dc:dc/dc:format/text()'),
    'identifier':  ('textList', 'oai_dc:dc/dc:identifier/text()'),
    'source':      ('textList', 'oai_dc:dc/dc:source/text()'),
    'language':    ('textList', 'oai_dc:dc/dc:language/text()'),
    'relation':    ('textList', 'oai_dc:dc/dc:relation/text()'),
    'coverage':    ('textList', 'oai_dc:dc/dc:coverage/text()'),
    'rights':      ('textList', 'oai_dc:dc/dc:rights/text()')
    },
    # prefixes used by the XPath expressions above
    namespaces={
    'oai_dc': 'http://www.openarchives.org/OAI/2.0/oai_dc/',
    'dc' : 'http://purl.org/dc/elements/1.1/'}
    )
114 | 
115 | 
116 |     
117 | 


--------------------------------------------------------------------------------
/src/oaipmh/tests/fakeserver.py:
--------------------------------------------------------------------------------
  1 | from oaipmh import common, error
  2 | from datetime import datetime
  3 | import random
  4 | 
  5 | class FakeServerCommon(object):
  6 |     def identify(self):
  7 |         return common.Identify(
  8 |             repositoryName='Fake',
  9 |             baseURL='http://www.infrae.com/oai/',
 10 |             protocolVersion="2.0",
 11 |             adminEmails=['faassen@infrae.com'],
 12 |             earliestDatestamp=datetime(2004, 1, 1),
 13 |             deletedRecord='transient',
 14 |             granularity='YYYY-MM-DDThh:mm:ssZ',
 15 |             compression=['identity'])
 16 | 
 17 |     def getRecord(self, metadataPrefix, identifier):
 18 |         try:
 19 |             return self._data[int(identifier)]
 20 |         except IndexError:
 21 |             raise error.IdDoesNotExistError("Id does not exist: %s" % identifier)
 22 | 
 23 | class FakeServerBase(FakeServerCommon):
 24 |     
 25 |     def listIdentifiers(self, metadataPrefix=None, from_=None, until=None,
 26 |                         set=None):
 27 |         result = []
 28 |         for header, metadata, about in self._data:
 29 |             if datestampInRange(header, from_, until):
 30 |                 result.append(header)
 31 |         return result
 32 | 
 33 |     def listRecords(self, metadataPrefix=None, from_=None, until=None,
 34 |                     set=None):
 35 |         result = []
 36 |         for header, metadata, about in self._data:
 37 |             if datestampInRange(header, from_, until):
 38 |                 result.append((header, metadata, about))
 39 |         return result
 40 | 
 41 | class BatchingFakeServerBase(FakeServerCommon):
 42 |     
 43 |     def listIdentifiers(self, metadataPrefix=None, from_=None, until=None,
 44 |                         set=None, cursor=0, batch_size=10):
 45 |         result = []
 46 |         for header, metadata, about in self._data:
 47 |             if datestampInRange(header, from_, until):
 48 |                 result.append(header)
 49 |         return result[cursor:cursor + batch_size]
 50 | 
 51 |     def listRecords(self, metadataPrefix=None, from_=None, until=None,
 52 |                     set=None, cursor=0, batch_size=10):
 53 |         result = []
 54 |         for header, metadata, about in self._data:
 55 |             if datestampInRange(header, from_, until):
 56 |                 result.append((header, metadata, about))
 57 |         return result[cursor:cursor + batch_size]
 58 | 
 59 | def datestampInRange(header, from_, until):
 60 |     if from_ is not None and header.datestamp() < from_:
 61 |         return False
 62 |     if until is not None and header.datestamp() > until:
 63 |         return False
 64 |     return True
 65 | 
 66 | def createFakeData():
 67 |     data = []
 68 |     for i in range(100):
 69 |         # create some datestamp spread
 70 |         year = 2004
 71 |         month = i % 12 + 1
 72 |         day = i % 28 + 1
 73 |         hour = i % 24
 74 |         minute = i % 60
 75 |         second = i % 60
 76 |         fake_element = None
 77 |         datestamp = datetime(year, month, day, hour, minute, second)
 78 |         data.append((common.Header(fake_element, str(i), datestamp, '', False),
 79 |                      common.Metadata(fake_element, {'title': ['Title %s' % i]}),
 80 |                      None))
 81 |     return data
 82 |     
class FakeServer(FakeServerBase):
    """Non-batching fake server backed by the createFakeData() records."""
    def __init__(self):
        self._data = createFakeData()
 86 | 
class BatchingFakeServer(BatchingFakeServerBase):
    """Batching fake server backed by the createFakeData() records."""
    def __init__(self):
        self._data = createFakeData()
 90 |     
 91 | class FakeServerWithDeletions(FakeServerBase):
 92 | 
 93 |     def __init__(self):
 94 |         data = []
 95 | 
 96 |         for i in range(0, 12):
 97 |             # create some records in a year
 98 |             year = 2005
 99 |             month = i + 1
100 |             day = 1
101 |             datestamp = datetime(year, month, day, 12, 30, 0)
102 |             fake_element = None
103 |             data.append((common.Header(fake_element, str(i), datestamp, '', False),
104 |                          common.Metadata(fake_element, {'title': ['Title %s' % i]}),
105 |                          None))
106 |         self._data = data
107 |         
108 |     def deletionEvent(self):
109 |         # delete half the records we store
110 |         data = []
111 |         # create deletion remains for these records
112 |         for i in range(0, 6):
113 |             year = 2006
114 |             month = i + 1
115 |             day = 1
116 |             datestamp = datetime(year, month, day, 12, 35, 0)
117 |             fake_element = None
118 |             data.append((common.Header(fake_element, str(i), datestamp, '', True),
119 |                          None,
120 |                          None))
121 |         # replace first half with deleted records
122 |         self._data = data + self._data[6:]
123 | 


--------------------------------------------------------------------------------
/doc/oai.css:
--------------------------------------------------------------------------------
  1 | /*
  2 | :Author: David Goodger
  3 | :Contact: goodger@users.sourceforge.net
  4 | :date: $Date: 2005/05/27 14:26:05 $
  5 | :version: $Revision: 1.1 $
  6 | :copyright: This stylesheet has been placed in the public domain.
  7 | 
  8 | Default cascading style sheet for the HTML output of Docutils.
  9 | */
 10 | 
/* Margin helpers for the first / last child of a container */
.first {
  margin-top: 0 }

.last {
  margin-bottom: 0 }

/* Back-links from section titles to the table of contents */
a.toc-backref {
  text-decoration: none ;
  color: black }

dd {
  margin-bottom: 0.5em }

/* Abstract */
div.abstract {
  margin: 2em 5em }

div.abstract p.topic-title {
  font-weight: bold ;
  text-align: center }

/* Admonitions: every kind shares the same framed box */
div.attention, div.caution, div.danger, div.error, div.hint,
div.important, div.note, div.tip, div.warning, div.admonition {
  margin: 2em ;
  border: medium outset ;
  padding: 1em }

/* Titles of the attention/caution/danger/error/warning kinds are red */
div.attention p.admonition-title, div.caution p.admonition-title,
div.danger p.admonition-title, div.error p.admonition-title,
div.warning p.admonition-title {
  color: red ;
  font-weight: bold ;
  font-family: sans-serif }

/* Titles of the remaining admonition kinds keep the default colour */
div.hint p.admonition-title, div.important p.admonition-title,
div.note p.admonition-title, div.tip p.admonition-title,
div.admonition p.admonition-title {
  font-weight: bold ;
  font-family: sans-serif }

/* Dedication */
div.dedication {
  margin: 2em 5em ;
  text-align: center ;
  font-style: italic }

div.dedication p.topic-title {
  font-weight: bold ;
  font-style: normal }

div.figure {
  margin-left: 2em }

div.footer, div.header {
  font-size: smaller }

/* Sidebar: floated to the right, 40% wide */
div.sidebar {
  margin-left: 1em ;
  border: medium outset ;
  padding: 0em 1em ;
  background-color: #ffffee ;
  width: 40% ;
  float: right ;
  clear: right }

div.sidebar p.rubric {
  font-family: sans-serif ;
  font-size: medium }

/* System messages */
div.system-messages {
  margin: 5em }

div.system-messages h1 {
  color: red }

div.system-message {
  border: medium outset ;
  padding: 1em }

div.system-message p.system-message-title {
  color: red ;
  font-weight: bold }

div.topic {
  margin: 2em }

/* Document title and subtitle */
h1.title {
  text-align: center }

h2.subtitle {
  text-align: center }

hr {
  width: 75% }

/* Lists: one rule per enumeration style */
ol.simple, ul.simple {
  margin-bottom: 1em }

ol.arabic {
  list-style: decimal }

ol.loweralpha {
  list-style: lower-alpha }

ol.upperalpha {
  list-style: upper-alpha }

ol.lowerroman {
  list-style: lower-roman }

ol.upperroman {
  list-style: upper-roman }

/* Paragraph classes */
p.attribution {
  text-align: right ;
  margin-left: 50% }

p.caption {
  font-style: italic }

p.credits {
  font-style: italic ;
  font-size: smaller }

p.label {
  white-space: nowrap }

p.rubric {
  font-weight: bold ;
  font-size: larger ;
  color: darkred ;
  text-align: center }

p.sidebar-title {
  font-family: sans-serif ;
  font-weight: bold ;
  font-size: larger }

p.sidebar-subtitle {
  font-family: sans-serif ;
  font-weight: bold }

p.topic-title {
  font-weight: bold }

/* Preformatted blocks */
pre.address {
  margin-bottom: 0 ;
  margin-top: 0 ;
  font-family: serif ;
  font-size: 100% }

pre.line-block {
  font-family: serif ;
  font-size: 100% }

pre.literal-block, pre.doctest-block {
  margin-left: 2em ;
  margin-right: 2em ;
  background-color: #eeeeee }

/* Inline spans */
span.classifier {
  font-family: sans-serif ;
  font-style: oblique }

span.classifier-delimiter {
  font-family: sans-serif ;
  font-weight: bold }

span.interpreted {
  font-family: sans-serif }

span.option {
  white-space: nowrap }

span.option-argument {
  font-style: italic }

span.pre {
  white-space: pre }

span.problematic {
  color: red }

/* Tables */
table {
  margin-top: 0.5em ;
  margin-bottom: 0.5em }

table.citation {
  border-left: solid thin gray ;
  padding-left: 0.5ex }

table.docinfo {
  margin: 2em 4em }

table.footnote {
  border-left: solid thin black ;
  padding-left: 0.5ex }

td, th {
  padding-left: 0.5em ;
  padding-right: 0.5em ;
  vertical-align: top }

th.docinfo-name, th.field-name {
  font-weight: bold ;
  text-align: left ;
  white-space: nowrap }

/* Inline literals (tt): same size as the heading when used inside one */
h1 tt, h2 tt, h3 tt, h4 tt, h5 tt, h6 tt {
  font-size: 100% }

tt {
  background-color: #eeeeee }

ul.auto-toc {
  list-style-type: none }
225 | 


--------------------------------------------------------------------------------
/HISTORY.txt:
--------------------------------------------------------------------------------
  1 | Changelog
  2 | =========
  3 | 2.5.2 (unreleased)
  4 | 
  5 | 2.5.1
  6 | 
  7 | -  Added customizable client retry policy (contributed by adimascio)
  8 | 
  9 | -  Added compatibility with Python 3.8 (contributed by krenzlin)
 10 | 
 11 | -  Do not resume ListRecord requests if no result was returned (contributed by wetneb)
 12 | 
 13 | 2.5.0 (2017-07-03) 
 14 | 
 15 | -  Added Python 3 compatibility (contributed by Tobias Kurze, Uli Köhler
 16 |    and Michał Pasternak)
 17 | -  Travis support and badges (Michał Pasternak)
 18 | 
 19 | 2.4.5 (2015-12-23) 
 20 | 
 21 | -  Added switch in client to force harvesting using HTTP Get method
 22 |    (contributed by Stefan Oderbolz).
 23 | 
 24 | -  Added unofficial GetMetadata verb in server and client. GetMetadata
 25 |    is identical to GetRecord, but only returns the first element below
   the oai:metadata element; it does not return the OAI envelope.
 27 | 
 28 | 2.4.4 (2010-09-30) 
 29 | 
 30 | -  Changed contact info, Migrated code from Subversion to Mercurial
 31 | 
 32 | 2.4.3 (2010-08-19) 
 33 | 
 34 | -  Convert lxml.etree._ElementUnicodeResult and ElementStringResult to
 35 |    normal string and unicode objects, to prevent errors when these
 36 |    objects get pickled. (lp #617439)
 37 | 
 38 | 2.4.2 (2010-05-03) 
 39 | 
-  According to the OAI spec, the OAI_DC and DC namespace declarations
   should not be declared on the document root, but on the child of the
   metadata element.
 43 | 
 44 | 2.4.1 (2009-11-16) 
 45 | 
 46 | -  When specifying a date (not a datetime) for the until parameter,
 47 |    default to 23:59:59 instead of 00:00:00
 48 | 
 49 | 2.4 (2009-05-04) 
 50 | 
 51 | -  Included support for description elements in OAI Identify headers,
 52 |    added ‘toolkit’ description by default.
 53 | 
 54 | 2.3.1 (2009-04-24) 
 55 | 
 56 | -  Raise correct error when from and until parameters have different
 57 |    granularities
 58 | 
 59 | 2.3 (2009-04-23) 
 60 | 
 61 | -  Fixed bug and added tests for handling invalid dateTime formats, the
 62 |    server will now respond with a BadArgument (XML) error instead of a
 63 |    python traceback.
 64 | 
 65 | -  Use buildout to create testrunner and environment as opposed to
 66 |    ``test.py`` script.
 67 | 
 68 |    Install buildout by:
 69 | 
   $ python bootstrap.py
   $ bin/buildout
 71 | 
 72 |    Run the tests by doing:
 73 | 
 74 |    $ bin/test
 75 | 
 76 |    To get a python interpreter with the ``oaipmh`` library importable::
 77 | 
 78 |    $ bin/devpython
 79 | 
 80 | 2.2.1 (2008-04-04) 
 81 | 
 82 | -  Added xml declaration to server output
 83 | -  Prettyprint xml output
 84 | -  compatibility fix: should be compatible with lxml 2.0 now
 85 | -  server resumption tokens now work with POST requests.
 86 | -  Fix for client code that handles 503 response from server.
 87 | 
 88 | 2.2 (2006-11-20) 
 89 | 
 90 | -  Support for BatchingServer. A BatchingServer implements the
 91 |    IBatchingOAI interface. This is very similar to IOAI, but methods get
 92 |    a ‘cursor’ and ‘batch_size’ argument. This can be used to efficiently
 93 |    implement batching OAI servers on top of relational databases.
 94 | 
 95 | -  Make it possible to explicitly pass None as the from or until
 96 |    parameters for a OAIPMH client.
 97 | 
 98 | -  an extra nsmap argument to Server and BatchingServer allows the
   programmer to specify the namespace prefix to namespace URI
100 |    mappings that should be used in the server output.
101 | 
102 | -  fixed a bug where the output wasn’t encoded properly as UTF-8.
103 | 
104 | 2.1.5 (2006-09-18) 
105 | 
106 | -  compatibility fix: it should work with lxml 1.1 now.
107 | 
108 | 2.1.4 (2006-06-16) 
109 | 
110 | -  Distribute as an egg.
111 | 
112 | 2.1.3 
113 | 
114 | -  Add infrastructure to deal with non-XML compliant OAI-PMH feeds; an
115 |    XMLSyntaxError is raised in that case.
116 | 
117 | -  added tolerant_datestamp_to_datetime which is a bit more tolerant
118 |    than the normal datestamp_to_datetime when encountering bad
119 |    datestamps.
120 | 
121 | -  Split off datestamp handling into separate datestamp module.
122 | 
123 | 2.0 
124 | 
125 | -  Add support for day-only granularity (YYYY-MM-DD) in client. calling
126 |    ‘updateGranularity’ with the client will check with the server (using
127 |    identify()) to see what granularity the server supports. If the
128 |    server only supports day level granularity, the client will make sure
129 |    only YYYY-MM-DD timestamps are sent.
130 | 
131 | 2.0b1 
132 | 
133 | -  Added framework for implementing OAI-PMH compliant servers.
134 | 
135 | -  Changed package structure: now a oaipmh namespace package. Client
136 |    functionality now in oaipmh.client.
137 | 
138 | -  Refactoring of oaipmh.py module to reuse code for both client and
139 |    server.
140 | 
141 | -  Extended testing infrastructure.
142 | 
143 | -  Switched over from using libxml2 Python wrappers to the lxml binding.
144 | 
-  Use generators instead of hacked up ``__getitem__``. This means that the
146 |    return from listRecords, listIdentifiers and listSets are now not
147 |    normal lists but iterators. They can easily be turned into a normal
148 |    list by using list() on them, however.
149 | 
150 | 1.0.1 
151 | 
152 | -  Typo in oaipmh.py
153 | 
154 | 1.0 
155 | 
156 | -  Added an encoding parameter to the serialize call, which fixes a
157 |    unicode bug.
158 | 
159 | 0.7.4 
160 | 
-  A harvest can return records with <header status="deleted"> that
162 |    contain no metadata and are merely an indication that that
163 |    metadata-set for that resource is no longer on the OAI service. These
164 |    records should be used to remove metadata from the catalog if it is
   there, but should never be stored or catalogued themselves. They
166 |    aren’t now. (Fixed in zope/OAICore/core.py)
167 | 
168 | 0.7 
169 | 
170 | Initial public release.
171 | 


--------------------------------------------------------------------------------
/src/oaipmh/interfaces.py:
--------------------------------------------------------------------------------
class IOAI:
    """Interface describing the operations of an OAI-PMH repository.

    The methods are declarations only: they take no ``self`` and their
    bodies consist of the docstring that states the contract an
    implementation is expected to follow.
    """
    def getRecord(metadataPrefix, identifier):
        """Get a record for a metadataPrefix and identifier.

        metadataPrefix - identifies metadata set to retrieve
        identifier - repository-unique identifier of record

        Should raise error.CannotDisseminateFormatError if
        metadataPrefix is unknown or not supported by identifier.

        Should raise error.IdDoesNotExistError if identifier is
        unknown or illegal.

        Returns a header, metadata, about tuple describing the record.
        """

    def identify():
        """Retrieve information about the repository.

        Returns an Identify object describing the repository.
        """

    def listIdentifiers(metadataPrefix, set=None, from_=None, until=None):
        """Get a list of header information on records.

        metadataPrefix - identifies metadata set to retrieve
        set - set identifier; only return headers in set (optional)
        from_ - only retrieve headers from from_ date forward (optional)
        until - only retrieve headers with dates up to and including
                until date (optional)

        Should raise error.CannotDisseminateFormatError if metadataPrefix
        is not supported by the repository.

        Should raise error.NoSetHierarchyError if the repository does not
        support sets.

        Returns an iterable of headers.
        """

    def listMetadataFormats(identifier=None):
        """List metadata formats supported by repository or record.

        identifier - identify record for which we want to know all
                     supported metadata formats. if absent, list all metadata
                     formats supported by repository. (optional)


        Should raise error.IdDoesNotExistError if record with
        identifier does not exist.

        Should raise error.NoMetadataFormatsError if no formats are
        available for the indicated record.

        Returns an iterable of metadataPrefix, schema, metadataNamespace
        tuples (each entry in the tuple is a string).
        """

    def listRecords(metadataPrefix, set=None, from_=None, until=None):
        """Get a list of header, metadata and about information on records.

        metadataPrefix - identifies metadata set to retrieve
        set - set identifier; only return records in set (optional)
        from_ - only retrieve records from from_ date forward (optional)
        until - only retrieve records with dates up to and including
                until date (optional)

        Should raise error.CannotDisseminateFormatError if metadataPrefix
        is not supported by the repository.

        Should raise error.NoSetHierarchyError if the repository does not
        support sets.

        Returns an iterable of header, metadata, about tuples.
        """

    def listSets():
        """Get a list of sets in the repository.

        Should raise error.NoSetHierarchyError if the repository does not
        support sets.

        Returns an iterable of setSpec, setName tuples (strings).
        """
 85 |         
 86 | class IBatchingOAI:
 87 |     """Very similar to IOAI, but the implementation can be batch-aware.
 88 | 
 89 |     Methods that support resumption will get two extra arguments,
 90 |     cursor and batch_size, which indicate the batch currently being
 91 |     requested.
 92 |     """
 93 |     
 94 |     def getRecord(metadataPrefix, identifier):
 95 |         pass
 96 |     
 97 |     def identify():
 98 |         pass
 99 |     
100 |     def listIdentifiers(metadataPrefix, set=None, from_=None, until=None,
101 |                         cursor=0, batch_size=10):
102 |         pass
103 |     
104 |     def listMetadataFormats(identifier=None):
105 |         pass
106 |     
107 |     def listRecords(metadataPrefix, set=None, from_=None, until=None,
108 |                     cursor=0, batch_size=10):
109 |         pass
110 |     
111 |     def listSets():
112 |         pass
113 |     
class IIdentify:
    """Interface of the object describing a repository (see identify()).

    The methods are declarations only: they take no ``self`` and consist
    of the docstring stating what an implementation returns.
    """
    def repositoryName():
        """Name of repository.
        """

    def baseURL():
        """Base URL for OAI-PMH requests.
        """

    def protocolVersion():
        """OAI-PMH protocol version (should always be '2.0')
        """

    def adminEmails():
        """List of email addresses of repository administrators.
        """

    def earliestDatestamp():
        """The datetime (datestamp) of the earliest record in repository.
        """

    # The interface used to spell this method 'earliestDateStamp', while
    # the implementation in common.Identify is 'earliestDatestamp'. The
    # old spelling is kept as an alias so existing references still work.
    earliestDateStamp = earliestDatestamp

    def deletedRecord():
        """Way the repository handles deleted records.

        Either 'no', 'transient' or 'persistent'.
        """

    def granularity():
        """Datetime granularity of datestamps in repository.

        Either YYYY-MM-DD or YYYY-MM-DDThh:mm:ssZ
        """

    def compression():
        """List of types of compression schemes supported by repository.

        'identity' is the 'do-nothing' scheme.
        """
152 |         
class IHeader:
    """Interface of a record header.

    The methods are declarations only: they take no ``self`` and consist
    of the docstring stating what an implementation returns.
    """
    def identifier():
        """Repository-unique identifier of this record.
        """

    def datestamp():
        """Datetime of creation, last modification or deletion of the record.

        This can be used for selective harvesting.
        """

    def setSpec():
        """A list of sets this record is a member of.
        """

    def isDeleted():
        """If true, record has been deleted.
        """
171 | 


--------------------------------------------------------------------------------
/src/oaipmh/common.py:
--------------------------------------------------------------------------------
  1 | import pkg_resources
  2 | 
  3 | from oaipmh import error
  4 | 
  5 | class Header(object):
  6 |     def __init__(self, element, identifier, datestamp, setspec, deleted):
  7 |         self._element = element
  8 |         # force identifier to be a string, it might be 
  9 |         # an lxml.etree._ElementStringResult...
 10 |         try:
 11 |             self._identifier = str(identifier)
 12 |         except UnicodeEncodeError:
 13 |             self._identifier = unicode(identifier)
 14 |         self._datestamp = datestamp
 15 |         self._setspec = setspec
 16 |         self._deleted = deleted
 17 | 
 18 |     def element(self):
 19 |         return self._element
 20 | 
 21 |     def identifier(self):
 22 |         return self._identifier
 23 | 
 24 |     def datestamp(self):
 25 |         return self._datestamp
 26 | 
 27 |     def setSpec(self):
 28 |         return self._setspec
 29 | 
 30 |     def isDeleted(self):
 31 |         return self._deleted
 32 | 
class Metadata(object):
    """Metadata of a record: the source element plus a field mapping.

    element - the element the metadata was read from (kept as given)
    map - mapping from field name to field value
    """
    def __init__(self, element, map):
        self._element = element
        self._map = map

    def element(self):
        """Return the element given to the constructor."""
        return self._element

    def getMap(self):
        """Return the field mapping itself (not a copy)."""
        return self._map

    def getField(self, name):
        """Return the value stored for field ``name`` in the mapping."""
        return self._map[name]

    # allow metadata['title'] as a shorthand for metadata.getField('title')
    __getitem__ = getField
 48 | 
 49 | class Identify(object):
 50 |     def __init__(self, repositoryName, baseURL, protocolVersion, adminEmails,
 51 |                  earliestDatestamp, deletedRecord, granularity, compression,
 52 |                  toolkit_description=True):
 53 |         self._repositoryName = repositoryName
 54 |         self._baseURL = baseURL
 55 |         self._protocolVersion = protocolVersion
 56 |         self._adminEmails = adminEmails
 57 |         self._earliestDatestamp = earliestDatestamp
 58 |         self._deletedRecord = deletedRecord
 59 |         self._granularity = granularity
 60 |         self._compression = compression
 61 |         self._descriptions = []
 62 |         
 63 |         if toolkit_description:
 64 |             req = pkg_resources.Requirement.parse('pyoai')
 65 |             egg = pkg_resources.working_set.find(req)
 66 |             if egg:
 67 |                 version = '<version>%s</version>' % egg.version
 68 |             else:
 69 |                 version = ''
 70 |             self.add_description(
 71 |                 '<toolkit xsi:schemaLocation='
 72 |                 '"http://oai.dlib.vt.edu/OAI/metadata/toolkit '
 73 |                 'http://oai.dlib.vt.edu/OAI/metadata/toolkit.xsd" '
 74 |                 'xmlns="http://oai.dlib.vt.edu/OAI/metadata/toolkit" '
 75 |                 'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">'
 76 |                 '<title>pyoai</title>'
 77 |                 '%s'
 78 |                 '<URL>http://infrae.com/products/oaipack</URL>'
 79 |                 '</toolkit>' % version)
 80 |         
 81 |     def repositoryName(self):
 82 |         return self._repositoryName
 83 | 
 84 |     def baseURL(self):
 85 |         return self._baseURL
 86 | 
 87 |     def protocolVersion(self):
 88 |         return self._protocolVersion
 89 | 
 90 |     def adminEmails(self):
 91 |         return self._adminEmails
 92 | 
 93 |     def earliestDatestamp(self):
 94 |         return self._earliestDatestamp
 95 | 
 96 |     def deletedRecord(self):
 97 |         return self._deletedRecord
 98 | 
 99 |     def granularity(self):
100 |         return self._granularity
101 | 
102 |     def compression(self):
103 |         return self._compression
104 | 
105 |     def add_description(self, xml_string):
106 |         self._descriptions.append(xml_string)
107 | 
108 |     def descriptions(self):
109 |         return self._descriptions
110 |     
def ResumptionTokenSpec(dict):
    """Return a copy of ``dict`` with ``resumptionToken`` set to 'exclusive'.

    The argument is copied with its own ``copy()`` method and is never
    modified; any existing ``resumptionToken`` entry is overwritten in
    the copy.
    """
    spec = dict.copy()
    spec.update({'resumptionToken': 'exclusive'})
    return spec
115 | 
class OAIMethodImpl(object):
    """Callable that forwards keyword arguments to ``handleVerb`` for one verb."""

    def __init__(self, verb):
        self._verb = verb

    def __call__(self, bound_self, **kw):
        """Return ``bound_self.handleVerb(verb, kw)``.

        ``kw`` is handed over as a single dict, not unpacked again.
        """
        dispatch = bound_self.handleVerb
        return dispatch(self._verb, kw)
122 |         
def OAIMethod(verb):
    """Build a plain function usable as a method that dispatches ``verb``.

    The returned function accepts keyword arguments only and hands them,
    together with the instance it is called on, to an ``OAIMethodImpl``
    created once for ``verb``.
    """
    impl = OAIMethodImpl(verb)

    def method(self, **kw):
        return impl(self, **kw)

    return method
128 | 
class OAIPMH(object):
    """Mixin providing the Python-level OAI-PMH methods.

    resumptionToken handling is not part of this mixin.

    Each method below forwards its keyword arguments to ``handleVerb``
    together with the verb name; subclasses must override ``handleVerb``.
    """

    def handleVerb(self, verb, kw):
        """Handle one verb call; must be overridden by subclasses."""
        raise NotImplementedError

    getRecord = OAIMethod('GetRecord')
    getMetadata = OAIMethod('GetMetadata')
    identify = OAIMethod('Identify')
    listIdentifiers = OAIMethod('ListIdentifiers')
    listMetadataFormats = OAIMethod('ListMetadataFormats')
    listRecords = OAIMethod('ListRecords')
    listSets = OAIMethod('ListSets')
167 |     
class ResumptionOAIPMH(object):
    """Mixin providing the resumption-capable OAI-PMH methods.

    Each method below forwards its keyword arguments to ``handleVerb``
    together with the verb name; subclasses must override ``handleVerb``.

    listIdentifiers, listSets and listRecords are expected to return a
    (list, resumptionToken) tuple.  A resumptionToken of None means the
    end of the list has been reached.
    """

    def handleVerb(self, verb, kw):
        """Handle one verb call; must be overridden by subclasses."""
        raise NotImplementedError

    getRecord = OAIMethod('GetRecord')
    getMetadata = OAIMethod('GetMetadata')
    identify = OAIMethod('Identify')
    listIdentifiers = OAIMethod('ListIdentifiers')
    listMetadataFormats = OAIMethod('ListMetadataFormats')
    listRecords = OAIMethod('ListRecords')
    listSets = OAIMethod('ListSets')
209 | 
def getMethodForVerb(server, verb):
    """Return the attribute of ``server`` named after ``verb``.

    The attribute name is ``verb`` with its first character lower-cased
    (``'ListRecords'`` -> ``'listRecords'``).  An empty ``verb`` raises
    ``IndexError``; an unknown name raises ``AttributeError``.
    """
    initial, remainder = verb[0], verb[1:]
    method_name = initial.lower() + remainder
    return getattr(server, method_name)
212 | 
213 | 


--------------------------------------------------------------------------------
/src/oaipmh/tests/test_client.py:
--------------------------------------------------------------------------------
  1 | from unittest import TestCase, TestSuite, main, makeSuite
  2 | try:
  3 |     from unittest import mock
  4 | except ImportError:  # python < 3.3
  5 |     import mock
  6 | 
  7 | from fakeclient import FakeClient, GranularityFakeClient, TestError
  8 | import os
  9 | from datetime import datetime
 10 | try:
 11 |     import urllib.request as urllib2
 12 |     URLOPEN_PATH = 'urllib.request.urlopen'
 13 | except ImportError:
 14 |     import urllib2
 15 |     URLOPEN_PATH = 'urllib2.urlopen'
 16 | 
 17 | from oaipmh import common, metadata, validation, client
 18 | 
 19 | directory = os.path.dirname(__file__)
 20 | fake1 = os.path.join(directory, 'fake1')
 21 | fakeclient = FakeClient(fake1)
 22 | 
 23 | fakeclient.getMetadataRegistry().registerReader(
 24 |     'oai_dc', metadata.oai_dc_reader)
 25 | 
 26 | 
 27 | def http_error(code):
 28 |     return urllib2.HTTPError('mock-url', code, 'error', {}, None)
 29 | 
 30 | 
 31 | class ClientTestCase(TestCase):
 32 | 
 33 |     def test_getRecord(self):
 34 |         header, metadata, about = fakeclient.getRecord(
 35 |             metadataPrefix='oai_dc', identifier='hdl:1765/315')
 36 |         self.assertEquals(
 37 |             'hdl:1765/315',
 38 |             header.identifier())
 39 |         self.assertEquals(
 40 |             ['2:7'],
 41 |             header.setSpec())
 42 |         self.assert_(not header.isDeleted())
 43 | 
 44 |     def test_getMetadata(self):
 45 |         metadata = fakeclient.getMetadata(
 46 |             metadataPrefix='oai_dc', identifier='hdl:1765/315')
 47 |         self.assertEquals(metadata.tag,
 48 |                           '{http://www.openarchives.org/OAI/2.0/oai_dc/}dc')
 49 | 
 50 | 
 51 |     def test_identify(self):
 52 |         identify = fakeclient.identify()
 53 |         self.assertEquals(
 54 |             'Erasmus University : Research Online',
 55 |             identify.repositoryName())
 56 |         self.assertEquals(
 57 |             'http://dspace.ubib.eur.nl/oai/',
 58 |             identify.baseURL())
 59 |         self.assertEquals(
 60 |             '2.0',
 61 |             identify.protocolVersion())
 62 |         self.assertEquals(
 63 |             ['service@ubib.eur.nl'],
 64 |             identify.adminEmails())
 65 |         self.assertEquals(
 66 |             'no',
 67 |             identify.deletedRecord())
 68 |         self.assertEquals(
 69 |             'YYYY-MM-DDThh:mm:ssZ',
 70 |             identify.granularity())
 71 |         self.assertEquals(
 72 |             ['gzip', 'compress', 'deflate'],
 73 |             identify.compression())
 74 | 
 75 |     def test_listIdentifiers(self):
 76 |         headers = fakeclient.listIdentifiers(from_=datetime(2003, 4, 10),
 77 |                                              metadataPrefix='oai_dc')
 78 |         # lazy, just test first one
 79 |         headers = list(headers)
 80 | 
 81 |         header = headers[0]
 82 |         self.assertEquals(
 83 |             'hdl:1765/308',
 84 |             header.identifier())
 85 |         self.assertEquals(
 86 |             datetime(2003, 4, 15, 10, 18, 51),
 87 |             header.datestamp())
 88 |         self.assertEquals(
 89 |             ['1:2'],
 90 |             header.setSpec())
 91 |         self.assert_(not header.isDeleted())
 92 |         self.assertEquals(16, len(headers))
 93 | 
 94 | 
 95 |     def test_listIdentifiers_until_none(self):
 96 |         # test listIdentifiers with until argument as None explicitly
 97 |         headers = fakeclient.listIdentifiers(from_=datetime(2003, 4, 10),
 98 |                                              until=None,
 99 |                                              metadataPrefix='oai_dc')
100 |         self.assertEquals(16, len(list(headers)))
101 | 
102 |     def test_listIdentifiers_from_none(self):
103 |         # test listIdentifiers with until argument as None explicitly
104 | 
105 |         # XXX unfortunately a white box test relying on particular
106 |         # exception behavior of the fake server. We do verify whether
107 |         # from or from_ doesn't appear in the request args though
108 |         try:
109 |             headers = fakeclient.listIdentifiers(from_=None,
110 |                                                  metadataPrefix='oai_dc')
111 |         except KeyError as e:
112 |             self.assertEquals('metadataPrefix=oai_dc&verb=ListIdentifiers',
113 |                               e.args[0])
114 | 
115 |     def test_listIdentifiers_argument_error(self):
116 |         self.assertRaises(
117 |             validation.BadArgumentError,
118 |             fakeclient.listIdentifiers,
119 |             foo='bar')
120 | 
121 |     def test_listRecords(self):
122 |         records = fakeclient.listRecords(from_=datetime(2003, 4, 10),
123 |                                          metadataPrefix='oai_dc')
124 |         records = list(records)
125 |         # lazy, just test first one
126 |         header, metadata, about = records[0]
127 |         self.assertEquals(
128 |             'hdl:1765/308',
129 |             header.identifier())
130 |         self.assertEquals(
131 |             datetime(2003, 4, 15, 10, 18, 51),
132 |              header.datestamp())
133 |         self.assertEquals(
134 |             ['1:2'],
135 |             header.setSpec())
136 |         self.assert_(not header.isDeleted())
137 |         # XXX need to extend metadata tests
138 |         self.assertEquals(
139 |             ['Kijken in het brein: Over de mogelijkheden van neuromarketing'],
140 |             metadata.getField('title'))
141 | 
142 |     def test_listMetadataFormats(self):
143 |         formats = fakeclient.listMetadataFormats()
144 |         metadataPrefix, schema, metadataNamespace = formats[0]
145 |         self.assertEquals(
146 |             'oai_dc',
147 |             metadataPrefix)
148 |         self.assertEquals(
149 |             'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
150 |             schema)
151 |         self.assertEquals(
152 |             'http://www.openarchives.org/OAI/2.0/oai_dc/',
153 |             metadataNamespace)
154 | 
155 |     def test_listSets(self):
156 |         expected = [
157 |             ('3', 'Erasmus MC (University Medical Center Rotterdam)', None),
158 |             ('3:5', 'EUR Medical Dissertations', None),
159 |             ]
160 |         # lazy, just compare first two sets..
161 |         sets = fakeclient.listSets()
162 |         sets = list(sets)
163 |         compare = [sets[0], sets[1]]
164 |         self.assertEquals(
165 |             expected,
166 |             compare)
167 | 
168 |     def test_day_granularity(self):
169 |         fakeclient = GranularityFakeClient(granularity='YYYY-MM-DDThh:mm:ssZ')
170 |         fakeclient.updateGranularity()
171 |         try:
172 |             fakeclient.listRecords(from_=datetime(2003, 4, 10, 14, 0),
173 |                                    metadataPrefix='oai_dc')
174 |         except TestError as e:
175 |             self.assertEquals('2003-04-10T14:00:00Z', e.kw['from'])
176 |         fakeclient = GranularityFakeClient(granularity='YYYY-MM-DD')
177 |         fakeclient.updateGranularity()
178 |         try:
179 |             fakeclient.listRecords(from_=datetime(2003, 4, 10, 14, 0),
180 |                                    until=datetime(2004, 6, 17, 15, 30),
181 |                                    metadataPrefix='oai_dc')
182 |         except TestError as e:
183 |             self.assertEquals('2003-04-10', e.kw['from'])
184 |             self.assertEquals('2004-06-17', e.kw['until'])
185 | 
186 |     def test_no_retry_policy(self):
187 |         """check request is not retried by default on HTTP 500 errors"""
188 |         with mock.patch(URLOPEN_PATH, side_effect=http_error(500)):
189 |             urlclient = client.Client('http://mock.me')
190 |             with self.assertRaises(urllib2.HTTPError):
191 |                 urlclient.listRecords(from_=datetime(2003, 4, 10),
192 |                                       metadataPrefix='oai_dc')
193 | 
194 |     def test_custom_retry_policy(self):
195 |         """check request is retried on 500 if asked to"""
196 |         with mock.patch(URLOPEN_PATH, side_effect=http_error(500)):
197 |             with mock.patch('time.sleep') as sleep:
198 |                 urlclient = client.Client('http://mock.me', custom_retry_policy={
199 |                     'expected-errcodes': {500},
200 |                     'wait-default': 5,
201 |                     'retry': 3,
202 |                 })
203 |                 with self.assertRaises(client.Error):
204 |                     urlclient.listRecords(from_=datetime(2003, 4, 10),
205 |                                           metadataPrefix='oai_dc')
206 |                 self.assertEqual(sleep.call_count, 3)
207 |                 sleep.assert_has_calls([mock.call(5)] * 3)
208 | 
209 |     def test_custom_retry_policy_default_wait_max(self):
210 |         with mock.patch(URLOPEN_PATH, side_effect=http_error(500)):
211 |             with mock.patch('time.sleep') as sleep:
212 |                 urlclient = client.Client('http://mock.me', custom_retry_policy={
213 |                     'expected-errcodes': {500},
214 |                     'wait-default': 5,
215 |                 })
216 |                 with self.assertRaises(client.Error):
217 |                     urlclient.listRecords(from_=datetime(2003, 4, 10),
218 |                                           metadataPrefix='oai_dc')
219 |                 self.assertEqual(sleep.call_count, 5)
220 |                 sleep.assert_has_calls([mock.call(5)] * 5)
221 | 
222 | 
def test_suite():
    """Return a ``TestSuite`` containing every test of ``ClientTestCase``.

    The tests are collected with the default loader's
    ``loadTestsFromTestCase`` rather than ``unittest.makeSuite``, which is
    deprecated since Python 3.11 and removed in 3.13.
    """
    # Function-scope import keeps this block independent of the module's
    # top-level import list.
    from unittest import defaultTestLoader
    return TestSuite(
        (defaultTestLoader.loadTestsFromTestCase(ClientTestCase), ))
225 | 
if __name__ == '__main__':
    # Executed as a script: let unittest run the suite from test_suite().
    main(defaultTest='test_suite')
228 | 


--------------------------------------------------------------------------------
/src/oaipmh/tests/OAI-PMH.xsd:
--------------------------------------------------------------------------------
  1 | <schema targetNamespace="http://www.openarchives.org/OAI/2.0/"
  2 |         xmlns="http://www.w3.org/2001/XMLSchema"
  3 |         xmlns:oai="http://www.openarchives.org/OAI/2.0/"
  4 |         elementFormDefault="qualified"
  5 |         attributeFormDefault="unqualified">
  6 | 
  7 |   <annotation>
  8 |     <documentation>
  9 |     XML Schema which can be used to validate replies to all OAI-PMH 
 10 |     v2.0 requests. Herbert Van de Sompel, 2002-05-13.
 11 |     Validated with XML Spy v.4.3 on 2002-05-13.
 12 |     Validated with XSV 1.203.2.45/1.106.2.22 on 2002-05-13.
 13 |     Added definition of protocolVersionType instead of using anonymous 
 14 |     type. No change of function. Simeon Warner, 2004-03-29.
 15 |     Tightened definition of UTCdatetimeType to enforce the restriction 
 16 |     to UTC Z notation. Simeon Warner, 2004-09-14.
 17 |     Corrected pattern matches for setSpecType and metadataPrefixType
 18 |     to agree with protocol specification. Simeon Warner, 2004-10-12.
 19 |     $Date: 2005/06/08 09:57:56 $
 20 |     </documentation>
 21 |   </annotation>
 22 | 
 23 |   <element name="OAI-PMH" type="oai:OAI-PMHtype"/>
 24 | 
 25 |   <complexType name="OAI-PMHtype">
 26 |     <sequence>
 27 |       <element name="responseDate" type="dateTime"/>
 28 |       <element name="request" type="oai:requestType"/>
 29 |       <choice>
 30 |         <element name="error" type="oai:OAI-PMHerrorType" maxOccurs="unbounded"/>
 31 |         <element name="Identify" type="oai:IdentifyType"/>
 32 |         <element name="ListMetadataFormats" type="oai:ListMetadataFormatsType"/>
 33 |         <element name="ListSets" type="oai:ListSetsType"/>
 34 |         <element name="GetRecord" type="oai:GetRecordType"/>
 35 |         <element name="ListIdentifiers" type="oai:ListIdentifiersType"/>
 36 |         <element name="ListRecords" type="oai:ListRecordsType"/>
 37 |       </choice>
 38 |     </sequence>
 39 |   </complexType>
 40 | 
 41 |   <complexType name="requestType">
 42 |     <annotation>
 43 |       <documentation>Define requestType, indicating the protocol request that 
 44 |       led to the response. Element content is BASE-URL, attributes are arguments 
 45 |       of protocol request, attribute-values are values of arguments of protocol 
 46 |       request</documentation>
 47 |     </annotation>
 48 |     <simpleContent>
 49 |       <extension base="anyURI">
 50 |         <attribute name="verb" type="oai:verbType" use="optional"/>
 51 |         <attribute name="identifier" type="oai:identifierType" use="optional"/>
 52 |         <attribute name="metadataPrefix" type="oai:metadataPrefixType" use="optional"/>
 53 |         <attribute name="from" type="oai:UTCdatetimeType" use="optional"/>
 54 |         <attribute name="until" type="oai:UTCdatetimeType" use="optional"/>
 55 |         <attribute name="set" type="oai:setSpecType" use="optional"/>
 56 |         <attribute name="resumptionToken" type="string" use="optional"/>
 57 |       </extension>
 58 |     </simpleContent>
 59 |   </complexType>
 60 | 
 61 |   <simpleType name="verbType">
 62 |     <restriction base="string">
 63 |       <enumeration value="Identify"/>
 64 |       <enumeration value="ListMetadataFormats"/>
 65 |       <enumeration value="ListSets"/>
 66 |       <enumeration value="GetRecord"/>
 67 |       <enumeration value="ListIdentifiers"/>
 68 |       <enumeration value="ListRecords"/>
 69 |     </restriction>
 70 |   </simpleType>
 71 | 
 72 |   <!-- define OAI-PMH error conditions -->
 73 |   <!-- =============================== -->
 74 | 
 75 |   <complexType name="OAI-PMHerrorType">
 76 |     <simpleContent>
 77 |       <extension base="string">
 78 |         <attribute name="code" type="oai:OAI-PMHerrorcodeType" use="required"/>
 79 |       </extension>
 80 |     </simpleContent>
 81 |   </complexType>
 82 | 
 83 |   <simpleType name="OAI-PMHerrorcodeType">
 84 |     <restriction base="string">
 85 |       <enumeration value="cannotDisseminateFormat"/>
 86 |       <enumeration value="idDoesNotExist"/>
 87 |       <enumeration value="badArgument"/>
 88 |       <enumeration value="badVerb"/>
 89 |       <enumeration value="noMetadataFormats"/>
 90 |       <enumeration value="noRecordsMatch"/>
 91 |       <enumeration value="badResumptionToken"/>
 92 |       <enumeration value="noSetHierarchy"/>
 93 |     </restriction>
 94 |   </simpleType>
 95 | 
 96 |   <!-- define OAI-PMH verb containers -->
 97 |   <!-- ============================== -->
 98 | 
 99 |   <complexType name="IdentifyType">
100 |     <sequence>
101 |       <element name="repositoryName" type="string"/>
102 |       <element name="baseURL" type="anyURI"/>
103 |       <element name="protocolVersion" type="oai:protocolVersionType"/>
104 |       <element name="adminEmail" type="oai:emailType" maxOccurs="unbounded"/>
105 |       <element name="earliestDatestamp" type="oai:UTCdatetimeType"/>
106 |       <element name="deletedRecord" type="oai:deletedRecordType"/>
107 |       <element name="granularity" type="oai:granularityType"/>
108 |       <element name="compression" type="string" minOccurs="0" maxOccurs="unbounded"/>
109 |       <element name="description" type="oai:descriptionType" 
110 |                minOccurs="0" maxOccurs="unbounded"/>
111 |     </sequence>
112 |   </complexType>
113 | 
114 |   <complexType name="ListMetadataFormatsType">
115 |     <sequence>
116 |       <element name="metadataFormat" type="oai:metadataFormatType" maxOccurs="unbounded"/>
117 |     </sequence>
118 |   </complexType>
119 | 
120 |   <complexType name="ListSetsType">
121 |     <sequence>
122 |       <element name="set" type="oai:setType" maxOccurs="unbounded"/>
123 |       <element name="resumptionToken" type="oai:resumptionTokenType" minOccurs="0"/>
124 |     </sequence>
125 |   </complexType>
126 | 
127 |   <complexType name="GetRecordType">
128 |     <sequence>
129 |       <element name="record" type="oai:recordType"/>
130 |     </sequence>
131 |   </complexType>
132 | 
133 |   <complexType name="ListRecordsType">
134 |     <sequence>
135 |       <element name="record" type="oai:recordType" maxOccurs="unbounded"/>
136 |       <element name="resumptionToken" type="oai:resumptionTokenType" minOccurs="0"/>
137 |     </sequence>
138 |   </complexType>
139 | 
140 |   <complexType name="ListIdentifiersType">
141 |     <sequence>
142 |       <element name="header" type="oai:headerType" maxOccurs="unbounded"/>
143 |       <element name="resumptionToken" type="oai:resumptionTokenType" minOccurs="0"/>
144 |     </sequence>
145 |   </complexType>
146 | 
147 |   <!-- define basic types used in replies to 
148 |        GetRecord, ListRecords, ListIdentifiers -->
149 |   <!-- ======================================= -->
150 | 
151 |   <complexType name="recordType">
152 |     <annotation>
153 |       <documentation>A record has a header, a metadata part, and
154 |         an optional about container</documentation>
155 |     </annotation>
156 |     <sequence>
157 |       <element name="header" type="oai:headerType"/>
158 |       <element name="metadata" type="oai:metadataType" minOccurs="0"/>
159 |       <element name="about" type="oai:aboutType" minOccurs="0" maxOccurs="unbounded"/>
160 |     </sequence>
161 |   </complexType>
162 | 
163 |   <complexType name="headerType">
164 |     <annotation>
165 |       <documentation>A header has a unique identifier, a datestamp,
166 |         and setSpec(s) in case the item from which
167 |         the record is disseminated belongs to set(s).
168 |         the header can carry a deleted status indicating
169 |         that the record is deleted.</documentation>
170 |     </annotation>
171 |     <sequence>
172 |       <element name="identifier" type="oai:identifierType"/>
173 |       <element name="datestamp" type="oai:UTCdatetimeType"/>
174 |       <element name="setSpec" type="oai:setSpecType" minOccurs="0" maxOccurs="unbounded"/>
175 |     </sequence>
176 |     <attribute name="status" type="oai:statusType" use="optional"/>
177 |   </complexType>
178 | 
179 |   <simpleType name="identifierType">
180 |     <restriction base="anyURI"/>
181 |   </simpleType>
182 | 
183 |   <simpleType name="statusType">
184 |     <restriction base="string">
185 |       <enumeration value="deleted"/>
186 |     </restriction>
187 |   </simpleType>
188 | 
189 |   <complexType name="metadataType">
190 |     <annotation>
191 |       <documentation>Metadata must be expressed in XML that complies
192 |        with another XML Schema (namespace=##other). Metadata must be 
193 |        explicitly qualified in the response.</documentation>
194 |     </annotation>
195 |     <sequence>
196 |       <any namespace="##other" processContents="skip"/>
197 |     </sequence>
198 |   </complexType>
199 | 
200 |   <complexType name="aboutType">
201 |     <annotation>
202 |       <documentation>Data "about" the record must be expressed in XML
203 |       that is compliant with an XML Schema defined by a community.</documentation>
204 |     </annotation>
205 |     <sequence>
206 |       <any namespace="##other" processContents="skip" />
207 |     </sequence>
208 |   </complexType>
209 | 
210 |   <complexType name="resumptionTokenType">
211 |     <annotation>
212 |       <documentation>A resumptionToken may have 3 optional attributes
213 |        and can be used in ListSets, ListIdentifiers, ListRecords
214 |        responses.</documentation>
215 |     </annotation>
216 |     <simpleContent>
217 |       <extension base="string">
218 |         <attribute name="expirationDate" type="dateTime" use="optional"/>
219 |         <attribute name="completeListSize" type="positiveInteger" use="optional"/>
220 |         <attribute name="cursor" type="nonNegativeInteger" use="optional"/>
221 |       </extension>
222 |     </simpleContent>
223 |   </complexType>
224 | 
225 |   <complexType name="descriptionType">
226 |     <annotation>
227 |       <documentation>The descriptionType is used for the description
228 |       element in Identify and for setDescription element in ListSets.
229 |       Content must be compliant with an XML Schema defined by a 
230 |       community.</documentation>
231 |     </annotation>
232 |     <sequence>
233 |       <any namespace="##other" processContents="skip" />
234 |     </sequence>
235 |   </complexType>
236 | 
237 |   <simpleType name="UTCdatetimeType">
238 |     <annotation>
239 |       <documentation>Datestamps are to either day (type date)
240 |       or to seconds granularity (type oai:UTCdateTimeZType)</documentation>
241 |     </annotation>
242 |     <union memberTypes="date oai:UTCdateTimeZType"/>
243 |   </simpleType>
244 | 
245 |   <simpleType name="UTCdateTimeZType">
246 |     <restriction base="dateTime">
247 |       <pattern value=".*Z"/>
248 |     </restriction>
249 |   </simpleType>
250 | 
251 |   <!-- define types used for Identify verb only -->
252 |   <!-- ======================================== -->
253 | 
254 |   <simpleType name="protocolVersionType">
255 |     <restriction base="string">
256 |       <enumeration value="2.0"/>
257 |     </restriction>
258 |   </simpleType>
259 | 
260 |   <simpleType name="emailType">
261 |     <restriction base="string">
262 |       <pattern value="\S+@(\S+\.)+\S+"/>
263 |     </restriction>
264 |   </simpleType>
265 | 
266 |   <simpleType name="deletedRecordType">
267 |     <restriction base="string">
268 |       <enumeration value="no"/>
269 |       <enumeration value="persistent"/>
270 |       <enumeration value="transient"/>
271 |     </restriction>
272 |   </simpleType>
273 | 
274 |   <simpleType name="granularityType">
275 |     <restriction base="string">
276 |       <enumeration value="YYYY-MM-DD"/>
277 |       <enumeration value="YYYY-MM-DDThh:mm:ssZ"/>
278 |     </restriction>
279 |   </simpleType>
280 | 
281 |   <!-- define types used for ListMetadataFormats verb only -->
282 |   <!-- =================================================== -->
283 | 
284 |   <complexType name="metadataFormatType">
285 |     <sequence>
286 |       <element name="metadataPrefix" type="oai:metadataPrefixType"/>
287 |       <element name="schema" type="anyURI"/>
288 |       <element name="metadataNamespace" type="anyURI"/>
289 |     </sequence>
290 |   </complexType>
291 | 
292 |   <simpleType name="metadataPrefixType">
293 |     <restriction base="string">
294 |       <pattern value="[A-Za-z0-9\-_\.!~\*'\(\)]+"/>
295 |     </restriction>
296 |   </simpleType>
297 | 
298 |   <!-- define types used for ListSets verb -->
299 |   <!-- =================================== -->
300 | 
301 |   <complexType name="setType">
302 |     <sequence>
303 |       <element name="setSpec" type="oai:setSpecType"/>
304 |       <element name="setName" type="string"/>
305 |       <element name="setDescription" type="oai:descriptionType" 
306 |                minOccurs="0" maxOccurs="unbounded"/>
307 |     </sequence>
308 |   </complexType>
309 | 
310 |   <simpleType name="setSpecType">
311 |     <restriction base="string">
312 |       <pattern value="([A-Za-z0-9\-_\.!~\*'\(\)])+(:[A-Za-z0-9\-_\.!~\*'\(\)]+)*"/>
313 |     </restriction>
314 |   </simpleType>
315 | 
316 | </schema>
317 | 


--------------------------------------------------------------------------------
/doc/API.txt:
--------------------------------------------------------------------------------
  1 | ====================
  2 | Python oaipmh module
  3 | ====================
  4 | 
  5 | Introduction
  6 | ============
  7 | 
  8 | The oaipmh module implements the `OAI-PMH protocol`_. It encapsulates
  9 | this protocol in Python, so that a request to the OAI-PMH server is
 10 | just a method call from the Python perspective. The XML data that is
 11 | returned from the server is processed as well, and returned as Python
 12 | objects.
 13 | 
 14 | Note: This document is out of date and only describes the client
 15 | support.
 16 | 
 17 | API
 18 | ===
 19 | 
 20 | .. _ServerProxy:
 21 | 
 22 | ``class ServerProxy(uri [, metadataSchemaRegistry])`` 
 23 | 
 24 |   A ServerProxy instance is an object that manages communication with
 25 |   the remote OAI-PMH server. The required first argument is the URI
 26 |   that accepts OAI-PMH requests. 
 27 | 
 28 |   The second optional argument is a `MetadataSchemaRegistry`_
 29 |   instance. This registry contains the metadata schemas that are
 30 |   understood by the client. If it isn't supplied, a default and global
 31 |   schema registry will be used, with at least support for the
 32 |   ``oai_dc`` metadata scheme.
 33 | 
 34 |   The returned instance is a proxy object with methods that can be
 35 |   used to invoke the corresponding OAI-PMH requests to the server. The
 36 |   methods are named after the corresponding verbs of the OAI-PMH
 37 |   protocol, though start with a lowercase letter to follow Python
 38 |   camelCase conventions.
 39 | 
 40 |   The methods take zero or more keyword arguments; non-keyword
 41 |   arguments are not supported. The methods do some automatic checking
 42 |   to determine whether the right combination of arguments is used.
 43 | 
 44 |   The section `Protocol Requests and Responses`_ of the OAI-PMH
 45 |   standard describes the verbs (and thus methods) and the allowed
 46 |   arguments combinations.
 47 | 
 48 |   ``getRecord(identifier, metadataPrefix)``
 49 | 
 50 |     Returns a `header, metadata, about`_ tuple for the identified item.
 51 | 
 52 |   ``identify()``
 53 | 
 54 |     Get server identification information. This returns a
 55 |     `ServerIdentify`_ instance.
 56 | 
 57 |   ``listIdentifiers(metadataPrefix [, from_ [, until [, set [, resumptionToken [, max]]]]])``
 58 |      
 59 |     Returns a `lazy sequence`_ of `Header`_ instances.
 60 |   
 61 |     The result can be restricted using  `from_ and until`_ arguments.
 62 | 
 63 |     The result can be restricted for one particular set.
 64 | 
 65 |   ``listMetadataFormats([identifier])``
 66 |  
 67 |     If ``identifier`` is not specified, returns a list of
 68 |     ``metadataPrefix, schema, metadataNamespace`` tuples for this
 69 |     OAI-PMH repository.
 70 | 
 71 |     If ``identifier`` is specified, returns a list of tuples for the
 72 |     metadata associated with the identified item.
 73 | 
 74 |     ``metadataPrefix`` is a short string to uniquely identify the
 75 |     metadata format for this OAI-PMH repository. 
 76 | 
 77 |     ``schema`` is a URI to the XML schema describing the metadata
 78 |     format.
 79 | 
 80 |     ``metadataNamespace`` is a namespace URI used for to identify XML
 81 |     content in this metadata format.
 82 | 
 83 |   ``listRecords(metadataPrefix [, from_ [, until [, set [, resumptionToken [, max]]]]])``
 84 | 
 85 |     Returns a `lazy sequence`_ of `header, metadata, about`_ tuples
 86 |     for items in the repository.
 87 | 
 88 |     The result can be restricted using  `from_ and until`_ arguments.
 89 | 
 90 |     The result can be restricted for one particular set.
 91 | 
 92 |   ``listSets([resumptionToken [, max]])``
 93 | 
 94 |     Returns a `lazy sequence`_ of ``setSpec, setName, setDescription``
 95 |     tuples.
 96 | 
 97 |     ``setSpec`` is the repository-unique name of a set. It may be
 98 |     partioned into a hierarchy using a colon. See the section `Set`_
 99 |     of the OAI-PMH standard for more information.
100 | 
101 |     ``setName`` is the name of the set as it should be displayed to
102 |     end-users.
103 | 
104 |     At the time of writing ``setDescription`` is not yet supported by the
105 |     oaipmh module, and this element of the tuple will always be ``None``.
106 | 
107 |   The following methods pertain to the metadata schema system.
108 | 
109 |   ``addMetadataSchema(schema)``
110 | 
111 |     Add a MetadataSchema_ instance to the ServerProxy_. The server
112 |     will then be able to create Metadata_ instances for metadata in
113 |     the format handled by the MetadataSchema_ instance.
114 | 
115 |   ``getMetadataSchemaRegistry()``
116 | 
117 |     Get the `MetadataSchemaRegistry`_ instance that handles metadata
118 |     for this `ServerProxy`_ instance.
119 |  
120 | .. _Header:
121 | 
122 | ``class Header(..)``
123 | 
124 |   ``identifier()``
125 | 
126 |     Returns the unique identifier of this item in this repository. The
127 |     identifier must be in URI form. Some repositories may for instance
128 |     implement this as handles (see www.handle.net).
129 | 
130 |     See the `Unique Identifier`_ section of the OAI-PMH standard for
131 |     more information.
132 | 
133 |     .. _Unique Identifier: http://www.openarchives.org/OAI/openarchivesprotocol.html#UniqueIdentifier
134 |     
135 |   ``datestamp()``
136 | 
137 |     Returns the time at which this item was added or last updated
138 |     within the repository. This is in string form, in `UTCdatetime`_
139 |     format.
140 | 
141 |   ``setSpec()``
142 | 
143 |     Returns a list of the sets this item is in. The object may be in
144 |     zero or more sets. Sets are represented as strings. See also the
145 |     section `Set`_ of the OAI-PMH standard.
146 | 
147 |   ``isDeleted()``
148 | 
149 |     Returns true if this item is deleted from the server, and this is
150 |     a delete notification.
151 | 
152 | .. _Metadata:
153 | 
154 | ``class Metadata(..)``
155 | 
156 |   ``getMap()``
157 | 
158 |     Returns a dictionary with as key the metadata field names and as
159 |     values the metadata values, as extracted from the XML.
160 | 
161 |   ``getField(name)``
162 | 
163 |     Returns the metadata value for metadata field name ``name``.
164 | 
165 |     There is also a dictionary API that is the equivalent of getField;
166 |     ``metadata[name]``.
167 | 
168 | .. _ServerIdentify:
169 | 
170 | ``class ServerIdentify(..)``
171 | 
172 |   ``repositoryName()``
173 | 
174 |     Returns the human readable name of the repository.
175 |  
176 |   ``baseURL()``
177 | 
178 |     Returns the base URL for the repository (which can receive OAI-PMH
179 |     requests).
180 | 
181 |   ``protocolVersion()``
182 | 
183 |     Returns the version of the OAI-PMH protocol supported by the
184 |     repository.
185 | 
186 |   ``earliestDatestamp()``
187 | 
188 |     Returns a UTCdatetime_ that is the guaranteed earliest datestamp
189 |     that can occur in headers.
190 | 
191 |   ``deletedRecord()``
192 |     
193 |     Returns a string indicating how the repository deals with deleted
194 |     records.
195 | 
196 |     ``no``
197 | 
198 |       The repository does not support deleted records in the
199 |       protocol. If records are deleted they don't appear anymore, but
200 |       no special information is returned about them.
201 | 
202 |     ``transient`` 
203 | 
204 |       Deleted records will be returned with ``isDeleted`` status in
205 |       the header set as true but these will not be returned forever.
206 | 
207 |     ``persistent`` 
208 | 
209 |       Deleted record information is stored permanently by the server
210 |       and will be returned with ``isDeleted`` status as true if the
211 |       deleted item is accessed.
212 | 
213 |   ``granularity()``
214 |   
215 |     Returns either ``YYYY-MM-DD`` or ``YYYY-MM-DDThh:mm:ssZ``. This determines
216 |     the finest granularity of timestamps returned by the server.
217 | 
218 |   ``adminEmails()``
219 | 
220 |     Returns a list of one or more email addresses of server admins.
221 | 
222 |   ``compression()``
223 | 
224 |     Returns the compression encoding supported by the repository.
225 | 
226 |   ``description()``
227 | 
228 |     Not yet implemented.
229 | 
230 | .. _MetadataSchema:
231 | 
232 | ``class MetadataSchema(metadata_prefix, namespaces)``
233 | 
234 |   Instances of this class describe ways to turn an XML representation
235 |   of metadata into python Metadata_ instances. Fields are described by
236 |   a name, a type and a way to retrieve the field information (in the
237 |   form of a string or a list of strings) from the XML representation.
238 |   The latter is described by an XPath_ expression. This way other
239 |   metadata schemas can be represented in Python by adding a new
240 |   MetadataSchema to the ServerProxy_'s metadata schema registry.
241 | 
242 |   ``addFieldDescription(field_name, field_type, xpath)``
243 | 
244 |     Add a field description to the metadata schema. 
245 | 
246 |     ``field_name``
247 | 
248 |       The name of the field in the Metadata_ instances generated
249 |       according to this schema.
250 | 
251 |     ``field_type``
252 | 
253 |       A string indicating the data type of the metadata
254 |       field. ``bytes`` indicates an 8-bit string, ``bytesList``
255 |       indicates a list of such strings, ``text`` indicates a unicode
256 |       string and ``textList`` indicates a list of unicode strings.
257 | 
258 |     ``xpath``
259 | 
260 |       An XPath_ expression that is executed from the top of the
261 |       particular metadata section in the retrieved XML. This
262 |       expression indicates how to retrieve the metadata.
263 | 
264 | .. _MetadataSchemaRegistry:
265 |  
266 | ``class MetadataSchemaRegistry()``
267 | 
268 |   Instances of this class store a number of MetadataSchema_
269 |   instances. These handle metadata found in OAI-PMH XML resultsets
270 |   according to their ``metadata_prefix``.
271 | 
272 |   ``addMetadataSchema(metadata_schema)``
273 | 
274 |     Add a MetadataSchema_ instance to this registry.
275 | 
276 | ``header, metadata, about``
277 | ---------------------------
278 | 
279 | ``header`` is a `Header`_ instance.
280 | 
281 | ``metadata`` is a `Metadata`_ instance if the metadataPrefix argument
282 | is in a registered format, or ``None`` if the metadataPrefix is not
283 | recognized.
284 | 
285 | At the time of writing ``about`` support has not yet been implemented
286 | and will always be returned as ``None``.
287 | 
288 | ``from_`` and ``until``
289 | -----------------------
290 | 
291 | The `from_ and until`_ arguments are optional and can be used to
292 | restrict the result to information about items which were added or
293 | modified after ``from_`` and before ``until``. ``from_`` is spelled
294 | with the extra ``_`` because ``from`` (without underscore) is a
295 | reserved keyword in Python. If only ``from_`` is used there is no
296 | upper limit, if only ``until`` is used there is no lower limit. Both
297 | arguments should be strings in OAI-PMH datestamp format
298 | (i.e. ``YYYY-MM-DDThh:mm:ssZ``). See the `UTCdatetime`_ section of
299 | the OAI-PMH standard for more information.
300 | 
301 | lazy sequence
302 | -------------
303 | 
304 | The list is *lazy* in that while you can loop through it, it behaves
305 | more like an iterator than a real list (it would be a real Python 2.2+
306 | iterator if Python 2.1 did not need to be supported by this
307 | module). The system automatically asks for the next resumptionToken if
308 | one was in the reply. While you can explicitly pass a resumptionToken
309 | this is therefore not very useful as the lazy lists take care of
310 | resumptionTokens automatically.
311 | 
312 | The optional ``max`` argument is not part of the OAI-PMH protocol, but
313 | a coarse way to control how many items are read before stopping. If
314 | the amount of items exceeds ``max`` after reading a resumptionToken,
315 | the method will halt.
316 | 
317 | retry policy
318 | ------------
319 | 
320 | When the harvested OAI server returns an HTTP 503, the default policy is to
321 | retry 5 times and wait 120 seconds between each try. Due to the variety of OAI
322 | server implementations, one might want to configure those parameters. This
323 | policy can be customized through the ``custom_retry_policy`` parameter of
324 | ``BaseClient`` (and ``Client``). For instance::
325 | 
326 |     >>> client = Client('http://the-oai-base-url.org', custom_retry_policy={
327 |             # retry on both 500 and 503 HTTP return codes
328 |             'expected-errcodes': {500, 503},
329 |             # wait for 30 seconds before retrying
330 |             'wait-default': 30,
331 |             # retry 10 times
332 |             'retry': 10,
333 |         })
334 | 
335 | 
336 | 
337 | 
338 | .. _OAI-PMH protocol: http://www.openarchives.org/OAI/openarchivesprotocol.html
339 | 
340 | .. _Protocol Requests and Responses: http://www.openarchives.org/OAI/openarchivesprotocol.html#ProtocolMessages
341 | 
342 | .. _UTCdatetime: http://www.openarchives.org/OAI/openarchivesprotocol.html#Dates
343 | 
344 | .. _Set: http://www.openarchives.org/OAI/openarchivesprotocol.html#Set 
345 | 
346 | .. _XPath: http://www.w3.org/TR/xpath


--------------------------------------------------------------------------------
/src/oaipmh/client.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2003, 2004, 2005 Infrae
  2 | # Released under the BSD license (see LICENSE.txt)
  3 | from __future__ import nested_scopes
  4 | from __future__ import absolute_import
  5 | 
  6 | import six
  7 | 
  8 | try:
  9 |     import urllib.request as urllib2
 10 |     from urllib.parse import urlencode
 11 | except ImportError:
 12 |     import urllib2
 13 |     from urllib import urlencode
 14 | 
 15 | import sys
 16 | import base64
 17 | from lxml import etree
 18 | import time
 19 | import codecs
 20 | 
 21 | from oaipmh import common, metadata, validation, error
 22 | from oaipmh.datestamp import datestamp_to_datetime, datetime_to_datestamp
 23 | 
# Default number of seconds to sleep before retrying a request that failed
# with one of the expected HTTP error codes.
WAIT_DEFAULT = 120 # two minutes
# Default number of attempts made before the client gives up.
WAIT_MAX = 5
 26 | 
class Error(Exception):
    """Generic error raised by this client module.

    Raised when the server reports a non-standard granularity and when a
    request has been retried too many times.
    """
    pass
 29 | 
 30 | 
 31 | class BaseClient(common.OAIPMH):
 32 |     # retry policy on error. Default is to retry request `WAIT_MAX` times
 33 |     # on HTTP 503 errors, waiting `WAIT_DEFAULT` before each retry
 34 |     default_retry_policy = {
 35 |         # how many seconds should we wait before each retry
 36 |         'wait-default': WAIT_DEFAULT,
 37 |         # how many times should we retry
 38 |         'retry': WAIT_MAX,
 39 |         # which HTTP codes are expected
 40 |         'expected-errcodes': {503},
 41 |     }
 42 | 
 43 |     def __init__(self, metadata_registry=None, custom_retry_policy=None):
 44 |         self._metadata_registry = (
 45 |             metadata_registry or metadata.global_metadata_registry)
 46 |         self._ignore_bad_character_hack = 0
 47 |         self._day_granularity = False
 48 |         self.retry_policy = self.default_retry_policy.copy()
 49 |         if custom_retry_policy is not None:
 50 |             self.retry_policy.update(custom_retry_policy)
 51 | 
 52 |     def updateGranularity(self):
 53 |         """Update the granularity setting dependent on that the server says.
 54 |         """
 55 |         identify = self.identify()
 56 |         granularity = identify.granularity()
 57 |         if granularity == 'YYYY-MM-DD':
 58 |             self._day_granularity = True
 59 |         elif granularity == 'YYYY-MM-DDThh:mm:ssZ':
 60 |             self._day_granularity= False
 61 |         else:
 62 |             raise Error("Non-standard granularity on server: %s" % granularity)
 63 | 
 64 |     def handleVerb(self, verb, kw):
 65 |         # validate kw first
 66 |         validation.validateArguments(verb, kw)
 67 |         # encode datetimes as datestamps
 68 |         from_ = kw.get('from_')
 69 |         if from_ is not None:
 70 |             # turn it into 'from', not 'from_' before doing actual request
 71 |             kw['from'] = datetime_to_datestamp(from_,
 72 |                                                self._day_granularity)
 73 |         if 'from_' in kw:
 74 |             # always remove it from the kw, no matter whether it be None or not
 75 |             del kw['from_']
 76 | 
 77 |         until = kw.get('until')
 78 |         if until is not None:
 79 |             kw['until'] = datetime_to_datestamp(until,
 80 |                                                 self._day_granularity)
 81 |         elif 'until' in kw:
 82 |             # until is None but is explicitly in kw, remove it
 83 |             del kw['until']
 84 | 
 85 |         # now call underlying implementation
 86 |         method_name = verb + '_impl'
 87 |         return getattr(self, method_name)(
 88 |             kw, self.makeRequestErrorHandling(verb=verb, **kw))
 89 | 
 90 |     def getNamespaces(self):
 91 |         """Get OAI namespaces.
 92 |         """
 93 |         return {'oai': 'http://www.openarchives.org/OAI/2.0/'}
 94 | 
 95 |     def getMetadataRegistry(self):
 96 |         """Return the metadata registry in use.
 97 | 
 98 |         Do we want to allow the returning of the global registry?
 99 |         """
100 |         return self._metadata_registry
101 | 
102 |     def ignoreBadCharacters(self, true_or_false):
103 |         """Set to ignore bad characters in UTF-8 input.
104 |         This is a hack to get around well-formedness errors of
105 |         input sources which *should* be in UTF-8 but for some reason
106 |         aren't completely.
107 |         """
108 |         self._ignore_bad_character_hack = true_or_false
109 | 
110 |     def parse(self, xml):
111 |         """Parse the XML to a lxml tree.
112 |         """
113 |         # XXX this is only safe for UTF-8 encoded content,
114 |         # and we're basically hacking around non-wellformedness anyway,
115 |         # but oh well
116 |         if self._ignore_bad_character_hack:
117 |             xml = six.text_type(xml, 'UTF-8', 'replace')
118 |             # also get rid of character code 12
119 |             xml = xml.replace(chr(12), '?')
120 |             xml = xml.encode('UTF-8')
121 |         if six.PY3:
122 |             if hasattr(xml, "encode"):
123 |                 xml = xml.encode("utf-8")
124 |             # xml = xml.encode("utf-8")
125 |         return etree.XML(xml)
126 | 
127 |     # implementation of the various methods, delegated here by
128 |     # handleVerb method
129 | 
130 |     def GetRecord_impl(self, args, tree):
131 |         records, token = self.buildRecords(
132 |             args['metadataPrefix'],
133 |             self.getNamespaces(),
134 |             self._metadata_registry,
135 |             tree
136 |             )
137 |         assert token is None
138 |         return records[0]
139 | 
140 |     def GetMetadata_impl(self, args, tree):
141 |         return tree
142 | 
143 |     def Identify_impl(self, args, tree):
144 |         namespaces = self.getNamespaces()
145 |         evaluator = etree.XPathEvaluator(tree, namespaces=namespaces)
146 |         identify_node = evaluator.evaluate(
147 |             '/oai:OAI-PMH/oai:Identify')[0]
148 |         identify_evaluator = etree.XPathEvaluator(identify_node,
149 |                                                   namespaces=namespaces)
150 |         e = identify_evaluator.evaluate
151 | 
152 |         repositoryName = e('string(oai:repositoryName/text())')
153 |         baseURL = e('string(oai:baseURL/text())')
154 |         protocolVersion = e('string(oai:protocolVersion/text())')
155 |         adminEmails = e('oai:adminEmail/text()')
156 |         earliestDatestamp = datestamp_to_datetime(
157 |             e('string(oai:earliestDatestamp/text())'))
158 |         deletedRecord = e('string(oai:deletedRecord/text())')
159 |         granularity = e('string(oai:granularity/text())')
160 |         compression = e('oai:compression/text()')
161 |         # XXX description
162 |         identify = common.Identify(
163 |             repositoryName, baseURL, protocolVersion,
164 |             adminEmails, earliestDatestamp,
165 |             deletedRecord, granularity, compression)
166 |         return identify
167 | 
168 |     def ListIdentifiers_impl(self, args, tree):
169 |         namespaces = self.getNamespaces()
170 |         def firstBatch():
171 |             return self.buildIdentifiers(namespaces, tree)
172 |         def nextBatch(token):
173 |             tree = self.makeRequestErrorHandling(verb='ListIdentifiers',
174 |                                                  resumptionToken=token)
175 |             return self.buildIdentifiers(namespaces, tree)
176 |         return ResumptionListGenerator(firstBatch, nextBatch)
177 | 
178 |     def ListMetadataFormats_impl(self, args, tree):
179 |         namespaces = self.getNamespaces()
180 |         evaluator = etree.XPathEvaluator(tree,
181 |                                          namespaces=namespaces)
182 | 
183 |         metadataFormat_nodes = evaluator.evaluate(
184 |             '/oai:OAI-PMH/oai:ListMetadataFormats/oai:metadataFormat')
185 |         metadataFormats = []
186 |         for metadataFormat_node in metadataFormat_nodes:
187 |             e = etree.XPathEvaluator(metadataFormat_node,
188 |                                      namespaces=namespaces).evaluate
189 |             metadataPrefix = e('string(oai:metadataPrefix/text())')
190 |             schema = e('string(oai:schema/text())')
191 |             metadataNamespace = e('string(oai:metadataNamespace/text())')
192 |             metadataFormat = (metadataPrefix, schema, metadataNamespace)
193 |             metadataFormats.append(metadataFormat)
194 | 
195 |         return metadataFormats
196 | 
197 |     def ListRecords_impl(self, args, tree):
198 |         namespaces = self.getNamespaces()
199 |         metadata_prefix = args['metadataPrefix']
200 |         metadata_registry = self._metadata_registry
201 |         def firstBatch():
202 |             return self.buildRecords(
203 |                 metadata_prefix, namespaces,
204 |                 metadata_registry, tree)
205 |         def nextBatch(token):
206 |             tree = self.makeRequestErrorHandling(
207 |                 verb='ListRecords',
208 |                 resumptionToken=token)
209 |             return self.buildRecords(
210 |                 metadata_prefix, namespaces,
211 |                 metadata_registry, tree)
212 |         return ResumptionListGenerator(firstBatch, nextBatch)
213 | 
214 |     def ListSets_impl(self, args, tree):
215 |         namespaces = self.getNamespaces()
216 |         def firstBatch():
217 |             return self.buildSets(namespaces, tree)
218 |         def nextBatch(token):
219 |             tree = self.makeRequestErrorHandling(
220 |                 verb='ListSets',
221 |                 resumptionToken=token)
222 |             return self.buildSets(namespaces, tree)
223 |         return ResumptionListGenerator(firstBatch, nextBatch)
224 | 
225 |     # various helper methods
226 | 
227 |     def buildRecords(self,
228 |                      metadata_prefix, namespaces, metadata_registry, tree):
229 |         # first find resumption token if available
230 |         evaluator = etree.XPathEvaluator(tree,
231 |                                          namespaces=namespaces)
232 |         token = evaluator.evaluate(
233 |             'string(/oai:OAI-PMH/*/oai:resumptionToken/text())')
234 |         if token.strip() == '':
235 |             token = None
236 |         record_nodes = evaluator.evaluate(
237 |             '/oai:OAI-PMH/*/oai:record')
238 |         result = []
239 |         for record_node in record_nodes:
240 |             record_evaluator = etree.XPathEvaluator(record_node,
241 |                                                     namespaces=namespaces)
242 |             e = record_evaluator.evaluate
243 |             # find header node
244 |             header_node = e('oai:header')[0]
245 |             # create header
246 |             header = buildHeader(header_node, namespaces)
247 |             # find metadata node
248 |             metadata_list = e('oai:metadata')
249 |             if metadata_list:
250 |                 metadata_node = metadata_list[0]
251 |                 # create metadata
252 |                 metadata = metadata_registry.readMetadata(metadata_prefix,
253 |                                                           metadata_node)
254 |             else:
255 |                 metadata = None
256 |             # XXX TODO: about, should be third element of tuple
257 |             result.append((header, metadata, None))
258 |         return result, token
259 | 
260 |     def buildIdentifiers(self, namespaces, tree):
261 |         evaluator = etree.XPathEvaluator(tree,
262 |                                          namespaces=namespaces)
263 |         # first find resumption token is available
264 |         token = evaluator.evaluate(
265 |             'string(/oai:OAI-PMH/*/oai:resumptionToken/text())')
266 |         #'string(/oai:OAI-PMH/oai:ListIdentifiers/oai:resumptionToken/text())')
267 |         if token.strip() == '':
268 |             token = None
269 |         header_nodes = evaluator.evaluate(
270 |                 '/oai:OAI-PMH/oai:ListIdentifiers/oai:header')
271 |         result = []
272 |         for header_node in header_nodes:
273 |             header = buildHeader(header_node, namespaces)
274 |             result.append(header)
275 |         return result, token
276 | 
277 |     def buildSets(self, namespaces, tree):
278 |         evaluator = etree.XPathEvaluator(tree,
279 |                                          namespaces=namespaces)
280 |         # first find resumption token if available
281 |         token = evaluator.evaluate(
282 |             'string(/oai:OAI-PMH/oai:ListSets/oai:resumptionToken/text())')
283 |         if token.strip() == '':
284 |             token = None
285 |         set_nodes = evaluator.evaluate(
286 |             '/oai:OAI-PMH/oai:ListSets/oai:set')
287 |         sets = []
288 |         for set_node in set_nodes:
289 |             e = etree.XPathEvaluator(set_node,
290 |                                      namespaces=namespaces).evaluate
291 |             # make sure we get back unicode strings instead
292 |             # of lxml.etree._ElementUnicodeResult objects.
293 |             setSpec = six.text_type(e('string(oai:setSpec/text())'))
294 |             setName = six.text_type(e('string(oai:setName/text())'))
295 |             # XXX setDescription nodes
296 |             sets.append((setSpec, setName, None))
297 |         return sets, token
298 | 
299 |     def makeRequestErrorHandling(self, **kw):
300 |         xml = self.makeRequest(**kw)
301 |         try:
302 |             tree = self.parse(xml)
303 |         except SyntaxError:
304 |             raise error.XMLSyntaxError(kw)
305 |         # check whether there are errors first
306 |         e_errors = tree.xpath('/oai:OAI-PMH/oai:error',
307 |                               namespaces=self.getNamespaces())
308 |         if e_errors:
309 |             # XXX right now only raise first error found, does not
310 |             # collect error info
311 |             for e_error in e_errors:
312 |                 code = e_error.get('code')
313 |                 msg = e_error.text
314 |                 if code not in ['badArgument', 'badResumptionToken',
315 |                                 'badVerb', 'cannotDisseminateFormat',
316 |                                 'idDoesNotExist', 'noRecordsMatch',
317 |                                 'noMetadataFormats', 'noSetHierarchy']:
318 |                     raise error.UnknownError(
319 |                           "Unknown error code from server: %s, message: %s" % (
320 |                         code, msg))
321 |                 # find exception in error module and raise with msg
322 |                 raise getattr(error, code[0].upper() + code[1:] + 'Error')(msg)
323 |         return tree
324 | 
325 |     def makeRequest(self, **kw):
326 |         raise NotImplementedError
327 | 
class Client(BaseClient):
    """OAI-PMH client that fetches replies over HTTP or from a local file."""

    def __init__(self, base_url, metadata_registry=None, credentials=None,
                 local_file=False, force_http_get=False, custom_retry_policy=None):
        """Set up the client.

        ``base_url`` is the URL of the repository, or the path of an XML
        file when ``local_file`` is true.

        ``credentials`` is an optional ``(username, password)`` tuple that
        is sent as HTTP Basic authentication.

        ``force_http_get`` makes the client use GET requests instead of
        POST requests.

        ``metadata_registry`` and ``custom_retry_policy`` are passed on to
        ``BaseClient``.
        """
        BaseClient.__init__(self, metadata_registry,
                            custom_retry_policy=custom_retry_policy)
        self._base_url = base_url
        self._local_file = local_file
        self._force_http_get = force_http_get
        if credentials is not None:
            userpass = '%s:%s' % credentials
            # base64 works on bytes; b64encode (unlike the removed
            # encodestring) never inserts line breaks into the result
            if isinstance(userpass, six.text_type):
                userpass = userpass.encode('utf-8')
            encoded = base64.b64encode(userpass)
            # keep a native string so that it can be appended to 'Basic '
            if not isinstance(encoded, str):
                encoded = encoded.decode('ascii')
            self._credentials = encoded
        else:
            self._credentials = None

    def makeRequest(self, **kw):
        """Either load a local XML file or actually retrieve XML from a server.
        """
        if self._local_file:
            with codecs.open(self._base_url, 'r', 'utf-8') as xmlfile:
                text = xmlfile.read()
            # NOTE(review): non-ASCII characters are replaced by '?' here,
            # which loses data -- confirm whether this is intended
            return text.encode('ascii', 'replace')
        else:
            # XXX include From header?
            headers = {'User-Agent': 'pyoai'}
            if self._credentials is not None:
                headers['Authorization'] = 'Basic ' + self._credentials.strip()
            if self._force_http_get:
                request_url = '%s?%s' % (self._base_url, urlencode(kw))
                request = urllib2.Request(request_url, headers=headers)
            else:
                # passing data makes this a POST request
                binary_data = urlencode(kw).encode('utf-8')
                request = urllib2.Request(
                    self._base_url, data=binary_data, headers=headers)

            return retrieveFromUrlWaiting(
                request,
                wait_max=self.retry_policy['retry'],
                wait_default=self.retry_policy['wait-default'],
                expected_errcodes=self.retry_policy['expected-errcodes']
            )
368 | 
def buildHeader(header_node, namespaces):
    """Create a ``common.Header`` from an OAI-PMH header element.

    The identifier, the datestamp (converted with
    ``datestamp_to_datetime``), the list of setSpec strings and the
    deleted status are read from ``header_node`` by XPath.
    """
    xpath = etree.XPathEvaluator(header_node, namespaces=namespaces).evaluate
    record_id = xpath('string(oai:identifier/text())')
    raw_datestamp = str(xpath('string(oai:datestamp/text())'))
    stamp = datestamp_to_datetime(raw_datestamp)
    set_specs = []
    for spec_text in xpath('oai:setSpec/text()'):
        set_specs.append(str(spec_text))
    # XPath boolean: true when the status attribute equals 'deleted'
    is_deleted = xpath("@status = 'deleted'")
    return common.Header(header_node, record_id, stamp, set_specs, is_deleted)
378 | 
def ResumptionListGenerator(firstBatch, nextBatch):
    """Yield the items of all batches, following resumption tokens.

    ``firstBatch()`` and ``nextBatch(token)`` both return an
    ``(items, token)`` pair.  Iteration ends as soon as a batch comes
    without a token or turns out to be empty.
    """
    batch, token = firstBatch()
    while True:
        yielded = 0
        for yielded, entry in enumerate(batch, 1):
            yield entry
        # an empty batch ends the iteration even if a token was supplied
        if yielded == 0 or token is None:
            return
        batch, token = nextBatch(token)
389 | 
def retrieveFromUrlWaiting(request,
                           wait_max=WAIT_MAX, wait_default=WAIT_DEFAULT,
                           expected_errcodes=frozenset([503])):
    """Get text from URL, handling 503 Retry-After.

    ``request`` is opened at most ``wait_max`` times.  When the server
    answers with one of the ``expected_errcodes``, the function sleeps for
    the number of seconds given in the Retry-After header, or for
    ``wait_default`` seconds when that header is missing or is not a
    non-negative number of seconds, and then tries again.

    Returns the body of the response.  Any other HTTP error is re-raised;
    ``Error`` is raised when all attempts have failed.
    """
    for attempt in range(wait_max):
        try:
            f = urllib2.urlopen(request)
        except urllib2.HTTPError as e:
            if e.code not in expected_errcodes:
                # reraise any other HTTP error
                raise
            try:
                retryAfter = int(e.hdrs.get('Retry-After'))
            except (TypeError, ValueError):
                # header is missing, or is an HTTP-date instead of a number
                # of seconds
                retryAfter = None
            if retryAfter is None or retryAfter < 0:
                retryAfter = wait_default
            # there is no point in waiting when no attempt is left
            if attempt + 1 < wait_max:
                time.sleep(retryAfter)
        else:
            try:
                text = f.read()
            finally:
                f.close()
            # we successfully opened without having to wait
            break
    else:
        raise Error("Waited too often (more than %s times)" % wait_max)
    return text
418 | 
class ServerClient(BaseClient):
    """Client that hands its requests directly to a server object."""

    def __init__(self, server, metadata_registry=None):
        """Remember ``server``; ``metadata_registry`` goes to ``BaseClient``."""
        BaseClient.__init__(self, metadata_registry)
        self._server = server

    def makeRequest(self, **kw):
        """Pass the request arguments, as a dictionary, to the server's
        ``handleRequest`` and return its result.
        """
        handle = self._server.handleRequest
        return handle(kw)
426 | 


--------------------------------------------------------------------------------
/src/oaipmh/tests/test_server.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | import os
  3 | import six
  4 | try:
  5 |     from StringIO import StringIO
  6 | except ImportError:
  7 |     from io import StringIO, BytesIO
  8 | from oaipmh import server, client, common, metadata, error
  9 | from lxml import etree
 10 | from datetime import datetime
 11 | import fakeclient
 12 | import fakeserver
 13 | 
# Local alias for the NS_OAIPMH constant of the server module.
NS_OAIPMH = server.NS_OAIPMH
 15 | 
 16 | def fileInTestDir(name):
 17 |     _testdir = os.path.split(__file__)[0]
 18 |     return os.path.join(_testdir, name)
 19 | 
# load up schema: the OAI-PMH.xsd file that sits next to this module is
# parsed once at import time and shared by all tests
oaischema = etree.XMLSchema(etree.parse(fileInTestDir('OAI-PMH.xsd')))
 22 | 
 23 | def etree_parse(xml):
 24 |     if six.PY2:
 25 |         return etree.parse(StringIO(xml))
 26 |     return etree.parse(BytesIO(xml)) # .decode("utf-8")))
 27 | 
 28 | class XMLTreeServerTestCase(unittest.TestCase):
 29 |     
 30 |     def setUp(self):
 31 |         self._server = self.getXMLTreeServer()
 32 | 
 33 |     def getXMLTreeServer(self):
 34 |         directory = os.path.dirname(__file__)
 35 |         fake1 = os.path.join(directory, 'fake1')
 36 |         myserver = fakeclient.FakeClient(fake1)
 37 |         metadata_registry = metadata.MetadataRegistry()
 38 |         metadata_registry.registerWriter('oai_dc', server.oai_dc_writer)
 39 |         return server.XMLTreeServer(server.Resumption(myserver),
 40 |                                     metadata_registry)
 41 | 
 42 |     def test_getRecord(self):
 43 |         tree = self._server.getRecord(
 44 |             metadataPrefix='oai_dc', identifier='hdl:1765/315')
 45 |         self.assert_(oaischema.validate(tree))
 46 | 
 47 |     def test_getMetadata(self):
 48 |         tree = self._server.getMetadata(
 49 |             metadataPrefix='oai_dc', identifier='hdl:1765/315')
 50 |         self.assertEquals(tree.tag,
 51 |                           '{http://www.openarchives.org/OAI/2.0/oai_dc/}dc')
 52 |         
 53 |     def test_identify(self):
 54 |         tree = self._server.identify()
 55 |         self.assert_(oaischema.validate(tree))
 56 | 
 57 |     def test_listIdentifiers(self):
 58 |         tree = self._server.listIdentifiers(
 59 |             from_=datetime(2003, 4, 10),
 60 |             metadataPrefix='oai_dc')
 61 |         self.assert_(oaischema.validate(tree))
 62 |         
 63 |     def test_listMetadataFormats(self):
 64 |         tree = self._server.listMetadataFormats()
 65 |         self.assert_(oaischema.validate(tree))
 66 | 
 67 |     def test_listRecords(self):
 68 |         tree = self._server.listRecords(
 69 |             from_=datetime(2003, 4, 10),
 70 |             metadataPrefix='oai_dc')
 71 |         self.assert_(oaischema.validate(tree))
 72 | 
 73 |     def test_listSets(self):
 74 |         tree = self._server.listSets()
 75 |         self.assert_(oaischema.validate(tree))
 76 | 
 77 |     def test_namespaceDeclarations(self):
 78 |         # according to the spec, all namespaces used in the metadata
 79 |         # element should be declared on the metadata element,
 80 |         # and not on root or ancestor elements (big sigh..)
 81 |         # this works, except for the xsi namespace which is already declared
 82 |         # on the root element, which means lxml will not declare it again on
 83 |         # the metadata element
 84 | 
 85 |         tree = self._server.getRecord(
 86 |             metadataPrefix='oai_dc', identifier='hdl:1765/315')
 87 |         # ugly xml manipulation, this is probably why the requirement is in
 88 |         # the spec (yuck!)
 89 |         xml = etree.tostring(tree)
 90 |         if six.PY3:
 91 |             xml = xml.decode("utf-8")
 92 |         xml = xml.split('<metadata>')[-1].split('</metadata>')[0]
 93 |         first_el = xml.split('>')[0]
 94 |         self.assertTrue(first_el.startswith('<oai_dc:dc'))
 95 |         self.assertTrue(
 96 |             'xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/"'
 97 |             in first_el) 
 98 |         self.assertTrue(
 99 |             'xmlns:dc="http://purl.org/dc/elements/1.1/"'
100 |             in first_el) 
101 |         
102 | class ServerTestCase(unittest.TestCase):
103 |     """
104 |     Most of the tests are in the XMLTreeServerTestCase,
105 |     but to test integration with XML directly (argument passing and such),
106 |     a few test cases here.
107 |     """
108 |     
109 |     def setUp(self):
110 |         self._server = self.getServer()
111 |         
112 |     def getServer(self):
113 |         directory = os.path.dirname(__file__)
114 |         fake1 = os.path.join(directory, 'fake1')
115 |         myserver = fakeclient.FakeClient(fake1)
116 |         metadata_registry = metadata.MetadataRegistry()
117 |         metadata_registry.registerWriter('oai_dc', server.oai_dc_writer)
118 |         return server.Server(myserver, metadata_registry)
119 | 
120 |     def test_identify(self):
121 |         xml = self._server.identify()
122 |         tree = etree_parse(xml)
123 |         self.assert_(oaischema.validate(tree))
124 |         
125 |     def test_listIdentifiers(self):
126 |         xml = self._server.listIdentifiers(
127 |             from_=datetime(2003, 4, 10),
128 |             metadataPrefix='oai_dc')
129 |         tree = etree_parse(xml)
130 |         self.assert_(oaischema.validate(tree))
131 |         
132 | class ResumptionTestCase(unittest.TestCase):
133 |     def setUp(self):
134 |         self._fakeserver = fakeserver.FakeServer()
135 |         self._server = server.Resumption(self._fakeserver, 10)
136 | 
137 |     def test_resumption(self):
138 |         headers = []
139 |         result, token = self._server.listIdentifiers(metadataPrefix='oai_dc')
140 |         headers.extend(result)
141 |         while token is not None:
142 |             result, token = self._server.listIdentifiers(resumptionToken=token)
143 |             headers.extend(result)
144 |         self.assertEquals([str(i) for i in range(100)],
145 |                           [header.identifier() for header in headers])
146 | 
147 |     def test_tree_resumption(self):
148 |         metadata_registry = metadata.MetadataRegistry()
149 |         metadata_registry.registerWriter('oai_dc', server.oai_dc_writer)
150 |         myserver = server.XMLTreeServer(
151 |             self._server, metadata_registry)
152 |         tree = myserver.listIdentifiers(metadataPrefix='oai_dc')
153 |         self.assert_(oaischema.validate(tree))
154 |         # we should find a resumptionToken element with text
155 |         self.assert_(
156 |             tree.xpath('//oai:resumptionToken/text()', 
157 |                        namespaces={'oai': NS_OAIPMH} ))
158 |         
159 | class BatchingResumptionTestCase(unittest.TestCase):
160 |     def setUp(self):
161 |         self._fakeserver = fakeserver.BatchingFakeServer()
162 |         self._server = server.BatchingResumption(self._fakeserver, 10)
163 | 
164 |     def _listIdentifiers(self, resumption_server, expected_length):
165 |         headers = []
166 |         result, token = resumption_server.listIdentifiers(
167 |             metadataPrefix='oai_dc')
168 |         headers.extend(result)
169 |         self.assert_(token is not None)
170 |         while token is not None:
171 |             self.assert_(result)
172 |             self.assertEquals(expected_length, len(result))
173 |             result, token = resumption_server.listIdentifiers(
174 |                 resumptionToken=token)
175 |             headers.extend(result)
176 |         self.assertEquals([str(i) for i in range(100)],
177 |                           [header.identifier() for header in headers])
178 | 
179 |     def test_resumption(self):
180 |         self._listIdentifiers(self._server, 10)
181 |  
182 |     def test_resumption_nonexact(self):
183 |         myserver = server.BatchingResumption(self._fakeserver, 13)
184 |         self._listIdentifiers(myserver, 13)
185 | 
186 |     def test_resumption_overflow(self):
187 |         myserver = server.BatchingResumption(self._fakeserver, 300)
188 |         result, token = myserver.listIdentifiers(
189 |             metadataPrefix='oai_dc')
190 |         self.assert_(token is None)
191 |         self.assertEquals([str(i) for i in range(100)],
192 |                           [header.identifier() for header in result])
193 |         
194 |     def test_tree_resumption(self):
195 |         metadata_registry = metadata.MetadataRegistry()
196 |         metadata_registry.registerWriter('oai_dc', server.oai_dc_writer)
197 |         myserver = server.XMLTreeServer(self._server, metadata_registry)
198 |         tree = myserver.listIdentifiers(metadataPrefix='oai_dc')
199 |         self.assert_(oaischema.validate(tree))
200 |         # we should find a resumptionToken element with text
201 |         self.assert_(
202 |             tree.xpath('//oai:resumptionToken/text()', 
203 |                        namespaces={'oai': NS_OAIPMH} ))
204 |         
205 | class ClientServerTestCase(unittest.TestCase):
206 |     def setUp(self):
207 |         self._fakeserver = fakeserver.FakeServer()
208 |         metadata_registry = metadata.MetadataRegistry()
209 |         metadata_registry.registerWriter('oai_dc', server.oai_dc_writer)
210 |         metadata_registry.registerReader('oai_dc', metadata.oai_dc_reader)
211 |         self._server = server.Server(self._fakeserver, metadata_registry,
212 |                                      resumption_batch_size=7)
213 |         self._client = client.ServerClient(self._server, metadata_registry)
214 | 
215 |     def test_listIdentifiers(self):
216 |         headers = self._client.listIdentifiers(metadataPrefix='oai_dc')
217 |         self.assertEquals([str(i) for i in range(100)],
218 |                           [header.identifier() for header in headers])
219 | 
220 |     def test_listRecords(self):
221 |         records = self._client.listRecords(metadataPrefix='oai_dc')
222 |         records = list(records)
223 |         self.assertEquals(100, len(records))
224 |         metadatas = [metadata for (header, metadata, about) in records]
225 |         result = []
226 |         for metadata in metadatas:
227 |             result.append(metadata.getField('title')[0])
228 |         expected = ['Title %s' % i for i in range(100)]
229 |         self.assertEquals(expected, result)
230 |         #for record in records:
231 |         #    print record[0].datestamp()
232 | 
233 |     def test_listIdentifiersFromUntil(self):
234 |         headers = self._client.listIdentifiers(metadataPrefix='oai_dc',
235 |                                                from_=datetime(2004, 1, 1),
236 |                                                until=datetime(2004, 7, 1))
237 |         # we expect 52 items
238 |         headers = list(headers)
239 |         self.assertEquals(52, len(headers))
240 | 
241 |     def test_listIdentifiersFromUntil_nothing(self):
242 |         self.assertRaises(error.NoRecordsMatchError,
243 |                           self._client.listIdentifiers,
244 |                           metadataPrefix='oai_dc', from_=datetime(2003, 1, 1),
245 |                           until=datetime(2003, 7, 1))        
246 |         
247 |         
248 | class ErrorTestCase(unittest.TestCase):
249 |     def setUp(self):
250 |         self._fakeserver = fakeserver.FakeServer()
251 |         metadata_registry = metadata.MetadataRegistry()
252 |         metadata_registry.registerWriter('oai_dc', server.oai_dc_writer)
253 |         metadata_registry.registerReader('oai_dc', metadata.oai_dc_reader)
254 |         self._server = server.Server(self._fakeserver, metadata_registry,
255 |                                      resumption_batch_size=7)
256 | 
257 |     def test_badArgument(self):
258 |         xml = self._server.handleRequest({'verb': 'Identify',
259 |                                           'foo' : 'Bar'})
260 |         self.assertErrors([('badArgument', 'Unknown argument: foo')],
261 |                           xml)
262 |         # need more tests for different variations (required, etc)
263 | 
264 |     def test_noArgument(self):
265 |         xml = self._server.handleRequest({})
266 |         self.assertErrors([('badVerb', 'Required verb argument not found.')],
267 |                           xml)
268 |         
269 |     def test_badVerb(self):
270 |         xml = self._server.handleRequest({'verb': 'Frotz'})
271 |         self.assertErrors([('badVerb', 'Illegal verb: Frotz')], xml)
272 | 
273 |     def test_badResumptionToken(self):
274 |         xml = self._server.handleRequest({'verb': 'ListRecords',
275 |                                           'resumptionToken': 'foobar'})
276 |         self.assertErrors(
277 |             [('badResumptionToken',
278 |              'Unable to decode resumption token: foobar')], xml)
279 | 
280 |     def test_cannotDisseminateFormat(self):
281 |         xml = self._server.handleRequest({'verb': 'ListRecords',
282 |                                           'metadataPrefix': 'nonexistent'})
283 |         self.assertErrors(
284 |             [('cannotDisseminateFormat',
285 |               'Unknown metadata format: nonexistent')],
286 |             xml)
287 | 
288 |     def test_idDoesNotExist(self):
289 |         xml = self._server.handleRequest({'verb': 'GetRecord',
290 |                                           'metadataPrefix': 'oai_dc',
291 |                                           'identifier': '500'})
292 |         self.assertErrors(
293 |             [('idDoesNotExist',
294 |               'Id does not exist: 500')],
295 |             xml)
296 | 
297 |     def test_badDateArgument(self):
298 |         xml = self._server.handleRequest({'verb': 'ListRecords',
299 |                                           'metadataPrefix': 'oai_dc',
300 |                                           'from': 'junk'})
301 |         self.assertErrors(
302 |             [('badArgument',
303 |               "The value 'junk' of the argument 'from' is not valid.")],
304 |             xml)
305 |         xml = self._server.handleRequest({'verb': 'ListRecords',
306 |                                           'metadataPrefix': 'oai_dc',
307 |                                           'until': 'junk'})
308 |         self.assertErrors(
309 |             [('badArgument',
310 |               "The value 'junk' of the argument 'until' is not valid.")],
311 |             xml)
312 | 
313 | 
314 |     def testDifferentGranularities(self):
315 |         xml = self._server.handleRequest({'verb': 'ListRecords',
316 |                                           'metadataPrefix': 'oai_dc',
317 |                                           'from': '2006-01-01',
318 |                                           'until': '2008-01-01T00:00:00Z'})
319 |         self.assertErrors(
320 |             [('badArgument',
321 |               "The request has different granularities for the from"
322 |               " and until parameters")],
323 |             xml)
324 |         
325 |     
326 |     def assertErrors(self, errors, xml):
327 |         self.assertEquals(errors, self.findErrors(xml))
328 |         
329 |     def findErrors(self, xml):
330 |         # parse
331 |         tree = etree_parse(xml)
332 |         # validate xml
333 |         self.assert_(oaischema.validate(tree))
334 |         result = []
335 |         for e in tree.xpath(
336 |             '//oai:error', namespaces={'oai': NS_OAIPMH}):
337 |             result.append((e.get('code'), e.text))
338 |         result.sort()
339 |         return result
340 | 
341 | class DeletionTestCase(unittest.TestCase):
342 |     def setUp(self):
343 |         self._fakeserver = fakeserver.FakeServerWithDeletions()
344 |         metadata_registry = metadata.MetadataRegistry()
345 |         metadata_registry.registerWriter('oai_dc', server.oai_dc_writer)
346 |         metadata_registry.registerReader('oai_dc', metadata.oai_dc_reader)
347 |         self._server = server.Server(self._fakeserver, metadata_registry,
348 |                                      resumption_batch_size=7)
349 |         self._client = client.ServerClient(self._server, metadata_registry)
350 | 
351 |     def test_listIdentifiers(self):
352 |         headers = self._client.listIdentifiers(metadataPrefix='oai_dc')
353 |         # we expect 12 items
354 |         headers = list(headers)
355 |         self.assertEquals(12, len(headers))
356 |         # now delete
357 |         self._fakeserver.deletionEvent()
358 |         # check again, we expect 12 items, but half of which is deleted
359 |         headers = self._client.listIdentifiers(metadataPrefix='oai_dc')
360 |         headers = list(headers)
361 |         self.assertEquals(12, len(headers))
362 |         deleted_count = 0
363 |         for header in headers:
364 |             if header.isDeleted():
365 |                 deleted_count += 1
366 |         self.assertEquals(6, deleted_count)
367 | 
368 |     def test_listRecords(self):
369 |         self._fakeserver.deletionEvent()
370 |         # we expect 12 items, but half of which is deleted
371 |         records = self._client.listRecords(metadataPrefix='oai_dc')
372 |         records = list(records)
373 |         self.assertEquals(12, len(records))
374 |         deleted_count = 0
375 |         for header, metadata, about in records:
376 |             if header.isDeleted():
377 |                 deleted_count += 1
378 |                 self.assertEquals(None, metadata)
379 |         self.assertEquals(6, deleted_count)
380 | 
381 |     def test_getRecord(self):
382 |         self._fakeserver.deletionEvent()
383 |         header, metadata, about = self._fakeserver.getRecord(
384 |             metadataPrefix='oai_dc',
385 |             identifier='1')
386 |         # we try to access a deleted record
387 |         header, metadata, about = self._client.getRecord(
388 |             metadataPrefix='oai_dc', identifier='1')
389 |         self.assert_(header.isDeleted())
390 |         self.assertEquals(None, metadata)
391 | 
392 | class NsMapTestCase(unittest.TestCase):
393 |     def setUp(self):
394 |         self._fakeserver = fakeserver.FakeServer()
395 |         metadata_registry = metadata.MetadataRegistry()
396 |         metadata_registry.registerWriter('oai_dc', server.oai_dc_writer)
397 |         metadata_registry.registerReader('oai_dc', metadata.oai_dc_reader)
398 |         self._xmlserver = server.XMLTreeServer(
399 |             self._fakeserver,
400 |             metadata_registry,
401 |             nsmap={'cow': 'http://www.cow.com'})
402 |         self._server = server.Server(
403 |             self._fakeserver,
404 |             metadata_registry,
405 |             nsmap={'cow': 'http://www.cow.com'})
406 |         
407 |     def test_nsmap(self):
408 |         # if we pass another nsmap along to the server constructor, we
409 |         # can control extra namespaces in the output envelope
410 |         tree = self._xmlserver.identify()
411 |         self.assertEquals(
412 |             'http://www.cow.com',
413 |             tree.getroot().nsmap['cow'])
414 |         
415 |         
416 | def test_suite():
417 |     return unittest.TestSuite([
418 |         unittest.makeSuite(XMLTreeServerTestCase),
419 |         unittest.makeSuite(ServerTestCase),
420 |         unittest.makeSuite(ResumptionTestCase),
421 |         unittest.makeSuite(BatchingResumptionTestCase),
422 |         unittest.makeSuite(ClientServerTestCase),
423 |         unittest.makeSuite(ErrorTestCase),
424 |         unittest.makeSuite(DeletionTestCase),
425 |         unittest.makeSuite(NsMapTestCase)])
426 | 
427 | if __name__=='__main__':
428 |     main(defaultTest='test_suite')  # NOTE(review): main is presumably unittest.main imported above - confirm
429 | 


--------------------------------------------------------------------------------
/doc/API.html:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="utf-8" ?>
  2 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
  3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 | <head>
  5 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  6 | <meta name="generator" content="Docutils 0.2.9: http://docutils.sourceforge.net/" />
  7 | <title>Python oaipmh module</title>
  8 | <link rel="stylesheet" href="oai.css" type="text/css" />
  9 | </head>
 10 | <body>
 11 | <div class="document" id="python-oaipmh-module">
 12 | <h1 class="title">Python oaipmh module</h1>
 13 | <div class="section" id="introduction">
 14 | <h1><a name="introduction">Introduction</a></h1>
 15 | <p>The oaipmh module implements the <a class="reference" href="http://www.openarchives.org/OAI/openarchivesprotocol.html">OAI-PMH protocol</a>. It encapsulates
 16 | this protocol in Python, so that a request to the OAI-PMH server is
 17 | just a method call from the Python perspective. The XML data that is
 18 | returned from the server is processed as well, and returned as Python
 19 | objects.</p>
 20 | </div>
 21 | <div class="section" id="api">
 22 | <h1><a name="api">API</a></h1>
 23 | <a class="target" id="serverproxy" name="serverproxy"></a><p><tt class="literal"><span class="pre">class</span> <span class="pre">ServerProxy(uri</span> <span class="pre">[,</span> <span class="pre">metadataSchemaRegistry])</span></tt></p>
 24 | <blockquote>
 25 | <p>A ServerProxy instance is an object that manages communication with
 26 | the remote OAI-PMH server. The required first argument is the URI
 27 | that accepts OAI-PMH requests.</p>
 28 | <p>The second optional argument is a <a class="reference" href="#metadataschemaregistry">MetadataSchemaRegistry</a>
 29 | instance. This registry contains the metadata schemas that are
 30 | understood by the client. If it isn't supplied, a default and global
 31 | schema registry will be used, with at least support for the
 32 | <tt class="literal"><span class="pre">oai_dc</span></tt> metadata scheme.</p>
 33 | <p>The returned instance is a proxy object with methods that can be
 34 | used to invoke the corresponding OAI-PMH requests to the server. The
 35 | methods are named after the corresponding verbs of the OAI-PMH
 36 | protocol, though they start with a lowercase letter to follow Python
 37 | camelCase conventions.</p>
 38 | <p>The methods take zero or more keyword arguments; non-keyword
 39 | arguments are not supported. The methods do some automatic checking
 40 | to determine whether the right combination of arguments is used.</p>
 41 | <p>The section <a class="reference" href="http://www.openarchives.org/OAI/openarchivesprotocol.html#ProtocolMessages">Protocol Requests and Responses</a> of the OAI-PMH
 42 | standard describes the verbs (and thus methods) and the allowed
 43 | arguments combinations.</p>
 44 | <p><tt class="literal"><span class="pre">getRecord(identifier,</span> <span class="pre">metadataPrefix)</span></tt></p>
 45 | <blockquote>
 46 | Returns a <a class="reference" href="#header-metadata-about">header, metadata, about</a> tuple for the identified item.</blockquote>
 47 | <p><tt class="literal"><span class="pre">identify()</span></tt></p>
 48 | <blockquote>
 49 | Get server identification information. This returns a
 50 | <a class="reference" href="#serveridentify">ServerIdentify</a> instance.</blockquote>
 51 | <p><tt class="literal"><span class="pre">listIdentifiers(metadataPrefix</span> <span class="pre">[,</span> <span class="pre">from_</span> <span class="pre">[,</span> <span class="pre">until</span> <span class="pre">[,</span> <span class="pre">set</span> <span class="pre">[,</span> <span class="pre">resumptionToken</span> <span class="pre">[,</span> <span class="pre">max]]]]])</span></tt></p>
 52 | <blockquote>
 53 | <p>Returns a <a class="reference" href="#lazy-sequence">lazy sequence</a> of <a class="reference" href="#header">Header</a> instances.</p>
 54 | <p>The result can be restricted using  <a class="reference" href="#from-and-until">from_ and until</a> arguments.</p>
 55 | <p>The result can be restricted for one particular set.</p>
 56 | </blockquote>
 57 | <p><tt class="literal"><span class="pre">listMetadataFormats([identifier])</span></tt></p>
 58 | <blockquote>
 59 | <p>If <tt class="literal"><span class="pre">identifier</span></tt> is not specified, returns a list of
 60 | <tt class="literal"><span class="pre">metadataPrefix,</span> <span class="pre">schema,</span> <span class="pre">metadataNamespace</span></tt> tuples for this
 61 | OAI-PMH repository.</p>
 62 | <p>If <tt class="literal"><span class="pre">identifier</span></tt> is specified, returns a list of tuples for the
 63 | metadata associated with the identified item.</p>
 64 | <p><tt class="literal"><span class="pre">metadataPrefix</span></tt> is a short string to uniquely identify the
 65 | metadata format for this OAI-PMH repository.</p>
 66 | <p><tt class="literal"><span class="pre">schema</span></tt> is a URI to the XML schema describing the metadata
 67 | format.</p>
 68 | <p><tt class="literal"><span class="pre">metadataNamespace</span></tt> is a namespace URI used to identify XML
 69 | content in this metadata format.</p>
 70 | </blockquote>
 71 | <p><tt class="literal"><span class="pre">listRecords(metadataPrefix</span> <span class="pre">[,</span> <span class="pre">from_</span> <span class="pre">[,</span> <span class="pre">until</span> <span class="pre">[,</span> <span class="pre">set</span> <span class="pre">[,</span> <span class="pre">resumptionToken</span> <span class="pre">[,</span> <span class="pre">max]]]]])</span></tt></p>
 72 | <blockquote>
 73 | <p>Returns a <a class="reference" href="#lazy-sequence">lazy sequence</a> of <a class="reference" href="#header-metadata-about">header, metadata, about</a> tuples
 74 | for items in the repository.</p>
 75 | <p>The result can be restricted using  <a class="reference" href="#from-and-until">from_ and until</a> arguments.</p>
 76 | <p>The result can be restricted for one particular set.</p>
 77 | </blockquote>
 78 | <p><tt class="literal"><span class="pre">listSets([resumptionToken</span> <span class="pre">[,</span> <span class="pre">max]])</span></tt></p>
 79 | <blockquote>
 80 | <p>Returns a <a class="reference" href="#lazy-sequence">lazy sequence</a> of <tt class="literal"><span class="pre">setSpec,</span> <span class="pre">setName,</span> <span class="pre">setDescription</span></tt>
 81 | tuples.</p>
 82 | <p><tt class="literal"><span class="pre">setSpec</span></tt> is the repository-unique name of a set. It may be
 83 | partitioned into a hierarchy using a colon. See the section <a class="reference" href="http://www.openarchives.org/OAI/openarchivesprotocol.html#Set">Set</a>
 84 | of the OAI-PMH standard for more information.</p>
 85 | <p><tt class="literal"><span class="pre">setName</span></tt> is the name of the set as it should be displayed to
 86 | end-users.</p>
 87 | <p>At the time of writing <tt class="literal"><span class="pre">setDescription</span></tt> is not yet supported by the
 88 | oaipmh module, and this element of the tuple will always be <tt class="literal"><span class="pre">None</span></tt>.</p>
 89 | </blockquote>
 90 | <p>The following methods pertain to the metadata schema system.</p>
 91 | <p><tt class="literal"><span class="pre">addMetadataSchema(schema)</span></tt></p>
 92 | <blockquote>
 93 | Add a <a class="reference" href="#metadataschema">MetadataSchema</a> instance to the <a class="reference" href="#serverproxy">ServerProxy</a>. The server
 94 | will then be able to create <a class="reference" href="#metadata">Metadata</a> instances for metadata in
 95 | the format handled by the <a class="reference" href="#metadataschema">MetadataSchema</a> instance.</blockquote>
 96 | <p><tt class="literal"><span class="pre">getMetadataSchemaRegistry()</span></tt></p>
 97 | <blockquote>
 98 | Get the <a class="reference" href="#metadataschemaregistry">MetadataSchemaRegistry</a> instance that handles metadata
 99 | for this <a class="reference" href="#serverproxy">ServerProxy</a> instance.</blockquote>
100 | </blockquote>
101 | <a class="target" id="header" name="header"></a><p><tt class="literal"><span class="pre">class</span> <span class="pre">Header(..)</span></tt></p>
102 | <blockquote>
103 | <p><tt class="literal"><span class="pre">identifier()</span></tt></p>
104 | <blockquote>
105 | <p>Returns the unique identifier of this item in this repository. The
106 | identifier must be in URI form. Some repositories may for instance
107 | implement this as handles (see www.handle.net).</p>
108 | <p>See the <a class="reference" href="http://www.openarchives.org/OAI/openarchivesprotocol.html#UniqueIdentifier">Unique Identifier</a> section of the OAI-PMH standard for
109 | more information.</p>
110 | </blockquote>
111 | <p><tt class="literal"><span class="pre">datestamp()</span></tt></p>
112 | <blockquote>
113 | Returns the time at which this item was added or last updated
114 | within the repository. This is in string form, in <a class="reference" href="http://www.openarchives.org/OAI/openarchivesprotocol.html#Dates">UTCdatetime</a>
115 | format.</blockquote>
116 | <p><tt class="literal"><span class="pre">setSpec()</span></tt></p>
117 | <blockquote>
118 | <blockquote>
119 | Returns a list of the sets this item is in. The object may be in
120 | zero or more sets. Sets are represented as strings. See also the
121 | section <a class="reference" href="http://www.openarchives.org/OAI/openarchivesprotocol.html#Set">Set</a> of the OAI-PMH standard.</blockquote>
122 | <p><tt class="literal"><span class="pre">isDeleted()</span></tt></p>
123 | <blockquote>
124 | Returns true if this item is deleted from the server, and this is
125 | a delete notification.</blockquote>
126 | </blockquote>
127 | </blockquote>
128 | <a class="target" id="metadata" name="metadata"></a><p><tt class="literal"><span class="pre">class</span> <span class="pre">Metadata(..)</span></tt></p>
129 | <blockquote>
130 | <p><tt class="literal"><span class="pre">getMap()</span></tt></p>
131 | <blockquote>
132 | Returns a dictionary with as key the metadata field names and as
133 | values the metadata values, as extracted from the XML.</blockquote>
134 | <p><tt class="literal"><span class="pre">getField(name)</span></tt></p>
135 | <blockquote>
136 | <p>Returns the metadata value for metadata field name <tt class="literal"><span class="pre">name</span></tt>.</p>
137 | <p>There is also a dictionary API that is the equivalent of getField;
138 | <tt class="literal"><span class="pre">metadata[name]</span></tt>.</p>
139 | </blockquote>
140 | </blockquote>
141 | <a class="target" id="serveridentify" name="serveridentify"></a><p><tt class="literal"><span class="pre">class</span> <span class="pre">ServerIdentify(..)</span></tt></p>
142 | <blockquote>
143 | <p><tt class="literal"><span class="pre">repositoryName()</span></tt></p>
144 | <blockquote>
145 | Returns the human readable name of the repository.</blockquote>
146 | <p><tt class="literal"><span class="pre">baseURL()</span></tt></p>
147 | <blockquote>
148 | Returns the base URL for the repository (which can receive OAI-PMH
149 | requests).</blockquote>
150 | <p><tt class="literal"><span class="pre">protocolVersion()</span></tt></p>
151 | <blockquote>
152 | Returns the version of the OAI-PMH protocol supported by the
153 | repository.</blockquote>
154 | <p><tt class="literal"><span class="pre">earliestDatestamp()</span></tt></p>
155 | <blockquote>
156 | Returns a <a class="reference" href="http://www.openarchives.org/OAI/openarchivesprotocol.html#Dates">UTCdatetime</a> that is the guaranteed earliest datestamp
157 | that can occur in headers.</blockquote>
158 | <p><tt class="literal"><span class="pre">deletedRecord()</span></tt></p>
159 | <blockquote>
160 | <p>Returns a string indicating how the repository deals with deleted
161 | records.</p>
162 | <p><tt class="literal"><span class="pre">no</span></tt></p>
163 | <blockquote>
164 | The repository does not support deleted records in the
165 | protocol. If records are deleted they don't appear anymore, but
166 | no special information is returned about them.</blockquote>
167 | <p><tt class="literal"><span class="pre">transient</span></tt></p>
168 | <blockquote>
169 | Deleted records will be returned with <tt class="literal"><span class="pre">isDeleted</span></tt> status in
170 | the header set as true but these will not be returned forever.</blockquote>
171 | <p><tt class="literal"><span class="pre">persistent</span></tt></p>
172 | <blockquote>
173 | Deleted record information is stored permanently by the server
174 | and will be returned with <tt class="literal"><span class="pre">isDeleted</span></tt> status as true if the
175 | deleted item is accessed.</blockquote>
176 | </blockquote>
177 | <p><tt class="literal"><span class="pre">granularity()</span></tt></p>
178 | <blockquote>
179 | Returns either <tt class="literal"><span class="pre">YYYY-MM-DD</span></tt> or <tt class="literal"><span class="pre">YYYY-MM-DDThh:mm:ssZ</span></tt>. This determines
180 | the finest granularity of timestamps returned by the server.</blockquote>
181 | <p><tt class="literal"><span class="pre">adminEmails()</span></tt></p>
182 | <blockquote>
183 | Returns a list of one or more email addresses of server admins.</blockquote>
184 | <p><tt class="literal"><span class="pre">compression()</span></tt></p>
185 | <blockquote>
186 | Returns the compression encoding supported by the repository.</blockquote>
187 | <p><tt class="literal"><span class="pre">description()</span></tt></p>
188 | <blockquote>
189 | Not yet implemented.</blockquote>
190 | </blockquote>
191 | <a class="target" id="metadataschema" name="metadataschema"></a><p><tt class="literal"><span class="pre">class</span> <span class="pre">MetadataSchema(metadata_prefix,</span> <span class="pre">namespaces)</span></tt></p>
192 | <blockquote>
193 | <p>Instances of this class describe ways to turn an XML representation
194 | of metadata into python <a class="reference" href="#metadata">Metadata</a> instances. Fields are described by
195 | a name, a type and a way to retrieve the field information (in the
196 | form of a string or a list of strings) from the XML representation.
197 | The latter is described by an <a class="reference" href="http://www.w3.org/TR/xpath">XPath</a> expression. This way other
198 | metadata schemas can be represented in Python by adding a new
199 | MetadataSchema to the <a class="reference" href="#serverproxy">ServerProxy</a>'s metadata schema registry.</p>
200 | <p><tt class="literal"><span class="pre">addFieldDescription(field_name,</span> <span class="pre">field_type,</span> <span class="pre">xpath)</span></tt></p>
201 | <blockquote>
202 | <p>Add a field description to the metadata schema.</p>
203 | <p><tt class="literal"><span class="pre">field_name</span></tt></p>
204 | <blockquote>
205 | The name of the field in the <a class="reference" href="#metadata">Metadata</a> instances generated
206 | according to this schema.</blockquote>
207 | <p><tt class="literal"><span class="pre">field_type</span></tt></p>
208 | <blockquote>
209 | A string indicating the data type of the metadata
210 | field. <tt class="literal"><span class="pre">bytes</span></tt> indicates an 8-bit string, <tt class="literal"><span class="pre">bytesList</span></tt>
211 | indicates a list of such strings, <tt class="literal"><span class="pre">text</span></tt> indicates a unicode
212 | string and <tt class="literal"><span class="pre">textList</span></tt> indicates a list of unicode strings.</blockquote>
213 | <p><tt class="literal"><span class="pre">xpath</span></tt></p>
214 | <blockquote>
215 | An <a class="reference" href="http://www.w3.org/TR/xpath">XPath</a> expression that is executed from the top of the
216 | particular metadata section in the retrieved XML. This
217 | expression indicates how to retrieve the metadata.</blockquote>
218 | </blockquote>
219 | </blockquote>
220 | <a class="target" id="metadataschemaregistry" name="metadataschemaregistry"></a><p><tt class="literal"><span class="pre">class</span> <span class="pre">MetadataSchemaRegistry()</span></tt></p>
221 | <blockquote>
222 | <p>Instances of this class store a number of <a class="reference" href="#metadataschema">MetadataSchema</a>
223 | instances. These handle metadata found in OAI-PMH XML resultsets
224 | according to their <tt class="literal"><span class="pre">metadata_prefix</span></tt>.</p>
225 | <p><tt class="literal"><span class="pre">addMetadataSchema(metadata_schema)</span></tt></p>
226 | <blockquote>
227 | Add a <a class="reference" href="#metadataschema">MetadataSchema</a> instance to this registry.</blockquote>
228 | </blockquote>
229 | <div class="section" id="header-metadata-about">
230 | <h2><a name="header-metadata-about"><tt class="literal"><span class="pre">header,</span> <span class="pre">metadata,</span> <span class="pre">about</span></tt></a></h2>
231 | <p><tt class="literal"><span class="pre">header</span></tt> is a <a class="reference" href="#header">Header</a> instance.</p>
232 | <p><tt class="literal"><span class="pre">metadata</span></tt> is a <a class="reference" href="#metadata">Metadata</a> instance if the metadataPrefix argument
233 | is in a registered format, or <tt class="literal"><span class="pre">None</span></tt> if the metadataPrefix is not
234 | recognized.</p>
235 | <p>At the time of writing <tt class="literal"><span class="pre">about</span></tt> support has not yet been implemented
236 | and will always be returned as <tt class="literal"><span class="pre">None</span></tt>.</p>
237 | </div>
238 | <div class="section" id="from-and-until">
239 | <h2><a name="from-and-until"><tt class="literal"><span class="pre">from_</span></tt> and <tt class="literal"><span class="pre">until</span></tt></a></h2>
240 | <p>The <a class="reference" href="#from-and-until">from_ and until</a> arguments are optional and can be used to
241 | restrict the result to information about items which were added or
242 | modified after <tt class="literal"><span class="pre">from_</span></tt> and before <tt class="literal"><span class="pre">until</span></tt>. <tt class="literal"><span class="pre">from_</span></tt> is spelled
243 | with the extra <tt class="literal"><span class="pre">_</span></tt> because <tt class="literal"><span class="pre">from</span></tt> (without underscore) is a
244 | reserved keyword in Python. If only <tt class="literal"><span class="pre">from_</span></tt> is used there is no
245 | upper limit; if only <tt class="literal"><span class="pre">until</span></tt> is used there is no lower limit. Both
246 | arguments should be strings in OAI-PMH datestamp format
247 | (i.e. <tt class="literal"><span class="pre">YYYY-MM-DDThh:mm:ssZ</span></tt>). See the <a class="reference" href="http://www.openarchives.org/OAI/openarchivesprotocol.html#Dates">UTCdatetime</a> section of
248 | the OAI-PMH standard for more information.</p>
249 | </div>
250 | <div class="section" id="lazy-sequence">
251 | <h2><a name="lazy-sequence">lazy sequence</a></h2>
252 | <p>The list is <em>lazy</em> in that while you can loop through it, it behaves
253 | more like an iterator than a real list (it would be a real Python 2.2+
254 | iterator if Python 2.1 did not need to be supported by this
255 | module). The system automatically asks for the next resumptionToken if
256 | one was in the reply. While you can explicitly pass a resumptionToken
257 | this is therefore not very useful as the lazy lists take care of
258 | resumptionTokens automatically.</p>
259 | <p>The optional <tt class="literal"><span class="pre">max</span></tt> argument is not part of the OAI-PMH protocol, but
260 | a coarse way to control how many items are read before stopping. If
261 | the amount of items exceeds <tt class="literal"><span class="pre">max</span></tt> after reading a resumptionToken,
262 | the method will halt.</p>
263 | </div>
264 | </div>
265 | </div>
266 | <hr class="footer"/>
267 | <div class="footer">
268 | <a class="reference" href="API.txt">View document source</a>.
269 | Generated on: 2003-06-17 17:05 UTC.
270 | Generated by <a class="reference" href="http://docutils.sourceforge.net/">Docutils</a> from <a class="reference" href="http://docutils.sourceforge.net/rst.html">reStructuredText</a> source.
271 | </div>
272 | </body>
273 | </html>
274 | 


--------------------------------------------------------------------------------
/src/oaipmh/server.py:
--------------------------------------------------------------------------------
  1 | from lxml.etree import ElementTree, Element, SubElement
  2 | from lxml import etree
  3 | from datetime import datetime
  4 | try:
  5 |     from urllib.parse import urlencode, quote, unquote
  6 | except ImportError:
  7 |     from urllib import quote, unquote, urlencode
  8 | try:
  9 |     from urllib.parse import parse_qs
 10 | except ImportError:
 11 |     from urlparse import parse_qs
 12 | import sys
 13 | 
 14 | from oaipmh import common, metadata, validation, error
 15 | from oaipmh.datestamp import datestamp_to_datetime, datetime_to_datestamp, DatestampError
 16 | 
 17 | NS_OAIPMH = 'http://www.openarchives.org/OAI/2.0/'
 18 | NS_XSI = 'http://www.w3.org/2001/XMLSchema-instance'
 19 | NS_OAIDC = 'http://www.openarchives.org/OAI/2.0/oai_dc/'
 20 | NS_DC = "http://purl.org/dc/elements/1.1/"
 21 | 
 22 | NSMAP = {
 23 |     None: NS_OAIPMH,
 24 |     }
 25 | 
 26 | class XMLTreeServer(object):
 27 |     """A server that responds to messages by returning XML trees.
 28 | 
 29 |     This is an implementation class that normally would not be exposed
 30 |     to the outside world.
 31 | 
 32 |     Takes a server object conforming to the ResumptionOAIPMH interface.
 33 |     """
 34 |     def __init__(self, server, metadata_registry, nsmap=None):
 35 |         if nsmap is None:
 36 |             nsmap = {}
 37 |         self._nsmap = NSMAP.copy()
 38 |         self._nsmap.update(nsmap)
 39 |         self._server = server
 40 |         self._metadata_registry = (
 41 |             metadata_registry or metadata.global_metadata_registry)
 42 |         
 43 |     def getRecord(self, **kw):
 44 |         envelope, e_getRecord = self._outputEnvelope(
 45 |             verb='GetRecord', **kw)
 46 |         header, metadata, about = self._server.getRecord(**kw)
 47 |         e_record = SubElement(e_getRecord, nsoai('record'))
 48 |         self._outputHeader(e_record, header)   
 49 |         if not header.isDeleted():
 50 |             self._outputMetadata(e_record, kw['metadataPrefix'], metadata)
 51 |         return envelope
 52 | 
 53 |     def getMetadata(self, **kw):
 54 |         """unofficial verb, works same as getRecord, but returns
 55 |         the first element below the oai:metadata element"""
 56 |         envelope = self.getRecord(**kw)
 57 |         metadata = envelope.xpath(
 58 |             '//oai:metadata/node()[1]', namespaces={'oai': NS_OAIPMH})
 59 |         return metadata[0]
 60 |         
 61 |     def identify(self):
 62 |         envelope, e_identify = self._outputEnvelope(verb='Identify')
 63 |         identify = self._server.identify()
 64 |         e_repositoryName = SubElement(e_identify, nsoai('repositoryName'))
 65 |         e_repositoryName.text = identify.repositoryName()
 66 |         e_baseURL = SubElement(e_identify, nsoai('baseURL'))
 67 |         e_baseURL.text = identify.baseURL()
 68 |         e_protocolVersion = SubElement(e_identify, nsoai('protocolVersion'))
 69 |         e_protocolVersion.text = identify.protocolVersion()
 70 |         for adminEmail in identify.adminEmails():
 71 |             e = SubElement(e_identify, nsoai('adminEmail'))
 72 |             e.text = adminEmail
 73 |         e_earliestDatestamp = SubElement(e_identify,
 74 |                                          nsoai('earliestDatestamp'))
 75 |         e_earliestDatestamp.text = datetime_to_datestamp(
 76 |             identify.earliestDatestamp())
 77 |         e_deletedRecord = SubElement(e_identify,
 78 |                                      nsoai('deletedRecord'))
 79 |         e_deletedRecord.text = identify.deletedRecord()
 80 |         e_granularity = SubElement(e_identify, nsoai('granularity'))
 81 |         e_granularity.text = identify.granularity()
 82 |         compressions = identify.compression()
 83 |         if compressions != ['identity']:
 84 |             for compression in compressions:
 85 |                 e_compression = SubElement(e_identify, nsoai('compression'))
 86 |                 e_compression.text = compression
 87 | 
 88 |         for description in identify.descriptions():
 89 |             e_description = SubElement(e_identify, nsoai('description'))
 90 |             e_description.append(etree.fromstring(description))
 91 |         return envelope
 92 | 
 93 |     def listMetadataFormats(self, **kw):
 94 |         envelope, e_listMetadataFormats = self._outputEnvelope(
 95 |             verb="ListMetadataFormats", **kw)
 96 |         for (metadataPrefix, schema,
 97 |              metadataNamespace) in self._server.listMetadataFormats(**kw):
 98 |             e_metadataFormat = SubElement(e_listMetadataFormats,
 99 |                                           nsoai('metadataFormat'))
100 |             e_metadataPrefix = SubElement(e_metadataFormat,
101 |                                           nsoai('metadataPrefix'))
102 |             e_metadataPrefix.text = metadataPrefix
103 |             e_schema = SubElement(e_metadataFormat,
104 |                                   nsoai('schema'))
105 |             e_schema.text = schema
106 |             e_metadataNamespace = SubElement(e_metadataFormat,
107 |                                              nsoai('metadataNamespace'))
108 |             e_metadataNamespace.text = metadataNamespace
109 |         return envelope            
110 | 
111 |     def listIdentifiers(self, **kw):
112 |         envelope, e_listIdentifiers = self._outputEnvelope(
113 |             verb='ListIdentifiers', **kw)
114 |         def outputFunc(element, headers, token_kw):
115 |             for header in headers:
116 |                 self._outputHeader(element, header)
117 |         self._outputResuming(
118 |             e_listIdentifiers,
119 |             self._server.listIdentifiers,
120 |             outputFunc,
121 |             kw)
122 |         return envelope
123 |     
124 |     def listRecords(self, **kw):
125 |         envelope, e_listRecords = self._outputEnvelope(
126 |             verb="ListRecords", **kw)
127 |         def outputFunc(element, records, token_kw):
128 |             metadataPrefix = token_kw['metadataPrefix']
129 |             for header, metadata, about in records:
130 |                 e_record = SubElement(e_listRecords, nsoai('record'))
131 |                 self._outputHeader(e_record, header)
132 |                 if not header.isDeleted():
133 |                     self._outputMetadata(e_record, metadataPrefix, metadata)
134 |                 # XXX about
135 |         self._outputResuming(
136 |             e_listRecords,
137 |             self._server.listRecords,
138 |             outputFunc,
139 |             kw)
140 |         return envelope
141 | 
142 |     def listSets(self, **kw):
143 |         envelope, e_listSets = self._outputEnvelope(
144 |             verb='ListSets', **kw)
145 |         def outputFunc(element, sets, token_kw):
146 |             for setSpec, setName, setDescription in sets:
147 |                 e_set = SubElement(e_listSets, nsoai('set'))
148 |                 e_setSpec = SubElement(e_set, nsoai('setSpec'))
149 |                 e_setSpec.text = setSpec
150 |                 e_setName = SubElement(e_set, nsoai('setName'))
151 |                 e_setName.text = setName
152 |                 # XXX ignore setDescription
153 |         self._outputResuming(
154 |             e_listSets,
155 |             self._server.listSets,
156 |             outputFunc,
157 |             kw)
158 |         return envelope
159 | 
160 |     def handleException(self, exception):
161 |         if isinstance(exception, error.ErrorBase):
162 |             envelope = self._outputErrors(
163 |                 [(exception.oainame(), str(exception))])
164 |             return envelope
165 |         # unhandled exception, so raise again
166 |         raise
167 |     
168 |     def _outputBasicEnvelope(self, **kw):
169 |         e_oaipmh = Element(nsoai('OAI-PMH'), nsmap=self._nsmap)
170 |         e_oaipmh.set('{%s}schemaLocation' % NS_XSI,
171 |                      ('http://www.openarchives.org/OAI/2.0/ '
172 |                       'http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd'))
173 |         e_tree = ElementTree(element=e_oaipmh)
174 |         e_responseDate = SubElement(e_oaipmh, nsoai('responseDate'))
175 |         # date should be first possible moment
176 |         e_responseDate.text = datetime_to_datestamp(
177 |             datetime.utcnow().replace(microsecond=0))
178 |         e_request = SubElement(e_oaipmh, nsoai('request'))
179 |         for key, value in kw.items():
180 |             if key == 'from_':
181 |                 key = 'from'
182 |             if key == 'from' or key == 'until':
183 |                 value = datetime_to_datestamp(value)
184 |             e_request.set(key, value)
185 |         # XXX this is potentially slow..
186 |         e_request.text = self._server.identify().baseURL()
187 |         return e_tree, e_oaipmh
188 |     
189 |     def _outputEnvelope(self, **kw):
190 |         e_tree, e_oaipmh = self._outputBasicEnvelope(**kw)
191 |         e_element = SubElement(e_oaipmh, nsoai(kw['verb']))
192 |         return e_tree, e_element
193 | 
194 |     def _outputErrors(self, errors, **kw):
195 |         # only pass functional arguments
196 |         e_tree, e_oaipmh = self._outputBasicEnvelope(**kw)
197 |         for error_code, error_msg in errors:
198 |             e_error = SubElement(e_oaipmh, nsoai('error'))
199 |             e_error.set('code', error_code)
200 |             e_error.text = error_msg
201 |         return e_tree
202 |     
203 |     def _outputResuming(self, element, input_func, output_func, kw):
204 |         if 'resumptionToken' in kw:
205 |             resumptionToken = kw['resumptionToken']
206 |             result, token = input_func(resumptionToken=resumptionToken)
207 |             # unpack keywords from resumption token
208 |             token_kw, dummy = decodeResumptionToken(resumptionToken)
209 |         else:
210 |             result, token = input_func(**kw)
211 |             # if we don't get results for the first request,
212 |             # then no records match
213 |             # XXX this will also be triggered if there are no sets,
214 |             # but input_func (listSets) should have already raised
215 |             # NoSetHierarchyError in that case
216 |             if not result:
217 |                 raise error.NoRecordsMatchError(
218 |                     "No records match for request.")
219 |             # without resumption token keys are fine
220 |             token_kw = kw
221 |         output_func(element, result, token_kw)
222 |         if token is not None:
223 |             e_resumptionToken = SubElement(element, nsoai('resumptionToken'))
224 |             e_resumptionToken.text = token
225 |             
226 |     def _outputHeader(self, element, header):
227 |         e_header = SubElement(element, nsoai('header'))
228 |         if header.isDeleted():
229 |             e_header.set('status', 'deleted')
230 |         e_identifier = SubElement(e_header, nsoai('identifier'))
231 |         e_identifier.text = header.identifier()
232 |         e_datestamp = SubElement(e_header, nsoai('datestamp'))
233 |         e_datestamp.text = datetime_to_datestamp(header.datestamp())
234 |         for set in header.setSpec():
235 |             e = SubElement(e_header, nsoai('setSpec'))
236 |             e.text = set
237 |     
238 |     def _outputMetadata(self, element, metadata_prefix, metadata):
239 |         e_metadata = SubElement(element, nsoai('metadata'))
240 |         if not self._metadata_registry.hasWriter(metadata_prefix):
241 |             raise error.CannotDisseminateFormatError(
242 |                   "Unknown metadata format: %s" % metadata_prefix)
243 |         self._metadata_registry.writeMetadata(
244 |             metadata_prefix, e_metadata, metadata)
245 | 
class ServerBase(common.ResumptionOAIPMH):
    """Answers OAI-PMH requests with serialized, OAI-PMH compliant XML.

    Wraps a server object complying with the ResumptionOAIPMH interface
    in an XMLTreeServer and serializes the trees it produces.
    """
    def __init__(self, server, metadata_registry=None, nsmap=None):
        self._tree_server = XMLTreeServer(server, metadata_registry, nsmap)

    def handleRequest(self, request_kw):
        """Process one incoming OAI-PMH request.

        request_kw is a dictionary with the request parameters, the verb
        among them.  Whatever goes wrong while processing is handed to
        handleException together with the current exception info.
        """
        try:
            # work on a copy whose keys are native strings
            try:
                request_kw = dict(
                    (str(name), value) for name, value in request_kw.items())
            except UnicodeError:
                raise error.BadVerbError(
                      "Non-ascii keys in request.")

            # without a verb the request cannot be dispatched at all
            if 'verb' not in request_kw:
                raise error.BadVerbError(
                      "Required verb argument not found.")
            verb = request_kw.pop('verb')
            known_verbs = ('GetRecord', 'Identify', 'ListIdentifiers',
                           'GetMetadata', 'ListMetadataFormats',
                           'ListRecords', 'ListSets')
            if verb not in known_verbs:
                raise error.BadVerbError("Illegal verb: %s" % verb)

            # 'from' is parsed and stored as 'from_' for internal use
            from_ = request_kw.get('from')
            if from_ is not None:
                try:
                    parsed_from = datestamp_to_datetime(from_)
                except DatestampError:
                    raise error.BadArgumentError(
                        "The value '%s' of the argument "
                        "'%s' is not valid." % (from_, 'from'))
                del request_kw['from']
                request_kw['from_'] = parsed_from

            # 'until' is parsed in place, as an inclusive bound
            until = request_kw.get('until')
            if until is not None:
                try:
                    parsed_until = datestamp_to_datetime(until,
                                                         inclusive=True)
                except DatestampError:
                    raise error.BadArgumentError(
                        "The value '%s' of the argument "
                        "'%s' is not valid." % (until, 'until'))
                request_kw['until'] = parsed_until

            # both bounds must either carry a time part or lack one
            if from_ is not None and until is not None:
                if ('T' in from_) != ('T' in until):
                    raise error.BadArgumentError(
                        "The request has different granularities for"
                        " the from and until parameters")

            # check the remaining arguments against the verb
            try:
                validation.validateResumptionArguments(verb, request_kw)
            except validation.BadArgumentError as invalid:
                # have to raise this as a error.BadArgumentError
                raise error.BadArgumentError(str(invalid))

            return self.handleVerb(verb, request_kw)
        except:
            # in case of exception, call exception handler
            return self.handleException(request_kw, sys.exc_info())

    def handleVerb(self, verb, kw):
        """Run the tree server method for `verb`; return serialized XML."""
        verb_method = common.getMethodForVerb(self._tree_server, verb)
        tree = verb_method(**kw)
        return etree.tostring(tree.getroot(),
                              encoding='UTF-8',
                              xml_declaration=True,
                              pretty_print=True)

    def handleException(self, kw, exc_info):
        """Serialize the tree server's response to the exception held in
        the `exc_info` triple."""
        _exc_type, exc_value, _exc_tb = exc_info
        error_tree = self._tree_server.handleException(exc_value)
        return etree.tostring(
            error_tree.getroot(),
            encoding='UTF-8',
            xml_declaration=True,
            pretty_print=True)
334 | 
class Server(ServerBase):
    """OAI-PMH server front end for a IOAI server implementation.

    The given server is wrapped in a Resumption object that uses
    `resumption_batch_size` as its batch size.
    """
    def __init__(self, server, metadata_registry=None, nsmap=None,
                 resumption_batch_size=10):
        resumable = Resumption(server, resumption_batch_size)
        super(Server, self).__init__(resumable, metadata_registry, nsmap)
344 | 
class BatchingServer(ServerBase):
    """OAI-PMH server front end for a IBatchingOAI server implementation.

    The given server is wrapped in a BatchingResumption object that uses
    `resumption_batch_size` as its batch size.
    """
    def __init__(self, server, metadata_registry=None, nsmap=None,
                 resumption_batch_size=10):
        resumable = BatchingResumption(server, resumption_batch_size)
        super(BatchingServer, self).__init__(
            resumable, metadata_registry, nsmap)
354 | 
class Resumption(common.ResumptionOAIPMH):
    """
    Adapter that turns a plain IOAIPMH interface into a ResumptionOAIPMH
    interface.

    The complete result set is rebuilt on every request and then sliced,
    so this is not particularly efficient for large result sets.
    """
    def __init__(self, server, batch_size=10):
        self._server = server
        self._batch_size = batch_size

    def handleVerb(self, verb, kw):
        """Run `verb` on the wrapped server.

        For the list verbs, and for any request carrying a
        resumptionToken, returns (batch, resumptionToken) where the
        token is None once the end of the result set is reached.
        Other requests return the wrapped server's result unchanged.
        """
        method = common.getMethodForVerb(self._server, verb)

        resuming = 'resumptionToken' in kw
        if resuming:
            # the token carries the original arguments and the position
            kw, start = decodeResumptionToken(kw['resumptionToken'])
        else:
            start = 0

        # (re)run the query with the original parameters
        result = method(**kw)

        if not resuming and verb not in ('ListSets', 'ListIdentifiers',
                                         'ListRecords'):
            return result

        # XXX defeat the laziness effect of any generators..
        result = list(result)
        stop = start + self._batch_size
        if stop < len(result):
            resumptionToken = encodeResumptionToken(kw, stop)
        else:
            resumptionToken = None
        return result[start:stop], resumptionToken
402 | 
class BatchingResumption(common.ResumptionOAIPMH):
    """
    Adapter that turns a IBatchingOAIPMH interface into a
    ResumptionOAIPMH interface.
    """

    def __init__(self, server, batch_size=10):
        self._server = server
        self._batch_size = batch_size

    def handleVerb(self, verb, kw):
        """Run `verb` on the wrapped server.

        The list verbs are called with additional `cursor` and
        `batch_size` arguments and return (batch, resumptionToken), the
        token being None when no further batch is expected.  Other verbs
        return the wrapped server's result unchanged.
        """
        if 'resumptionToken' in kw:
            kw, position = decodeResumptionToken(kw['resumptionToken'])
            kw['cursor'] = position

        method = common.getMethodForVerb(self._server, verb)

        if verb not in ('ListSets', 'ListIdentifiers', 'ListRecords'):
            return method(**kw)

        # now handle resumption system
        call_kw = kw.copy()
        position = call_kw.get('cursor', None)
        if position is None:
            position = 0
            call_kw['cursor'] = position
        # ask for one item more than a batch holds: receiving it tells us
        # that another batch (and thus a resumption token) is needed
        call_kw['batch_size'] = self._batch_size + 1
        batch = list(method(**call_kw))
        if len(batch) <= self._batch_size:
            # no more results are expected
            return batch, None
        # more results are expected, so encode resumption token
        resumptionToken = encodeResumptionToken(
            call_kw, position + self._batch_size)
        # drop the extra item that was only fetched as a look-ahead
        del batch[-1]
        return batch, resumptionToken
445 |     
def encodeResumptionToken(kw, cursor):
    """Pack the request arguments `kw` and `cursor` into a token string.

    Works on a copy of `kw`.  The cursor is stored as a string and any
    `from_`/`until` value that is not None is converted with
    datetime_to_datestamp.  The result is the URL-encoded query string,
    quoted once more.
    """
    params = kw.copy()
    params['cursor'] = str(cursor)
    for name in ('from_', 'until'):
        moment = params.get(name)
        if moment is not None:
            params[name] = datetime_to_datestamp(moment)
    query = urlencode(params)
    return quote(query)
456 | 
def decodeResumptionToken(token):
    """Unpack a resumption token into (request arguments, cursor).

    The token is unquoted and parsed as a query string; the first value
    of every key is kept.  `from_` and `until` are converted with
    datestamp_to_datetime and `cursor` is returned separately as an int.

    Raises error.BadResumptionTokenError when the token cannot be parsed,
    holds an invalid datestamp, or has a missing or non-integer cursor.
    """
    token = str(unquote(token))

    try:
        # keep blank values, strict parsing
        kw = parse_qs(token, True, True)
    except ValueError:
        raise error.BadResumptionTokenError(
              "Unable to decode resumption token: %s" % token)
    result = {}
    for key, value in kw.items():
        value = value[0]
        if key == 'from_' or key == 'until':
            try:
                value = datestamp_to_datetime(value)
            except DatestampError:
                # a bad datestamp means a bad token; report it as such
                # rather than letting DatestampError escape
                raise error.BadResumptionTokenError(
                      "Unable to decode resumption token "
                      "(bad datestamp): %s" % token)
        result[key] = value
    try:
        cursor = int(result.pop('cursor'))
    except (KeyError, ValueError):
        raise error.BadResumptionTokenError(
              "Unable to decode resumption token (bad cursor): %s" % token)
    # XXX should also validate result contents. Need verb information
    # for this, and somewhat more flexible verb validation support
    return result, cursor
479 |     
def oai_dc_writer(element, metadata):
    """Write `metadata` below `element` as an oai_dc:dc element.

    `metadata.getMap()` must return a mapping from Dublin Core field
    names to lists of values.  Fields are written in the fixed order
    listed below; fields missing from the map are skipped.
    """
    e_dc = SubElement(element, nsoaidc('dc'),
                      nsmap={'oai_dc': NS_OAIDC, 'dc': NS_DC, 'xsi': NS_XSI})
    # oai_dc.xsd is the schema of the oai_dc namespace, so the schema
    # location has to pair it with NS_OAIDC (not the Dublin Core namespace)
    e_dc.set('{%s}schemaLocation' % NS_XSI,
             '%s http://www.openarchives.org/OAI/2.0/oai_dc.xsd' % NS_OAIDC)
    field_map = metadata.getMap()
    for name in [
        'title', 'creator', 'subject', 'description', 'publisher',
        'contributor', 'date', 'type', 'format', 'identifier',
        'source', 'language', 'relation', 'coverage', 'rights']:
        for value in field_map.get(name, []):
            e = SubElement(e_dc, nsdc(name))
            e.text = value
493 |                
def nsoai(name):
    """Return `name` as a '{namespace}name' tag in the OAI-PMH namespace."""
    return '{{{0}}}{1}'.format(NS_OAIPMH, name)
496 | 
def nsoaidc(name):
    """Return `name` as a '{namespace}name' tag in the oai_dc namespace."""
    return '{{{0}}}{1}'.format(NS_OAIDC, name)
499 | 
def nsdc(name):
    """Return `name` as a '{namespace}name' tag in the Dublin Core
    elements namespace."""
    return '{{{0}}}{1}'.format(NS_DC, name)
502 | 


--------------------------------------------------------------------------------