├── .gitignore ├── .travis.yml ├── HISTORY.rst ├── LICENSE ├── dataverse ├── .gitignore ├── __init__.py ├── connection.py ├── dataset.py ├── dataverse.py ├── exceptions.py ├── file.py ├── resources │ ├── atom-entry-study.xml │ └── atom-example.xml ├── settings │ ├── __init__.py │ └── defaults.py ├── test │ ├── __init__.py │ ├── config.py │ └── test_dataverse.py └── utils.py ├── readme.md ├── setup.cfg └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | .vagrant 2 | .DS_Store 3 | *pyc 4 | .idea/ 5 | .cache/ 6 | local.py 7 | *~ 8 | *.egg-info/ 9 | 10 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - 2.7 5 | - 3.3 6 | #- 3.4 # "No output has been received in the last 10 minutes, this potentially indicates a stalled build or something wrong with the build itself." 7 | 8 | sudo: false 9 | 10 | cache: 11 | directories: 12 | - $HOME/wheelhouse 13 | 14 | env: 15 | - WHEELHOUSE=$HOME/wheelhouse/`python --version` 16 | 17 | install: 18 | - pip wheel --find-links=$HOME/wheelhouse --wheel-dir=$HOME/wheelhouse . httpretty pytest flake8 ordereddict 19 | - pip install --find-links=$HOME/wheelhouse --no-index . httpretty pytest flake8 ordereddict 20 | 21 | script: 22 | - flake8 . 23 | - py.test -v 24 | -------------------------------------------------------------------------------- /HISTORY.rst: -------------------------------------------------------------------------------- 1 | .. :changelog: 2 | 3 | Release History 4 | --------------- 5 | 6 | 0.1.2 (2016-6-13) 7 | ++++++++++++++++++ 8 | - Allow use of a non-https connection. 
9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 
40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /dataverse/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/dataverse-client-python/5cdaadc2c3882d6fe23d3cb7cef5280fe83ba909/dataverse/.gitignore -------------------------------------------------------------------------------- /dataverse/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from requests.packages import urllib3 4 | urllib3.disable_warnings() # noqa 5 | 6 | from dataverse.connection import Connection # noqa 7 | from dataverse.dataverse import Dataverse # noqa 8 | from dataverse.dataset import Dataset # noqa 9 | from dataverse.file import DataverseFile # noqa 10 | -------------------------------------------------------------------------------- /dataverse/connection.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from lxml import etree 4 | import requests 5 | 6 | from 
class Connection(object):
    """A client connection to a Dataverse host, speaking both the SWORD
    (Atom) API and the native JSON API.
    """

    def __init__(self, host, token, use_https=True):
        """Record credentials, derive the API base URLs, and contact the
        server once so that bad credentials fail immediately.

        :param str host: hostname of the Dataverse server (no scheme)
        :param str token: API token used for authentication
        :param bool use_https: build https:// URLs when True, http:// otherwise
        """
        self.token = token
        self.host = host

        url_scheme = 'https://' if use_https else 'http://'
        self.base_url = '{0}{1}'.format(url_scheme, self.host)
        self.native_base_url = '{0}/api/v1'.format(self.base_url)
        self.sword_base_url = '{0}/dvn/api/data-deposit/v1.1/swordv2'.format(self.base_url)
        self.sd_uri = '{0}/service-document'.format(self.sword_base_url)
        self._service_document = None

        # Eager fetch: raises UnauthorizedError/ConnectionError up front.
        self.get_service_document()

    @property
    def auth(self):
        # SWORD authenticates with HTTP basic auth: token as the username,
        # empty password.
        return self.token, None

    def get_service_document(self, refresh=False):
        """Return the SWORD service document, using the cached copy unless
        *refresh* is True.
        """
        if self._service_document is not None and not refresh:
            return self._service_document

        resp = requests.get(self.sd_uri, auth=self.auth)

        if resp.status_code == 403:
            raise exceptions.UnauthorizedError('The credentials provided are invalid.')
        if resp.status_code != 200:
            raise exceptions.ConnectionError('Could not connect to the Dataverse')

        self._service_document = etree.XML(resp.content)
        return self._service_document

    def create_dataverse(self, alias, name, email, parent=':root'):
        """Create a Dataverse under *parent* via the native API and return
        the corresponding :class:`Dataverse` object.
        """
        payload = {
            'alias': alias,
            'name': name,
            'dataverseContacts': [{'contactEmail': email}],
        }
        resp = requests.post(
            '{0}/dataverses/{1}'.format(self.native_base_url, parent),
            json=payload,
            params={'key': self.token},
        )

        if resp.status_code == 404:
            raise exceptions.DataverseNotFoundError(
                'Dataverse {0} was not found.'.format(parent)
            )
        if resp.status_code != 201:
            raise exceptions.OperationFailedError(
                '{0} Dataverse could not be created.'.format(name)
            )

        # The service document lists collections; refresh it so the new
        # Dataverse shows up in get_dataverses().
        self.get_service_document(refresh=True)
        return self.get_dataverse(alias)

    def delete_dataverse(self, dataverse):
        """Delete *dataverse* (a :class:`Dataverse` object) via the native API."""
        resp = requests.delete(
            '{0}/dataverses/{1}'.format(self.native_base_url, dataverse.alias),
            params={'key': self.token},
        )

        if resp.status_code == 401:
            raise exceptions.UnauthorizedError(
                'Delete Dataverse {0} unauthorized.'.format(dataverse.alias)
            )
        if resp.status_code == 404:
            raise exceptions.DataverseNotFoundError(
                'Dataverse {0} was not found.'.format(dataverse.alias)
            )
        if resp.status_code != 200:
            raise exceptions.OperationFailedError(
                'Dataverse {0} could not be deleted.'.format(dataverse.alias)
            )

        self.get_service_document(refresh=True)

    def get_dataverses(self, refresh=False):
        """Return all Dataverses visible in the SWORD service document."""
        workspace = self.get_service_document(refresh)[0]
        collections = get_elements(workspace, tag='collection')
        return [Dataverse(self, collection) for collection in collections]

    def get_dataverse(self, alias, refresh=False):
        """Return the Dataverse whose alias matches, or None if absent."""
        for candidate in self.get_dataverses(refresh):
            if candidate.alias == alias:
                return candidate
        return None
class Dataset(object):
    """A dataset inside a Dataverse, manipulated through the SWORD (Atom)
    API and the native JSON API.

    Caches the Atom entry (``_entry``), the SWORD statement
    (``_statement``), and per-version JSON metadata (``_metadata``); each
    getter takes a ``refresh`` flag to bypass its cache.
    """

    def __init__(self, entry=SWORD_BOOTSTRAP, dataverse=None, edit_uri=None,
                 edit_media_uri=None, statement_uri=None, **kwargs):
        """
        Datasets must have a title, description, and author.
        This can be specified in the atom entry or as kwargs
        """
        self.dataverse = dataverse

        self.edit_uri = edit_uri
        self.edit_media_uri = edit_media_uri
        self.statement_uri = statement_uri
        self.is_deleted = False

        # Accept either a pre-parsed element or an XML string.
        self._entry = etree.XML(entry) if isinstance(entry, str) else entry
        self._statement = None
        self._metadata = {}
        self._id = None

        # Updates sword entry from keyword arguments
        for key in kwargs:
            value = kwargs[key]
            if isinstance(value, list):
                for item in value:
                    add_field(self._entry, key, item, 'dcterms')
            else:
                add_field(self._entry, key, value, 'dcterms')

        self.title = get_element(
            self._entry, tag='title', namespace='dcterms'
        ).text

    @classmethod
    def from_xml_file(cls, xml_file):
        """Alternate constructor: build a Dataset from an Atom XML file on disk."""
        with open(xml_file) as f:
            xml = f.read()
        return cls(xml)

    @classmethod
    def from_dataverse(cls, entry_element, dataverse):
        """Alternate constructor: build a Dataset from a collection-feed
        <entry> element returned by the server.
        """
        # Entry not in appropriate format--extract relevant metadata
        id_element = get_element(entry_element, tag='id')
        title_element = get_element(entry_element, tag='title')
        edit_media_element = get_element(
            entry_element,
            tag='link',
            attribute='rel',
            attribute_value='edit-media',
        )

        return cls(
            title=title_element.text,
            id=id_element.text,
            dataverse=dataverse,
            edit_uri=entry_element.base,
            edit_media_uri=edit_media_element.get('href'),
        )

    @property
    def doi(self):
        """The dataset DOI, parsed out of the edit-media URI.

        :raises NoContainerError: if the dataset is not in a Dataverse yet.
        """
        if not self.dataverse:
            raise NoContainerError('This dataset has not been added to a Dataverse.')

        # Note: This depends strongly on URL structure, and may break easily
        return self.edit_media_uri.rsplit('/study/', 1)[-1]

    @property
    def id(self):
        """The native-API numeric id, found by matching this dataset's DOI
        against the containing Dataverse's contents listing. Cached after
        the first successful lookup.

        :raises NoContainerError: if the dataset is not in a Dataverse yet.
        :raises MetadataNotFoundError: if no contents entry matches the DOI.
        """
        if self._id:
            return self._id

        if not self.dataverse:
            raise NoContainerError('This dataset has not been added to a Dataverse.')

        for dataset in self.dataverse.get_contents(refresh=True):
            if dataset['type'] == 'dataset':
                doi = '{0}:{1}/{2}'.format(
                    dataset['protocol'],
                    dataset['authority'],
                    dataset['identifier'],
                )
                if doi == self.doi:
                    self._id = dataset['id']
                    return self._id

        raise MetadataNotFoundError('The dataset ID could not be found.')

    @property
    def citation(self):
        """The bibliographic citation text from the dataset's Atom entry."""
        return get_element(
            self.get_entry(),
            namespace='http://purl.org/dc/terms/',
            tag='bibliographicCitation'
        ).text

    @property
    def connection(self):
        """The Connection of the containing Dataverse, or None if unattached."""
        return self.dataverse.connection if self.dataverse else None

    def get_entry(self, refresh=False):
        """Return the serialized Atom entry, fetching from ``edit_uri``
        when not cached or when *refresh* is True.

        :raises ConnectionError: on a non-200 response.
        """
        if not refresh and self._entry is not None:
            return etree.tostring(self._entry)

        resp = requests.get(self.edit_uri, auth=self.connection.auth)

        if resp.status_code != 200:
            raise ConnectionError('Atom entry could not be retrieved.')

        entry_string = resp.content
        self._entry = etree.XML(entry_string)
        return entry_string

    def get_statement(self, refresh=False):
        """Return the SWORD statement document (raw content), resolving and
        caching ``statement_uri`` from the Atom entry if necessary.

        :raises NoContainerError: if the dataset is not in a Dataverse yet.
        :raises ConnectionError: on a non-200 response.
        """
        if not refresh and self._statement:
            return self._statement

        if not self.dataverse:
            raise NoContainerError('This dataset has not been added to a Dataverse.')

        if not self.statement_uri:
            # Try to find statement uri without a request to the server
            link = get_element(
                self.get_entry(),
                tag='link',
                attribute='rel',
                attribute_value='http://purl.org/net/sword/terms/statement',
            )
            if link is None:
                # Find link with request to server
                link = get_element(
                    self.get_entry(refresh=True),
                    tag='link',
                    attribute='rel',
                    attribute_value='http://purl.org/net/sword/terms/statement',
                )
            self.statement_uri = link.get('href')

        resp = requests.get(self.statement_uri, auth=self.connection.auth)

        if resp.status_code != 200:
            raise ConnectionError('Statement could not be retrieved.')

        self._statement = resp.content
        return self._statement

    def get_state(self, refresh=False):
        """Return the dataset's latest version state (e.g. from the
        statement's ``latestVersionState`` category), or 'DEACCESSIONED'
        if this object has been locally marked deleted.
        """
        if self.is_deleted:
            return 'DEACCESSIONED'

        return get_element(
            self.get_statement(refresh),
            tag='category',
            attribute='term',
            attribute_value='latestVersionState'
        ).text

    def get_metadata(self, version='latest', refresh=False):
        """Return the native-API JSON metadata for *version*, cached per
        version key.

        :raises NoContainerError: if the dataset is not in a Dataverse yet.
        :raises VersionJsonNotFoundError: on a 404 for the version.
        :raises ConnectionError: on any other non-200 response.
        """
        if not refresh and self._metadata.get(version):
            return self._metadata[version]

        if not self.dataverse:
            raise NoContainerError('This dataset has not been added to a Dataverse.')

        # Native API version labels are colon-prefixed (:latest, :draft, ...).
        url = '{0}/datasets/{1}/versions/:{2}'.format(
            self.connection.native_base_url,
            self.id,
            version,
        )

        resp = requests.get(url, params={'key': self.connection.token})

        if resp.status_code == 404:
            raise VersionJsonNotFoundError(
                'JSON metadata could not be found for this version.'
            )
        elif resp.status_code != 200:
            raise ConnectionError('JSON metadata could not be retrieved.')

        metadata = resp.json()['data']
        self._metadata[version] = metadata

        # Update corresponding version metadata if retrieving 'latest'
        if version == 'latest':
            latest_version = (
                'latest-published'
                if metadata['versionState'] == 'RELEASED'
                else 'draft'
            )
            self._metadata[latest_version] = metadata

        return metadata

    def update_metadata(self, metadata):
        """Updates dataset draft with provided metadata.
        Will create a draft version if none exists.

        :param dict metadata: json retrieved from `get_version_metadata`
        :raises OperationFailedError: on a non-200 response.
        """
        url = '{0}/datasets/{1}/versions/:draft'.format(
            self.connection.native_base_url,
            self.id,
        )
        resp = requests.put(
            url,
            headers={'Content-type': 'application/json'},
            data=json.dumps(metadata),
            params={'key': self.connection.token},
        )

        if resp.status_code != 200:
            raise OperationFailedError('JSON metadata could not be updated.')

        # The server's response is the new draft, which is also the latest.
        updated_metadata = resp.json()['data']
        self._metadata['draft'] = updated_metadata
        self._metadata['latest'] = updated_metadata

    def create_draft(self):
        """Create draft version of dataset without changing metadata"""
        metadata = self.get_metadata(refresh=True)
        if metadata.get('versionState') == 'RELEASED':
            self.update_metadata(metadata)

    def publish(self):
        """Publish this dataset via SWORD (In-Progress: false).

        :raises UnpublishedDataverseError: if the host Dataverse is unpublished.
        :raises OperationFailedError: on a non-200 response.
        """
        if not self.dataverse.is_published:
            raise UnpublishedDataverseError('Host Dataverse must be published.')

        resp = requests.post(
            self.edit_uri,
            headers={'In-Progress': 'false', 'Content-Length': '0'},
            auth=self.connection.auth,
        )

        if resp.status_code != 200:
            raise OperationFailedError('The Dataset could not be published.')

        # Publishing consumes the draft; drop its cached metadata.
        self._metadata.pop('draft', None)
        self._refresh(receipt=resp.content)

    def get_file(self, file_name, version='latest', refresh=False):
        """Return the first file with the given name, or None."""
        files = self.get_files(version, refresh)
        return next((f for f in files if f.name == file_name), None)

    def get_file_by_id(self, file_id, version='latest', refresh=False):
        """Return the file with the given id, or None."""
        files = self.get_files(version, refresh)
        return next((f for f in files if f.id == file_id), None)

    def get_files(self, version='latest', refresh=False):
        """Return the DataverseFile objects for *version*; an empty list if
        that version has no JSON metadata (e.g. no such version exists).
        """
        try:
            files_json = self.get_metadata(version, refresh)['files']
            return [DataverseFile.from_json(self, file_json)
                    for file_json in files_json]
        except VersionJsonNotFoundError:
            return []

    def upload_filepath(self, filepath):
        """Upload a single file (or directory) from disk."""
        self.upload_filepaths([filepath])

    def upload_filepaths(self, filepaths):
        """Zip the given paths and upload them in one SWORD deposit."""
        # Convert a directory to a list of files
        if len(filepaths) == 1 and os.path.isdir(filepaths[0]):
            filepaths = get_files_in_path(filepaths[0])

        # Zip up files
        s = StringIO()
        zip_file = ZipFile(s, 'w')
        for filepath in filepaths:
            zip_file.write(filepath)
        zip_file.close()
        content = s.getvalue()

        # Already zipped here, so tell upload_file not to zip again.
        self.upload_file('temp.zip', content, zip_files=False)

    def upload_file(self, filename, content, zip_files=True):
        """Upload in-memory *content* as *filename* via the SWORD
        edit-media URI, zipping it first unless *zip_files* is False.
        """
        if zip_files:
            s = StringIO()
            zip_file = ZipFile(s, 'w')
            zip_file.writestr(filename, content)
            zip_file.close()
            # filename, content should reflect zipped file
            filename = 'temp.zip'
            content = s.getvalue()

        headers = {
            'Content-Disposition': 'filename={0}'.format(filename),
            'Content-Type': 'application/zip',
            'Packaging': 'http://purl.org/net/sword/package/SimpleZip',
        }

        # NOTE(review): the response status is not checked here, so a failed
        # upload passes silently -- confirm whether that is intentional.
        requests.post(
            self.edit_media_uri,
            data=content,
            headers=headers,
            auth=self.connection.auth,
        )

        self.get_metadata(refresh=True)
        # Note: We can't determine which file was uploaded. Returns None

    def delete_file(self, dataverse_file):
        """Delete *dataverse_file* from the dataset.

        :raises OperationFailedError: if the server does not answer 204.
        """
        resp = requests.delete(
            dataverse_file.edit_media_uri,
            auth=self.connection.auth,
        )

        if resp.status_code != 204:
            raise OperationFailedError('The file could not be deleted.')

        self.get_metadata(refresh=True)

    # If we perform a server operation, we should refresh the dataset object
    def _refresh(self, receipt=None):
        """Re-fetch entry, statement, and metadata; when a SWORD deposit
        *receipt* is given, first update the edit/edit-media/statement URIs
        from its <link> elements.
        """
        if receipt:
            self.edit_uri = get_element(
                receipt,
                tag='link',
                attribute='rel',
                attribute_value='edit'
            ).get('href')
            self.edit_media_uri = get_element(
                receipt,
                tag='link',
                attribute='rel',
                attribute_value='edit-media'
            ).get('href')
            self.statement_uri = get_element(
                receipt,
                tag='link',
                attribute='rel',
                attribute_value='http://purl.org/net/sword/terms/statement'
            ).get('href')

        self.get_statement(refresh=True)
        self.get_entry(refresh=True)
        self.get_metadata('latest', refresh=True)
class Dataverse(object):
    """A Dataverse collection, wrapping a SWORD <collection> element and
    offering both SWORD and native-API operations.
    """

    def __init__(self, connection, collection):
        """
        :param connection: the owning Connection (supplies auth and URLs)
        :param collection: the SWORD <collection> element for this Dataverse
        """
        self.connection = connection
        self.collection = collection

        # Lazily-populated caches; see get_collection_info / get_contents.
        self._collection_info = None
        self._contents_json = None

    @property
    def is_published(self):
        """Whether this Dataverse has been released (always re-checked)."""

        # Always check latest version
        collection_info = requests.get(
            self.collection.get('href'),
            auth=self.connection.auth,
        ).content

        status_tag = get_element(
            collection_info,
            namespace='http://purl.org/net/sword/terms/state',
            tag='dataverseHasBeenReleased',
        )
        status = status_tag.text

        return status.lower() == 'true'

    @property
    def alias(self):
        # The collection href ends with the Dataverse alias.
        return self.collection.get('href').split('/')[-1]

    @property
    def title(self):
        return sanitize(get_element(
            self.collection,
            namespace='atom',
            tag='title',
        ).text)

    def get_contents(self, refresh=False):
        """Return the native-API JSON contents listing, cached unless
        *refresh* is True.

        :raises ConnectionError: on a non-200 response.
        """
        if not refresh and self._contents_json:
            return self._contents_json

        content_uri = '{0}/dataverses/{1}/contents'.format(
            self.connection.native_base_url, self.alias
        )
        resp = requests.get(
            content_uri,
            params={'key': self.connection.token}
        )

        if resp.status_code != 200:
            # Fixed message: this endpoint returns JSON contents, not an
            # Atom entry (old message was copy-pasted from elsewhere).
            raise ConnectionError('Dataverse contents could not be retrieved.')

        self._contents_json = resp.json()['data']
        return self._contents_json

    def get_collection_info(self, refresh=False, timeout=None):
        """Return the raw SWORD collection (Atom feed) content, cached
        unless *refresh* is True.
        """
        if not refresh and self._collection_info:
            return self._collection_info

        self._collection_info = requests.get(
            self.collection.get('href'),
            auth=self.connection.auth,
            timeout=timeout,
        ).content
        return self._collection_info

    def publish(self):
        """Publish this Dataverse via the SWORD edit endpoint.

        :raises OperationFailedError: on a non-200 response.
        """
        edit_uri = '{0}/edit/dataverse/{1}'.format(
            self.connection.sword_base_url, self.alias
        )
        resp = requests.post(
            edit_uri,
            headers={'In-Progress': 'false'},
            auth=self.connection.auth,
        )

        if resp.status_code != 200:
            raise OperationFailedError('The Dataverse could not be published.')

    def create_dataset(self, title, description, creator, **kwargs):
        """Create a Dataset in this Dataverse and return it. Extra kwargs
        become additional dcterms fields on the Atom entry.
        """
        dataset = Dataset(
            title=title,
            description=description,
            creator=creator,
            **kwargs
        )

        self._add_dataset(dataset)
        return dataset

    def _add_dataset(self, dataset):
        """Deposit *dataset*'s Atom entry into this collection and bind the
        dataset to this Dataverse.

        :raises OperationFailedError: if the server does not answer 201.
        """
        resp = requests.post(
            self.collection.get('href'),
            data=dataset.get_entry(),
            headers={'Content-type': 'application/atom+xml'},
            auth=self.connection.auth,
        )

        if resp.status_code != 201:
            raise OperationFailedError('This dataset could not be added.')

        dataset.dataverse = self
        dataset._refresh(receipt=resp.content)
        self.get_collection_info(refresh=True)

    def delete_dataset(self, dataset):
        """Delete (deaccession) *dataset*; a no-op if already gone.

        :raises MethodNotAllowedError: if the dataset is published (the
            server answers 405; see IQSS/dataverse#778).
        """
        # Fetch the state once: get_state() may trigger a server round-trip,
        # and the original code called it twice.
        if dataset.get_state() in ('DELETED', 'DEACCESSIONED'):
            return

        resp = requests.delete(
            dataset.edit_uri,
            auth=self.connection.auth,
        )
        if resp.status_code == 405:
            raise MethodNotAllowedError(
                'Published datasets can only be deleted from the GUI. For '
                'more information, please refer to '
                'https://github.com/IQSS/dataverse/issues/778'
            )

        dataset.is_deleted = True
        self.get_collection_info(refresh=True)

    def get_datasets(self, refresh=False, timeout=None):
        """Return all Datasets listed in this collection's Atom feed."""
        collection_info = self.get_collection_info(refresh, timeout=timeout)
        entries = get_elements(collection_info, tag='entry')
        return [Dataset.from_dataverse(entry, self) for entry in entries]

    def get_dataset_by_doi(self, doi, refresh=False, timeout=None):
        """Return the dataset with the given DOI, or None."""
        return next(
            (s for s in self.get_datasets(refresh, timeout=timeout) if s.doi == doi),
            None
        )

    def get_dataset_by_title(self, title, refresh=False, timeout=None):
        """Return the first dataset with the given title, or None."""
        return next(
            (s for s in self.get_datasets(refresh, timeout=timeout) if s.title == title),
            None
        )

    def get_dataset_by_string_in_entry(self, string, refresh=False, timeout=None):
        """Return the first dataset whose Atom entry contains *string*, or None."""
        return next(
            (s for s in self.get_datasets(refresh, timeout=timeout) if string in s.get_entry()),
            None
        )
from __future__ import absolute_import


class DataverseError(Exception):
    """Base exception class for Dataverse-related error."""


class UnauthorizedError(DataverseError):
    """Raised if a user provides invalid credentials."""


class InsufficientMetadataError(DataverseError):
    """Raised if more metadata is required."""


class MethodNotAllowedError(DataverseError):
    """Raised if the attempted method is not allowed"""


class NoContainerError(DataverseError):
    """Raised if a dataset attempts to access the server before it is added to a Dataverse"""


class ConnectionError(DataverseError):
    """Raised when connection fails for an unknown reason"""


class OperationFailedError(DataverseError):
    """Raised when an operation fails for an unknown reason"""


class MetadataNotFoundError(DataverseError):
    """Raised when metadata cannot be found for an unknown reason"""


class UnpublishedDataverseError(DataverseError):
    """Raised when a request requires that a Dataverse first be published"""


class VersionJsonNotFoundError(DataverseError):
    """Raised when requested json data for a version is not found"""


class DataverseNotFoundError(DataverseError):
    """Raised when a Dataverse cannot be found"""
@classmethod
def from_json(cls, dataset, json):
    """Build a DataverseFile from file JSON.

    Accepts both the newer native-API shape ({'dataFile': {'filename', 'id'}})
    and the older shape ({'datafile': {'name', 'id'}}); any KeyError while
    reading the new shape falls back to the old one.
    """
    try:
        metadata = json['dataFile']
        name, file_id = metadata['filename'], metadata['id']
    except KeyError:
        metadata = json['datafile']
        name, file_id = metadata['name'], metadata['id']
    return cls(dataset, name, file_id)
from __future__ import absolute_import

from dataverse.settings.defaults import *  # noqa

# Optional developer overrides; it is fine for dataverse/settings/local.py
# not to exist. The original bound the exception as `error` without ever
# using it — drop the dead binding.
try:
    from dataverse.settings.local import *  # noqa
except ImportError:
    pass
# Fields that should appear at most once per Atom entry; utils.add_field
# looks these up and overwrites the existing element instead of appending
# a duplicate.
UNIQUE_FIELDS = [
    'title',
    'id',
    'updated',
    'summary',
]

# Maps friendly metadata keys used by callers onto the Dublin Core term
# names Dataverse actually expects.
REPLACEMENT_DICT = {
    'id': 'identifier',
    'author': 'creator',
    'producer': 'publisher',
    'restriction': 'rights',
    'keyword': 'subject',
    'publication': 'isReferencedBy',
}
logging.basicConfig(level=logging.ERROR)


class DataverseServerTestBase(object):
    """Create a temporary user on `TEST_HOST` for testing purposes.

    This attaches `username`, `password`, and `token` to the class.
    """

    @classmethod
    def setup_class(cls):
        """Create a temporary user."""
        cls.username = str(uuid.uuid1())
        cls.password = 'p4ssw0rd'
        key = 'burrito'  # hardcoded on test servers

        user_url = 'https://{0}/api/builtin-users?key={1}&password={2}'.format(
            TEST_HOST, key, cls.password,
        )
        payload = {
            'email': '{0}@gmail.com'.format(cls.username),
            'firstName': 'Namey',
            'lastName': 'Namington',
            'userName': cls.username,
        }

        response = requests.post(user_url, json=payload)
        cls.token = response.json()['data']['apiToken']

    @classmethod
    def teardown_class(cls):
        """Delete the temporary user.

        Note that this will fail if the user has any non-deleted content.
        """
        delete_url = 'https://{0}/api/admin/authenticatedUsers/{1}/'.format(
            TEST_HOST, cls.username,
        )
        response = requests.delete(delete_url)
        assert response.status_code == 200


class TestUtils(object):

    @staticmethod
    def _read_entry():
        # Shared fixture loader for the Atom test document.
        with open(ATOM_DATASET) as f:
            return f.read()

    def test_get_element(self):
        entry = self._read_entry()

        # One value
        assert utils.get_element(entry, 'title', 'dcterms').text == 'Roasting at Home'

        # Two values: the first match is returned
        assert utils.get_element(entry, 'creator', 'dcterms').text == 'Peets, John'

        # No values
        assert utils.get_element(entry, 'nonsense', 'booga') is None

    def test_get_elements(self):
        entry = self._read_entry()

        # One value
        titles = utils.get_elements(entry, 'title', 'dcterms')
        assert [element.text for element in titles] == ['Roasting at Home']

        # Two values
        creators = utils.get_elements(entry, 'creator', 'dcterms')
        assert [element.text for element in creators] == [
            'Peets, John',
            'Stumptown, Jane',
        ]

        # No values
        assert utils.get_elements(entry, 'nonsense', 'booga') == []

    def test_format_term(self):
        # A term not in the replacement dict passes through unchanged
        assert utils.format_term('title', namespace='dcterms') == \
            '{http://purl.org/dc/terms/}title'

    def test_format_term_replace(self):
        # A term in the replacement dict is swapped for its Dublin Core name
        assert utils.format_term('id', namespace='dcterms') == \
            '{http://purl.org/dc/terms/}identifier'
class TestConnection(DataverseServerTestBase):

    def test_connect(self):
        connection = Connection(TEST_HOST, self.token)

        assert connection.host == TEST_HOST
        assert connection.token == self.token
        assert connection._service_document

    def test_connect_unauthorized(self):
        with pytest.raises(exceptions.UnauthorizedError):
            Connection(TEST_HOST, 'wrong-token')

    @httpretty.activate
    def test_connect_unknown_failure(self):
        service_url = (
            'https://{host}/dvn/api/data-deposit/v1.1/swordv2/service-document'
            .format(host=TEST_HOST)
        )
        httpretty.register_uri(httpretty.GET, service_url, status=400)

        with pytest.raises(exceptions.ConnectionError):
            Connection(TEST_HOST, self.token)

    def test_create_dataverse(self):
        connection = Connection(TEST_HOST, self.token)
        alias = str(uuid.uuid1())  # must be unique
        connection.create_dataverse(
            alias,
            'Test Name',
            'dataverse@example.com',
        )

        dataverse = connection.get_dataverse(alias, True)
        try:
            assert dataverse.alias == alias
            assert dataverse.title == 'Test Name'
        finally:
            connection.delete_dataverse(dataverse)

    def test_delete_dataverse(self):
        connection = Connection(TEST_HOST, self.token)
        alias = str(uuid.uuid1())  # must be unique
        dataverse = connection.create_dataverse(
            alias,
            'Test Name',
            'dataverse@example.com',
        )

        connection.delete_dataverse(dataverse)
        assert connection.get_dataverse(alias) is None

    def test_get_dataverses(self):
        connection = Connection(TEST_HOST, self.token)
        original_dataverses = connection.get_dataverses()
        assert isinstance(original_dataverses, list)

        alias = str(uuid.uuid1())  # must be unique
        dataverse = connection.create_dataverse(
            alias,
            'Test Name',
            'dataverse@example.com',
        )

        current_dataverses = connection.get_dataverses()
        try:
            assert len(current_dataverses) == len(original_dataverses) + 1
            assert alias in [dv.alias for dv in current_dataverses]
        finally:
            connection.delete_dataverse(dataverse)

        current_dataverses = connection.get_dataverses()
        assert [dv.alias for dv in current_dataverses] == \
            [dv.alias for dv in original_dataverses]

    def test_get_dataverse(self):
        connection = Connection(TEST_HOST, self.token)
        alias = str(uuid.uuid1())  # must be unique
        assert connection.get_dataverse(alias) is None

        dataverse = connection.create_dataverse(
            alias,
            'Test Name',
            'dataverse@example.com',
        )
        try:
            assert dataverse is not None
            assert dataverse.alias == alias
        finally:
            connection.delete_dataverse(dataverse)
class TestDataset(object):

    def test_init(self):
        """Metadata passed to the constructor must land in the Atom entry."""
        dataset = Dataset(title='My Dataset', publisher='Mr. Pub Lisher')
        title = utils.get_element(
            dataset._entry,
            namespace='dcterms',
            tag='title'
        ).text
        publisher = utils.get_element(
            dataset._entry,
            namespace='dcterms',
            tag='publisher'
        ).text
        assert title == 'My Dataset'
        assert title == dataset.title
        assert publisher == 'Mr. Pub Lisher'

    def test_init_from_xml(self):
        """Metadata loaded from an XML file must be readable from the entry."""
        dataset = Dataset.from_xml_file(ATOM_DATASET)
        title = utils.get_element(
            dataset.get_entry(),
            namespace='dcterms',
            tag='title'
        ).text
        # NOTE: this element is 'rights', not 'publisher' — the original
        # variable name was misleading.
        rights = utils.get_element(
            dataset.get_entry(),
            namespace='dcterms',
            tag='rights'
        ).text
        assert title == 'Roasting at Home'
        assert rights == 'Creative Commons CC-BY 3.0 (unported) ' \
            'http://creativecommons.org/licenses/by/3.0/'


class TestDatasetOperations(DataverseServerTestBase):

    @classmethod
    def setup_class(cls):
        """Create the temp user, connect, and create a test Dataverse."""
        super(TestDatasetOperations, cls).setup_class()

        print('Connecting to Dataverse host at {0}'.format(TEST_HOST))
        cls.connection = Connection(TEST_HOST, cls.token)

        print('Creating test Dataverse')
        cls.alias = str(uuid.uuid1())
        cls.connection.create_dataverse(
            cls.alias,
            'Test Dataverse',
            'dataverse@example.com',
        )
        cls.dataverse = cls.connection.get_dataverse(cls.alias, True)
        assert cls.dataverse

    @classmethod
    def teardown_class(cls):
        """Remove the test Dataverse, then delete the temporary user."""
        print('Removing test Dataverse')
        cls.connection.delete_dataverse(cls.dataverse)
        dataverse = cls.connection.get_dataverse(cls.alias, True)
        assert dataverse is None

        # BUG FIX: the original called super(...).setup_class() here, which
        # created a *new* temporary user on every teardown and never deleted
        # the one made in setup_class, leaking users on the test server.
        # The Dataverse must be deleted first, since the base teardown fails
        # if the user still owns content.
        super(TestDatasetOperations, cls).teardown_class()

    def setup_method(self, method):
        """Create a fresh dataset for each test."""
        dataset = Dataset(**PICS_OF_CATS_DATASET)
        self.dataverse._add_dataset(dataset)
        self.dataset = self.dataverse.get_dataset_by_doi(dataset.doi)

    def teardown_method(self, method):
        """Best-effort cleanup; deletion failures must not mask test results."""
        try:
            self.dataverse.delete_dataset(self.dataset)
        except Exception:
            # Same intent as the original `finally: return`, but explicit:
            # ignore any error raised while deleting the per-test dataset.
            pass
self.dataverse.get_dataset_by_title(title) 283 | try: 284 | assert dataset.title == title 285 | finally: 286 | self.dataverse.delete_dataset(dataset) 287 | 288 | def test_add_dataset_from_xml(self): 289 | new_dataset = Dataset.from_xml_file(ATOM_DATASET) 290 | self.dataverse._add_dataset(new_dataset) 291 | retrieved_dataset = self.dataverse.get_dataset_by_title('Roasting at Home') 292 | assert retrieved_dataset 293 | self.dataverse.delete_dataset(retrieved_dataset) 294 | 295 | def test_id_property(self): 296 | alias = str(uuid.uuid1()) 297 | # Creating a dataverse within a dataverse 298 | self.connection.create_dataverse( 299 | alias, 300 | 'Sub Dataverse', 301 | 'dataverse@example.com', 302 | self.alias, 303 | ) 304 | sub_dataverse = self.connection.get_dataverse(alias, True) 305 | assert self.dataset.id == self.dataset._id 306 | self.connection.delete_dataverse(sub_dataverse) 307 | 308 | def test_add_files(self): 309 | self.dataset.upload_filepaths(EXAMPLE_FILES) 310 | actual_files = [f.name for f in self.dataset.get_files()] 311 | 312 | assert '__init__.py' in actual_files 313 | assert 'config.py' in actual_files 314 | 315 | def test_upload_file(self): 316 | self.dataset.upload_file('file.txt', 'This is a simple text file!') 317 | self.dataset.upload_file('file2.txt', 'This is the second simple text file!') 318 | actual_files = [f.name for f in self.dataset.get_files()] 319 | 320 | assert 'file.txt' in actual_files 321 | assert 'file2.txt' in actual_files 322 | 323 | def test_display_atom_entry(self): 324 | # this just tests we can get an entry back, but does 325 | # not do anything with that xml yet. however, we do use get_entry 326 | # in other methods so this test case is probably covered 327 | assert self.dataset.get_entry() 328 | 329 | def test_display_dataset_statement(self): 330 | # this just tests we can get an entry back, but does 331 | # not do anything with that xml yet. 
however, we do use get_statement 332 | # in other methods so this test case is probably covered 333 | assert self.dataset.get_statement() 334 | 335 | def test_delete_a_file(self): 336 | self.dataset.upload_file('cat.jpg', b'Whatever a cat looks like goes here.') 337 | 338 | # Add file and confirm 339 | files = self.dataset.get_files() 340 | assert len(files) == 1 341 | assert files[0].name == 'cat.jpg' 342 | 343 | # Delete file and confirm 344 | self.dataset.delete_file(files[0]) 345 | files = self.dataset.get_files() 346 | assert not files 347 | 348 | def test_delete_a_dataset(self): 349 | xmlDataset = Dataset.from_xml_file(ATOM_DATASET) 350 | self.dataverse._add_dataset(xmlDataset) 351 | atomDataset = self.dataverse.get_dataset_by_title('Roasting at Home') 352 | num_datasets = len(self.dataverse.get_datasets()) 353 | 354 | assert num_datasets > 0 355 | self.dataverse.delete_dataset(atomDataset) 356 | assert atomDataset.get_state(refresh=True) == 'DEACCESSIONED' 357 | assert len(self.dataverse.get_datasets()) == num_datasets - 1 358 | 359 | @pytest.mark.skipif(True, reason='Published datasets can no longer be deaccessioned via API') 360 | def test_publish_dataset(self): 361 | assert self.dataset.get_state() == 'DRAFT' 362 | self.dataset.publish() 363 | assert self.dataset.get_state() == 'PUBLISHED' 364 | self.dataverse.delete_dataset(self.dataset) 365 | assert self.dataset.get_state(refresh=True) == 'DEACCESSIONED' 366 | 367 | 368 | if __name__ == '__main__': 369 | pytest.main() 370 | -------------------------------------------------------------------------------- /dataverse/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import os 4 | 5 | from lxml import etree 6 | import bleach 7 | 8 | from dataverse.settings import SWORD_NAMESPACE, REPLACEMENT_DICT, UNIQUE_FIELDS 9 | 10 | 11 | # factor out xpath operations so we don't have to look at its ugliness 12 | def 
# factor out xpath operations so we don't have to look at its ugliness
def get_element(root, tag='*', namespace=None, attribute=None, attribute_value=None):
    """Return the first matching element, or None when nothing matches."""
    elements = get_elements(root, tag, namespace, attribute, attribute_value)
    return elements[0] if elements else None


def get_elements(root, tag='*', namespace=None, attribute=None, attribute_value=None):
    """Return all elements matching tag/namespace under `root`.

    `root` may be an lxml element or an XML string/bytes. `namespace` may be
    a prefix present in the document's nsmap or a full namespace URI.
    Optionally filter on the presence of `attribute` or on an exact
    `attribute`=`attribute_value` pair.

    Raises ValueError if attribute_value is passed without attribute.
    """
    # If string, convert to etree element
    if isinstance(root, (str, bytes)):
        root = etree.XML(root)

    # Resolve a namespace prefix against the document's nsmap; fall back to
    # treating the argument as a literal namespace URI.
    namespace = root.nsmap.get(namespace, namespace)

    if namespace is None:
        xpath = tag
    else:
        xpath = '{{{ns}}}{tag}'.format(ns=namespace, tag=tag)

    if attribute and not attribute_value:
        xpath += '[@{att}]'.format(att=attribute)
    elif not attribute and attribute_value:
        # FIX: raise the specific ValueError instead of a bare Exception;
        # ValueError is a subclass of Exception, so existing broad handlers
        # still catch it.
        raise ValueError('You must pass an attribute with attribute_value')
    elif attribute and attribute_value:
        xpath += "[@{att}='{val}']".format(att=attribute, val=attribute_value)

    return root.findall(xpath)


def format_term(term, namespace):
    """Return the Clark-notation '{uri}term' form, applying known renames."""
    term = REPLACEMENT_DICT.get(term, term)
    return '{{{0}}}{1}'.format(SWORD_NAMESPACE[namespace], term)


def add_field(entry, key, value, namespace='dcterms'):
    """Set `key` to `value` on `entry`.

    Fields listed in UNIQUE_FIELDS overwrite any existing element; other
    fields always append a new sub-element.
    """
    formatted_key = format_term(key, namespace)
    element = entry.find(formatted_key) if key in UNIQUE_FIELDS else None

    if element is None:
        element = etree.SubElement(entry, formatted_key, nsmap=SWORD_NAMESPACE)

    element.text = value


def get_files_in_path(path):
    """Recursively collect the paths of all files beneath `path`."""
    path = os.path.normpath(path)
    filepaths = []
    for filename in os.listdir(path):
        # FIX: build paths with os.path.join rather than string
        # concatenation with a manually appended separator.
        filepath = os.path.join(path, filename)
        if os.path.isdir(filepath):
            filepaths += get_files_in_path(filepath)
        else:
            filepaths.append(filepath)
    return filepaths


def sanitize(value):
    """Strip all HTML tags and attributes from `value` via bleach."""
    return bleach.clean(value, strip=True, tags=[], attributes=[], styles=[])
9 | 10 | ## Installation 11 | 12 | $ pip install -e git+https://github.com/IQSS/dataverse-client-python.git#egg=dataverse 13 | 14 | Requires Python >= 2.6. 15 | 16 | 17 | ## Usage 18 | 19 | To use the python client, you will need a dataverse account and an API token. 20 | ```python 21 | from dataverse import Connection 22 | 23 | host = 'demo.dataverse.org' # All clients >4.0 are supported 24 | token = '4d0634d3-74d5-4770-8088-1971847ac75e' # Generated at /account/apitoken 25 | 26 | connection = Connection(host, token) 27 | # For non-https connections (e.g. local dev environment), try: 28 | # connection = Connection(host, token, use_https=False) 29 | ``` 30 | 31 | Dataverse Objects can be retrieved from their respective containers 32 | ```python 33 | dataverse = connection.get_dataverse('ALIAS') 34 | dataset = dataverse.get_dataset_by_doi('DOI:10.5072/FK2/ABC123') 35 | files = dataset.get_files('latest') 36 | ``` 37 | 38 | ## Testing 39 | 40 | ### Configuration 41 | 42 | Create a file at `dataverse/settings/local.py`. The file should contain the following 43 | information: 44 | 45 | ```python 46 | TEST_HOST = 'demo.dataverse.org' 47 | ``` 48 | 49 | Do not commit this file. 50 | 51 | ### Running Tests 52 | 53 | To run tests: 54 | 55 | $ py.test 56 | 57 | Or, to run a specific test: 58 | 59 | $ py.test dataverse/test/test_dataverse.py::TestClassName::test_method_name 60 | 61 | To check for style: 62 | 63 | $ flake8 . 
# -*- coding: utf-8 -*-
from setuptools import setup, find_packages


REQUIRES = [
    'bleach>=1.2.2',
    'requests>=2.2.1',
    'lxml>=3.2.5',
]

TESTS_REQUIRE = [
    'httpretty>=0.8.8',
    'pytest>=2.7.0',
    'flake8>=2.4.0',
]


def read(fname):
    """Return the contents of `fname` as a string."""
    with open(fname) as fp:
        content = fp.read()
    return content

setup(
    name='dataverse',
    version='0.1.2',
    description='Python client for Dataverse version 3.X',
    long_description=read("readme.md"),
    author='Dataverse',
    author_email='rliebz@gmail.com',
    url='https://github.com/rliebz/dvn-client-python',
    packages=find_packages(),
    package_dir={'dvn-client-python': 'dataverse'},
    include_package_data=True,
    install_requires=REQUIRES,
    # FIX: the license field expects a short identifier; the original passed
    # the entire LICENSE file contents. The full text still ships in LICENSE.
    license='Apache License 2.0',
    zip_safe=False,
    keywords='dataverse',
    classifiers=[
        'Development Status :: 2 - Pre-Alpha',
        'Intended Audience :: Developers',
        # FIX: LICENSE is Apache 2.0, but the original classifier said MIT.
        'License :: OSI Approved :: Apache Software License',
        'Natural Language :: English',
    ],
    # FIX: test_suite must be an importable dotted path, not a filesystem
    # path ('dataverse/test' cannot be imported by the test command).
    test_suite='dataverse.test',
    tests_require=TESTS_REQUIRE,
    # FIX: removed cmdclass={'test': unittest}. That passed the `unittest`
    # *module* as a distutils command class, which crashes `setup.py test`;
    # the default test command together with test_suite is sufficient.
)