├── .github └── workflows │ ├── black.yaml │ ├── bot.yaml │ ├── codemeta2cff.yml │ ├── iga.yaml │ └── pypi-publish.yaml ├── .gitignore ├── CITATION.cff ├── CODE_OF_CONDUCT.md ├── LICENSE ├── README.md ├── Uploading_dataset_to_CaltechDATA.ipynb ├── accept.py ├── caltechdata_api ├── __init__.py ├── caltechdata_edit.py ├── caltechdata_write.py ├── cli-documentation-for-users.md ├── cli.py ├── customize_schema.py ├── download_file.py ├── get_files.py ├── get_metadata.py ├── md_to_json.py ├── pictures-documentation │ ├── Interact CLI Step 1(a).png │ ├── Interact CLI Step 1(b).png │ ├── Interact CLI Step 1(c) Test Instance.png │ ├── Interact CLI Step 1(d) Test Instance.png │ ├── Interact CLI Step 2(a).png │ ├── Interact CLI Step 2(b).png │ ├── Interact CLI Step 2(c).png │ ├── README.md │ ├── Step 1.png │ ├── Step 2.png │ ├── Step 3(a).png │ ├── Step 3(b).png │ ├── Step 3(c).png │ ├── Step 4(a).png │ ├── Step 4(b).png │ ├── Step 5.png │ ├── Step 6(a).png │ └── Step 6(b).png ├── utils.py ├── vocabularies.yaml └── vocabularies │ ├── .DS_Store │ ├── date_types.yaml │ ├── description_types.yaml │ ├── identifier_types.yaml │ ├── licenses.csv │ ├── relation_types.yaml │ ├── resource_types.yaml │ ├── roles.yaml │ └── title_types.yaml ├── codemeta.json ├── completed_dois.json ├── edit.py ├── edit_osn.py ├── example.json ├── example_custom.json ├── excluded_dois.json ├── fix_names.py ├── inspect_dois.py ├── logo.gif ├── new_ids.json ├── outdated ├── README.md ├── add_doi_minting_date.py ├── caltechdata_multipart.py ├── edit_all.py ├── edit_all_geo.py ├── edit_all_github.py ├── edit_all_tccon.py ├── edit_files.py ├── edit_tccon.py ├── example_download_and_upload.ipynb ├── get_geo.py ├── test.py ├── test_community.py ├── test_file.py ├── unembargo.py ├── update_thesis_file.py └── write_pilot_phase1.py ├── process_tomograms.py ├── pyproject.toml ├── rdm.json ├── run-tests.sh ├── setup.cfg ├── setup.py ├── templates └── README.md ├── tests ├── bot.py ├── bot_yaml.py ├── conftest.py 
├── data │ ├── caltechdata │ │ ├── 1171.json │ │ ├── 1235.json │ │ ├── 1250.json │ │ ├── 1259.json │ │ ├── 1300.json │ │ ├── 210.json │ │ ├── 266.json │ │ ├── 267.json │ │ ├── 268.json │ │ ├── 283.json │ │ ├── 293.json │ │ ├── 301.json │ │ └── 970.json │ ├── datacite43 │ │ ├── 4yxbs-4mj38.json │ │ ├── asjw8-cd908.json │ │ ├── b2jqz-qdw65.json │ │ ├── cgkcc-ymk88.json │ │ ├── d7mk4-f8t44.json │ │ ├── dks9f-mj878.json │ │ ├── ep884-g0v97.json │ │ ├── f40da-hww21.json │ │ ├── fbdqe-hez98.json │ │ ├── hevaf-20f84.json │ │ ├── hhg7x-hgm42.json │ │ ├── kxjgj-tfk18.json │ │ ├── kxtar-bm759.json │ │ ├── n0y4x-xx706.json │ │ ├── n13wc-zwc92.json │ │ ├── nbtw5-37m55.json │ │ ├── rmzp9-9yx96.json │ │ ├── t15w6-x9q23.json │ │ └── wbty9-bqy29.json │ └── invalid_datacite43 │ │ ├── invalid_metadata_1.json │ │ ├── invalid_metadata_10.json │ │ ├── invalid_metadata_2.json │ │ ├── invalid_metadata_3.json │ │ ├── invalid_metadata_4.json │ │ ├── invalid_metadata_5.json │ │ ├── invalid_metadata_6.json │ │ ├── invalid_metadata_7.json │ │ ├── invalid_metadata_8.json │ │ ├── invalid_metadata_9.json │ │ ├── missing_creators.json │ │ ├── missing_publisher.json │ │ ├── multiple_errors.json │ │ └── type_error_creators.json ├── helpers.py ├── test_download.py ├── test_rdm.py └── test_unit.py ├── token.bash ├── tomogram_ids.json ├── write.py ├── write_authors.py └── write_hte.py /.github/workflows/black.yaml: -------------------------------------------------------------------------------- 1 | name: Lint 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | lint: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v4 10 | - uses: psf/black@stable 11 | -------------------------------------------------------------------------------- /.github/workflows/bot.yaml: -------------------------------------------------------------------------------- 1 | name: Bot validation 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | validate-metadata: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - 
uses: actions/checkout@v4 11 | with: 12 | fetch-depth: 0 13 | 14 | - name: Set up Python 15 | uses: actions/setup-python@v4 16 | with: 17 | python-version: '3.x' 18 | 19 | - name: Install dependencies 20 | run: | 21 | python -m pip install --upgrade pip 22 | pip install pytest requests s3fs cryptography 23 | pip install . 24 | 25 | - name: Run against CaltechData Test system 26 | env: 27 | RDMTOK: ${{ secrets.CALTECHDATA_TOKEN }} 28 | run: | 29 | cd tests 30 | pytest test_unit.py 31 | pytest test_rdm.py 32 | - name: Run Metadata Validation Test and RDM 33 | env: 34 | RDMTOK: ${{ secrets.CALTECHDATA_TOKEN }} 35 | run: | 36 | cd tests 37 | python bot_yaml.py 38 | 39 | -------------------------------------------------------------------------------- /.github/workflows/codemeta2cff.yml: -------------------------------------------------------------------------------- 1 | name: CodeMeta2CFF 2 | run-name: Run CodeMeta2CFF after ${{github.event_name}} by ${{github.actor}} 3 | 4 | on: 5 | push: 6 | paths: ['codemeta.json'] 7 | workflow_dispatch: 8 | inputs: 9 | reason: 10 | description: 'Reason' 11 | required: false 12 | default: 'Manual trigger' 13 | 14 | jobs: 15 | CodeMeta2CFF: 16 | runs-on: ubuntu-latest 17 | steps: 18 | - name: Checkout 19 | uses: actions/checkout@v4 20 | - name: Convert CFF 21 | uses: caltechlibrary/codemeta2cff@main 22 | - name: Install jq for JSON parsing 23 | run: sudo apt-get install -y jq 24 | - name: Parse and update setup.cfg 25 | run: | 26 | # Extract values from codemeta.json 27 | NAME=$(jq -r '.name' codemeta.json) 28 | VERSION=$(jq -r '.version' codemeta.json) 29 | AUTHORS=$(jq -r '[.author[] | .givenName + " " + .familyName] | join(", ")' codemeta.json) 30 | AUTHOR_EMAILS=$(jq -r '[.author[] | .email // empty] | join(", ")' codemeta.json) 31 | DESCRIPTION=$(jq -r '.description' codemeta.json) 32 | URL=$(jq -r '.codeRepository // .url' codemeta.json) 33 | 34 | # Update setup.cfg fields 35 | sed -i "s/^name = .*/name = $NAME/" setup.cfg 36 |
sed -i "s/^version = .*/version = $VERSION/" setup.cfg 37 | sed -i "s/^author = .*/author = $AUTHORS/" setup.cfg 38 | sed -i "s/^author_email = .*/author_email = $AUTHOR_EMAILS/" setup.cfg 39 | sed -i "s/^description = .*/description = $DESCRIPTION/" setup.cfg 40 | sed -i "s|^url = .*|url = $URL|" setup.cfg 41 | - name: Commit CFF 42 | uses: EndBug/add-and-commit@v9 43 | with: 44 | message: 'Add updated CITATION.cff and setup.cfg from codemeta.json file' 45 | add: '["setup.cfg", "CITATION.cff"]' 46 | -------------------------------------------------------------------------------- /.github/workflows/iga.yaml: -------------------------------------------------------------------------------- 1 | env: 2 | INVENIO_SERVER: https://data.caltech.edu 3 | 4 | # These variables are IGA options. Please see the docs for info. 5 | draft: false 6 | all_assets: false 7 | all_metadata: false 8 | community: none 9 | parent_record: "6qhkm-7n074" 10 | debug: false 11 | 12 | # This variable is a setting for post-archiving CodeMeta file updates. 13 | # If you don't have a CodeMeta file, you can remove the add_doi_codemeta 14 | # and Coremeta2CFF jobs at the bottom of this file. 
15 | ref: main 16 | 17 | # ╭────────────────────────────────────────────╮ 18 | # │ The rest of this file should be left as-is │ 19 | # ╰────────────────────────────────────────────╯ 20 | 21 | name: InvenioRDM GitHub Archiver 22 | on: 23 | release: 24 | types: [published] 25 | workflow_dispatch: 26 | inputs: 27 | release_tag: 28 | description: The release tag (empty = latest) 29 | parent_record: 30 | description: ID of parent record (for versioning) 31 | community: 32 | description: Name of InvenioRDM community (if any) 33 | draft: 34 | description: Mark the record as a draft 35 | type: boolean 36 | all_assets: 37 | description: Attach all GitHub assets 38 | type: boolean 39 | all_metadata: 40 | description: Include additional GitHub metadata 41 | type: boolean 42 | debug: 43 | description: Print debug info in the GitHub log 44 | type: boolean 45 | 46 | run-name: Archive ${{inputs.release_tag || 'latest release'}} in InvenioRDM 47 | jobs: 48 | run_iga: 49 | name: Send to ${{needs.get_repository.outputs.server}} 50 | runs-on: ubuntu-latest 51 | needs: get_repository 52 | outputs: 53 | record_doi: ${{steps.iga.outputs.record_doi}} 54 | steps: 55 | - uses: caltechlibrary/iga@v1 56 | id: iga 57 | with: 58 | INVENIO_SERVER: ${{env.INVENIO_SERVER}} 59 | INVENIO_TOKEN: ${{secrets.INVENIO_TOKEN}} 60 | all_assets: ${{github.event.inputs.all_assets || env.all_assets}} 61 | all_metadata: ${{github.event.inputs.all_metadata || env.all_metadata}} 62 | debug: ${{github.event.inputs.debug || env.debug}} 63 | draft: ${{github.event.inputs.draft || env.draft}} 64 | community: ${{github.event.inputs.community || env.community}} 65 | parent_record: ${{github.event.inputs.parent_record || env.parent_record}} 66 | release_tag: ${{github.event.inputs.release_tag || 'latest'}} 67 | get_repository: 68 | name: Get repository name 69 | runs-on: ubuntu-latest 70 | outputs: 71 | server: ${{steps.parse.outputs.host}} 72 | steps: 73 | - name: Extract name from INVENIO_SERVER 74 | id: parse 75 | 
run: echo "host=$(cut -d'/' -f3 <<< ${{env.INVENIO_SERVER}} | cut -d':' -f1)" >> $GITHUB_OUTPUT 76 | add_doi_codemeta: 77 | name: "Add ${{needs.run_iga.outputs.record_doi}} to codemeta.json" 78 | needs: run_iga 79 | runs-on: ubuntu-latest 80 | steps: 81 | - name: Checkout 82 | uses: actions/checkout@v4 83 | with: 84 | ref: ${{ env.ref }} 85 | - name: Install sde 86 | run: pip install sde 87 | - name: Add DOI to CodeMeta File 88 | run: sde identifier ${{needs.run_iga.outputs.record_doi}} codemeta.json 89 | - name: Commit CFF 90 | uses: EndBug/add-and-commit@v9 91 | with: 92 | message: 'Add DOI to codemeta.json file' 93 | add: 'codemeta.json' 94 | CodeMeta2CFF: 95 | runs-on: ubuntu-latest 96 | needs: add_doi_codemeta 97 | steps: 98 | - name: Checkout 99 | uses: actions/checkout@v4 100 | with: 101 | ref: ${{ env.ref }} 102 | - name: Convert CFF 103 | uses: caltechlibrary/codemeta2cff@main 104 | - name: Commit CFF 105 | uses: EndBug/add-and-commit@v9 106 | with: 107 | message: 'Add updated CITATION.cff from codemeta.json file' 108 | add: 'CITATION.cff' 109 | -------------------------------------------------------------------------------- /.github/workflows/pypi-publish.yaml: -------------------------------------------------------------------------------- 1 | name: Publish 2 | 3 | on: 4 | push: 5 | tags: 6 | - v* 7 | 8 | jobs: 9 | build-n-publish: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v2 13 | - name: Set up Python 3.9 14 | uses: actions/setup-python@v2 15 | with: 16 | python-version: 3.9 17 | - name: Install dependencies 18 | run: | 19 | python -m pip install --upgrade pip 20 | pip install setuptools wheel 21 | - name: Build package 22 | run: | 23 | python setup.py sdist bdist_wheel 24 | - name: Publish 25 | uses: pypa/gh-action-pypi-publish@v1.3.1 26 | with: 27 | user: __token__ 28 | password: ${{ secrets.pypi_token }} 29 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | build/ 2 | dist/ 3 | data/ 4 | caltechdata_api.egg-info/ 5 | caltechdata_api/__pycache__/ 6 | tests/__pycache__/ 7 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below." 3 | title: caltechdata_api 4 | authors: 5 | - family-names: Morrell 6 | given-names: Thomas E 7 | orcid: https://orcid.org/0000-0001-9266-5146 8 | - family-names: Bhattarai 9 | given-names: Rohan 10 | orcid: https://orcid.org/0009-0007-0323-4733 11 | - family-names: Won 12 | given-names: Elizabeth 13 | orcid: https://orcid.org/0009-0002-2450-6471 14 | - family-names: Abakah 15 | given-names: Alexander A 16 | orcid: https://orcid.org/0009-0003-5640-6691 17 | abstract: Python wrapper for CaltechDATA API. 18 | repository-code: "https://github.com/caltechlibrary/caltechdata_api" 19 | type: software 20 | doi: 10.22002/3gdk4-j5504 21 | version: 1.10.0 22 | license-url: "https://data.caltech.edu/license" 23 | keywords: 24 | - GitHub 25 | - metadata 26 | - software 27 | - InvenioRDM 28 | date-released: 2025-04-07 29 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | Contributor Covenant Code of Conduct 2 | ==================================== 3 | 4 | ## Our Pledge 5 | 6 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 
7 | 8 | ## Our Standards 9 | 10 | Examples of behavior that contributes to creating a positive environment include: 11 | 12 | * Using welcoming and inclusive language 13 | * Being respectful of differing viewpoints and experiences 14 | * Gracefully accepting constructive criticism 15 | * Focusing on what is best for the community 16 | * Showing empathy towards other community members 17 | 18 | Examples of unacceptable behavior by participants include: 19 | 20 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 21 | * Trolling, insulting/derogatory comments, and personal or political attacks 22 | * Public or private harassment 23 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 24 | * Other conduct which could reasonably be considered inappropriate in a professional setting 25 | 26 | ## Our Responsibilities 27 | 28 | Project contributors are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 29 | 30 | Project contributors have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 31 | 32 | ## Scope 33 | 34 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project contributors. 
35 | 36 | ## Enforcement 37 | 38 | If a contributor engages in harassing behaviour, the project organizers may take any action they deem appropriate, including warning the offender or expelling them from online forums, online project resources, face-to-face meetings, or any other project-related activity or resource. 39 | 40 | If you are being harassed, notice that someone else is being harassed, or have any other concerns, please contact a member of the project team immediately. Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 41 | 42 | ## Attribution 43 | 44 | Portions of this Code of Conduct were adapted from Electron's [Contributor Covenant Code of Conduct](https://github.com/electron/electron/blob/master/CODE_OF_CONDUCT.md), which itself was adapted from the [Contributor Covenant](http://contributor-covenant.org/version/1/4), version 1.4. 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, Caltech 2 | All rights not granted herein are expressly reserved by Caltech. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | 2. 
Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | 3. Neither the name of the copyright holder nor the names of its contributors 15 | may be used to endorse or promote products derived from this software without 16 | specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 19 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CaltechDATA API Python Library 2 | 3 | [![DOI](https://img.shields.io/badge/dynamic/json.svg?label=DOI&query=$.pids.doi.identifier&uri=https://data.caltech.edu/api/records/wfjr5-kw507/versions/latest)](https://data.caltech.edu/records/wfjr5-kw507/latest) 4 | 5 | The `caltechdata_api` Python library provides a convenient interface for interacting with the CaltechDATA API. It allows users to write files, create DataCite 4 standard JSON records, edit existing records, and retrieve metadata from the CaltechDATA repository. 
6 | 7 | ## Features 8 | 9 | ### Writing and Editing Records 10 | - `caltechdata_write`: Writes files and a DataCite 4 standard JSON record to the CaltechDATA repository. 11 | - `caltechdata_edit`: Edits existing records in CaltechDATA. 12 | 13 | ### Metadata Operations 14 | - `get_metadata`: Retrieves metadata from CaltechDATA records. 15 | 16 | ## Requirements 17 | 18 | - Python 3.6+ 19 | 20 | ## Installation 21 | 22 | Install the library via pip: 23 | 24 | ```shell 25 | pip install caltechdata_api 26 | ``` 27 | 28 | ## Examples 29 | 30 | There are some example python scripts in the GitHub repository. 31 | 32 | ### Create a record: 33 | 34 | ```shell 35 | python write.py example.json -fnames logo.gif 36 | # Output: pbkn6-m9y63 (unique identifier) 37 | ``` 38 | > The response will be the unique identifier for the record. You can put this at 39 | the end of a url to visit the record (e.g. 40 | https://data.caltechlibrary.dev/records/pbkn6-m9y63) 41 | 42 | ### Edit a record 43 | Make changes to the example.json file to see a change) 44 | ``` 45 | python edit.py example.json -id pbkn6-m9y63 46 | 10.33569/pbkn6-m9y63 47 | ``` 48 | > The response is the DOI for the record, which includes the unique identifier 49 | for the record in the default configuration. 50 | 51 | ## Using Custom DOIs 52 | Some groups have worked with the library to create custom DOIs. These can be 53 | passed in the metadata like: 54 | 55 | ```shell 56 | python write.py example_custom.json -fnames logo.gif 57 | m6zxz-p4j22 58 | ``` 59 | 60 | And then you can edit with 61 | ``` 62 | python edit.py example_custom.json -id m6zxz-p4j22 63 | 10.5281/inveniordm.1234 64 | ``` 65 | 66 | This returns the custom DOI of the record if it is successful. 67 | 68 | 69 | ## Setup and Authentication 70 | 71 | 1. Acquire a personal access token from your CaltechDATA account (found under "Applications" at the top right of your screen). 72 | 2. Copy the token to a file named token.bash. 73 | 3. 
Load the token in the command line using source token.bash. 74 | 75 | ## Note on Testing 76 | 77 | Only test your application on the test repository (`data.caltechlibrary.dev`). Testing the API on the public 78 | repository will generate junk records that are annoying to delete. 79 | 80 | ## Using the Command Line Interface 81 | 82 | If you would like to interact with the CaltechDATA API using the Command line Interface (CLI), please [see the detailed documentation](https://caltechlibrary.github.io/caltechdata_api/caltechdata_api/cli-documentation-for-users). 83 | -------------------------------------------------------------------------------- /accept.py: -------------------------------------------------------------------------------- 1 | import argparse, os 2 | from caltechdata_api import caltechdata_accept 3 | 4 | parser = argparse.ArgumentParser( 5 | description="Accept records to a community in the CaltechDATA repository" 6 | ) 7 | parser.add_argument("ids", nargs="*", help="CaltechDATA IDs") 8 | args = parser.parse_args() 9 | 10 | # Get access token set as environment variable with source token.bash 11 | token = os.environ["RDMTOK"] 12 | 13 | production = True 14 | 15 | caltechdata_accept( 16 | args.ids, 17 | token, 18 | production, 19 | ) 20 | print("Completed") 21 | -------------------------------------------------------------------------------- /caltechdata_api/__init__.py: -------------------------------------------------------------------------------- 1 | from .caltechdata_write import ( 2 | caltechdata_write, 3 | write_files_rdm, 4 | add_file_links, 5 | send_to_community, 6 | ) 7 | from .caltechdata_edit import ( 8 | caltechdata_edit, 9 | caltechdata_unembargo, 10 | caltechdata_accept, 11 | ) 12 | from .customize_schema import customize_schema, validate_metadata 13 | from .get_metadata import get_metadata 14 | from .download_file import download_file, download_url 15 | from .utils import humanbytes 16 | from .md_to_json import parse_readme_to_json 17 | 
-------------------------------------------------------------------------------- /caltechdata_api/download_file.py: -------------------------------------------------------------------------------- 1 | import requests, argparse 2 | from tqdm.auto import tqdm 3 | 4 | 5 | def download_url(doi, media_type=None): 6 | """Get a download link for a file listed in the media API for a DataCite DOI""" 7 | api_url = "https://api.datacite.org/dois/" + doi + "/media" 8 | r = requests.get(api_url).json() 9 | data = r["data"] 10 | if media_type == None: 11 | url = data[0]["attributes"]["url"] 12 | else: 13 | for media in data: 14 | if media["attributes"]["mediaType"] == media_type: 15 | url = media["attributes"] 16 | return url 17 | 18 | 19 | def download_file(doi, fname=None, media_type=None): 20 | """Download a file listed in the media API for a DataCite DOI""" 21 | url = download_url(doi, media_type) 22 | r = requests.get(url, stream=True) 23 | # Set file name 24 | if fname == None: 25 | fname = doi.replace("/", "-") 26 | # Download file with progress bar 27 | if r.status_code == 403: 28 | print("File Unavailable") 29 | if "content-length" not in r.headers: 30 | print("Did not get file") 31 | else: 32 | with open(fname, "wb") as f: 33 | total_length = int(r.headers.get("content-length")) 34 | pbar = tqdm(total=int(total_length / 1024), unit="B") 35 | for chunk in r.iter_content(chunk_size=1024): 36 | if chunk: 37 | pbar.update() 38 | f.write(chunk) 39 | return fname 40 | 41 | 42 | if __name__ == "__main__": 43 | parser = argparse.ArgumentParser( 44 | description="download_file queries the DaiaCite Media API\ 45 | and downloads the file associated with a DOI" 46 | ) 47 | parser.add_argument( 48 | "dois", 49 | nargs="+", 50 | help="The DOI for files to be downloaded", 51 | ) 52 | parser.add_argument( 53 | "-fname", default=None, help="File name to be used for downloaded file" 54 | ) 55 | parser.add_argument( 56 | "-media_type", default=None, help="File (media) type to be 
downloaded" 57 | ) 58 | 59 | args = parser.parse_args() 60 | 61 | for doi in args.dois: 62 | download_file(doi, args.fname, args.media_type) 63 | -------------------------------------------------------------------------------- /caltechdata_api/get_files.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import requests 3 | 4 | 5 | def get_files(idv, production=True): 6 | # Returns file block 7 | 8 | if production == True: 9 | api_url = "https://data.caltech.edu/api/records/" 10 | else: 11 | api_url = "https://data.caltechlibrary.dev/api/records/" 12 | 13 | r = requests.get(api_url + str(idv) + "/files") 14 | r_data = r.json() 15 | if "message" in r_data: 16 | raise AssertionError( 17 | "id " 18 | + str(idv) 19 | + " expected http status 200, got " 20 | + str(r.status_code) 21 | + " " 22 | + r_data["message"] 23 | ) 24 | if not "entries" in r_data: 25 | raise AssertionError("expected as entries property in response, got " + r_data) 26 | return r_data["entries"] 27 | 28 | 29 | if __name__ == "__main__": 30 | parser = argparse.ArgumentParser( 31 | description="get_files queries the caltechDATA (Invenio 3) API\ 32 | and returns file information" 33 | ) 34 | parser.add_argument( 35 | "ids", 36 | metavar="ID", 37 | type=str, 38 | nargs="+", 39 | help="The CaltechDATA ID for each record of interest", 40 | ) 41 | parser.add_argument("-test", dest="production", action="store_false") 42 | 43 | args = parser.parse_args() 44 | 45 | production = args.production 46 | 47 | for idv in args.ids: 48 | metadata = get_files(idv, production) 49 | print(metadata) 50 | -------------------------------------------------------------------------------- /caltechdata_api/get_metadata.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import csv 3 | import json 4 | import os 5 | 6 | import requests 7 | from datacite import schema43 8 | 9 | 10 | def get_metadata( 11 | idv, 12 | 
production=True, 13 | validate=True, 14 | emails=False, 15 | schema="43", 16 | token=False, 17 | authors=False, 18 | ): 19 | # Returns just DataCite metadata or DataCite metadata with emails 20 | 21 | if production == True: 22 | if authors: 23 | url = "https://authors.library.caltech.edu/api/records/" 24 | else: 25 | url = "https://data.caltech.edu/api/records/" 26 | verify = True 27 | else: 28 | if authors: 29 | url = "https://authors.caltechlibrary.dev/api/records/" 30 | else: 31 | url = "https://data.caltechlibrary.dev/api/records/" 32 | verify = True 33 | 34 | if authors: 35 | headers = { 36 | "accept": "application/json", 37 | } 38 | validate = False 39 | else: 40 | headers = { 41 | "accept": "application/vnd.datacite.datacite+json", 42 | } 43 | 44 | if token: 45 | headers["Authorization"] = "Bearer %s" % token 46 | 47 | response = requests.get(url + idv, headers=headers, verify=verify) 48 | if response.status_code != 200: 49 | raise Exception(response.text) 50 | else: 51 | metadata = response.json() 52 | 53 | if validate: 54 | if schema == "43": 55 | try: 56 | assert schema43.validate(metadata) 57 | except AssertionError: 58 | v = schema43.validator.validate(metadata) 59 | errors = sorted(v.iter_errors(instance), key=lambda e: e.path) 60 | for error in errors: 61 | print(error.message) 62 | 63 | return metadata 64 | 65 | 66 | if __name__ == "__main__": 67 | parser = argparse.ArgumentParser( 68 | description="get_metadata queries the caltechDATA (Invenio 3) API\ 69 | and returns DataCite-compatable metadata" 70 | ) 71 | parser.add_argument( 72 | "ids", 73 | metavar="ID", 74 | type=str, 75 | nargs="+", 76 | help="The CaltechDATA ID for each record of interest", 77 | ) 78 | parser.add_argument("-test", dest="production", action="store_false") 79 | parser.add_argument("-authors", dest="authors", action="store_true") 80 | parser.add_argument("-xml", dest="save_xml", action="store_true") 81 | parser.add_argument( 82 | "-skip_validate", 83 | dest="skip_validate", 84 
| action="store_true", 85 | help="skip validation of metadata", 86 | ) 87 | parser.add_argument("-schema", default="43", help="Schema Version") 88 | 89 | args = parser.parse_args() 90 | 91 | production = args.production 92 | schema = args.schema 93 | authors = args.authors 94 | skip_validate = args.skip_validate 95 | if skip_validate: 96 | validate = False 97 | else: 98 | validate = True 99 | 100 | for idv in args.ids: 101 | metadata = get_metadata( 102 | idv, production, validate, schema=schema, authors=authors 103 | ) 104 | outfile = open(str(idv) + ".json", "w") 105 | outfile.write(json.dumps(metadata, indent=4)) 106 | outfile.close() 107 | if args.save_xml == True: 108 | xml = schema40.tostring(metadata) 109 | outfile = open(str(idv) + ".xml", "w", encoding="utf8") 110 | outfile.write(xml) 111 | -------------------------------------------------------------------------------- /caltechdata_api/md_to_json.py: -------------------------------------------------------------------------------- 1 | import re 2 | import json 3 | import requests 4 | 5 | 6 | class ReadmeFormatException(Exception): 7 | """Custom exception for errors in the README format.""" 8 | 9 | 10 | def camel_case(s): 11 | """Converts a string to camelCase.""" 12 | s = re.sub(r"(\s|_|-)+", " ", s).title().replace(" ", "") 13 | return s[0].lower() + s[1:] if s else "" 14 | 15 | 16 | def expand_special_keys(key, value): 17 | """Expand special keys into their structured format (affiliation, nameIdentifiers).""" 18 | if key == "affiliation": 19 | if "ror.org" not in value: 20 | raise ValueError("Affiliation Identifier is not a ROR") 21 | ror = value.split("ror.org/")[1].split("]")[0] 22 | response = requests.get(f"https://api.ror.org/organizations/{ror}").json() 23 | return [ 24 | { 25 | "affiliationIdentifier": ror, 26 | "affiliationIdentifierScheme": "ROR", 27 | "name": response["name"], 28 | } 29 | ] 30 | elif key == "nameIdentifiers": 31 | orcid = value.split("orcid.org/")[1].split("]")[0] 32 | return [ 
33 | { 34 | "nameIdentifier": orcid, 35 | "nameIdentifierScheme": "ORCID", 36 | "schemeUri": f"https://orcid.org/{value}", 37 | } 38 | ] 39 | return value 40 | 41 | 42 | def parse_readme_to_json(readme_path): 43 | try: 44 | with open(readme_path, "r") as file: 45 | lines = file.read().split("\n") 46 | except IOError as e: 47 | raise ReadmeFormatException(f"Failed to open or read the file: {e}") 48 | 49 | json_data = {} 50 | current_section = None 51 | current_object = {} 52 | 53 | title_line = lines.pop(0) 54 | if title_line.startswith("#") == False: 55 | raise ValueError('README.md needs to start with "# Title"') 56 | else: 57 | json_data["titles"] = [{"title": title_line.replace("# ", "")}] 58 | 59 | contributors = [] 60 | identifiers = [] 61 | item_list = [] 62 | 63 | section_pattern = re.compile(r"^##\s+(.*)$") 64 | key_value_pattern = re.compile(r"^-\s+(.*?):\s+(.*)$") 65 | link_pattern = re.compile(r"\[.*?\]\((.*?)\)") 66 | 67 | for line_number, line in enumerate(lines, 1): 68 | if not line.strip(): 69 | if item_list and current_section: 70 | json_data[current_section] = item_list 71 | item_list = [] 72 | elif current_object and current_section: 73 | if current_section == "types": 74 | json_data[current_section] = current_object 75 | elif len(current_object) == 1: 76 | key, value = next(iter(current_object.items())) 77 | if key in ["language", "publicationYear", "publisher", "version"]: 78 | json_data[current_section] = value 79 | else: 80 | json_data[current_section].append(current_object) 81 | elif current_section in ["creators", "contributors"]: 82 | contributors.append(current_object) 83 | current_object = {} 84 | elif current_section == "identifiers": 85 | identifiers.append(current_object) 86 | current_object = {} 87 | else: 88 | json_data[current_section].append(current_object) 89 | current_object = {} 90 | continue 91 | 92 | section_match = section_pattern.match(line) 93 | if section_match: 94 | if item_list: 95 | json_data[current_section] = 
item_list 96 | elif current_object: 97 | if current_section in json_data: 98 | if isinstance(json_data[current_section], list): 99 | json_data[current_section].append(current_object) 100 | elif isinstance(json_data[current_section], dict): 101 | json_data[current_section].update(current_object) 102 | else: 103 | json_data[current_section] = ( 104 | [current_object] 105 | if current_section != "types" 106 | else current_object 107 | ) 108 | current_object = {} 109 | 110 | elif contributors and current_section in ["creators", "contributors"]: 111 | json_data[current_section] = contributors 112 | contributors = [] 113 | elif identifiers and current_section == "identifiers": 114 | json_data[current_section] = identifiers 115 | identifiers = [] 116 | 117 | elif current_section and current_object: 118 | if current_section == "types": 119 | json_data[current_section] = current_object 120 | elif len(current_object) == 1: 121 | key, value = next(iter(current_object.items())) 122 | if key in ["language", "publicationYear", "publisher", "version"]: 123 | json_data[current_section].append(value) 124 | else: 125 | json_data[current_section].append(current_object) 126 | else: 127 | json_data[current_section].append(current_object) 128 | current_object = {} 129 | current_section = camel_case(section_match.group(1)) 130 | json_data[current_section] = [] if current_section != "types" else {} 131 | continue 132 | 133 | key_value_match = key_value_pattern.match(line) 134 | if key_value_match and current_section: 135 | key, value = key_value_match.groups() 136 | key = camel_case(key) 137 | 138 | if key in ["affiliation", "nameIdentifiers"]: 139 | value = expand_special_keys(key, value) 140 | elif ( 141 | key == "nameType" 142 | and current_object 143 | and current_section in ["creators", "contributors"] 144 | ): 145 | contributors.append(current_object) 146 | current_object = {} 147 | elif current_section in ["subjects"]: 148 | item_list.append({key: value}) 149 | elif current_section 
== "dates": 150 | if key == "date": 151 | current_object["date"] = value 152 | elif key == "dateType": 153 | current_object["dateType"] = value 154 | item_list.append(current_object) 155 | current_object = {} 156 | else: 157 | link_match = link_pattern.search(value) 158 | if link_match: 159 | value = link_match.group(1) 160 | 161 | current_object[key] = value 162 | 163 | elif line.strip() and not section_match: 164 | raise ReadmeFormatException( 165 | f"Incorrect format detected at line {line_number}: {line}" 166 | ) 167 | 168 | if contributors and current_section in ["creators", "contributors"]: 169 | json_data[current_section] = contributors 170 | elif identifiers and current_section == "identifiers": 171 | json_data[current_section] = identifiers 172 | elif current_section and current_object: 173 | if current_section == "types": 174 | json_data[current_section] = current_object 175 | elif len(current_object) == 1: 176 | key, value = next(iter(current_object.items())) 177 | if key in ["language", "publicationYear", "publisher", "version"]: 178 | json_data[current_section].append(value) 179 | else: 180 | json_data[current_section].append(current_object) 181 | else: 182 | json_data[current_section].append(current_object) 183 | 184 | return json_data 185 | 186 | 187 | if __name__ == "__main__": 188 | readme_path = "/Users/elizabethwon/downloads/exampleREADME.md" 189 | try: 190 | json_data = parse_readme_to_json(readme_path) 191 | output_json_path = "output1.json" 192 | with open(output_json_path, "w") as json_file: 193 | json.dump(json_data, json_file, indent=4) 194 | print(f"Converted JSON saved to {output_json_path}") 195 | except ReadmeFormatException as e: 196 | print(f"Error parsing README file: {e}") 197 | -------------------------------------------------------------------------------- /caltechdata_api/pictures-documentation/Interact CLI Step 1(a).png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/caltechlibrary/caltechdata_api/89cb1bfb6513c61458e257a66e5e6c6aaf222195/caltechdata_api/pictures-documentation/Interact CLI Step 1(a).png -------------------------------------------------------------------------------- /caltechdata_api/pictures-documentation/Interact CLI Step 1(b).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/caltechdata_api/89cb1bfb6513c61458e257a66e5e6c6aaf222195/caltechdata_api/pictures-documentation/Interact CLI Step 1(b).png -------------------------------------------------------------------------------- /caltechdata_api/pictures-documentation/Interact CLI Step 1(c) Test Instance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/caltechdata_api/89cb1bfb6513c61458e257a66e5e6c6aaf222195/caltechdata_api/pictures-documentation/Interact CLI Step 1(c) Test Instance.png -------------------------------------------------------------------------------- /caltechdata_api/pictures-documentation/Interact CLI Step 1(d) Test Instance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/caltechdata_api/89cb1bfb6513c61458e257a66e5e6c6aaf222195/caltechdata_api/pictures-documentation/Interact CLI Step 1(d) Test Instance.png -------------------------------------------------------------------------------- /caltechdata_api/pictures-documentation/Interact CLI Step 2(a).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/caltechdata_api/89cb1bfb6513c61458e257a66e5e6c6aaf222195/caltechdata_api/pictures-documentation/Interact CLI Step 2(a).png -------------------------------------------------------------------------------- /caltechdata_api/pictures-documentation/Interact CLI Step 2(b).png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/caltechdata_api/89cb1bfb6513c61458e257a66e5e6c6aaf222195/caltechdata_api/pictures-documentation/Interact CLI Step 2(b).png -------------------------------------------------------------------------------- /caltechdata_api/pictures-documentation/Interact CLI Step 2(c).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/caltechdata_api/89cb1bfb6513c61458e257a66e5e6c6aaf222195/caltechdata_api/pictures-documentation/Interact CLI Step 2(c).png -------------------------------------------------------------------------------- /caltechdata_api/pictures-documentation/README.md: -------------------------------------------------------------------------------- 1 | This subfolder is created to store the pictures for documentation 2 | -------------------------------------------------------------------------------- /caltechdata_api/pictures-documentation/Step 1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/caltechdata_api/89cb1bfb6513c61458e257a66e5e6c6aaf222195/caltechdata_api/pictures-documentation/Step 1.png -------------------------------------------------------------------------------- /caltechdata_api/pictures-documentation/Step 2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/caltechdata_api/89cb1bfb6513c61458e257a66e5e6c6aaf222195/caltechdata_api/pictures-documentation/Step 2.png -------------------------------------------------------------------------------- /caltechdata_api/pictures-documentation/Step 3(a).png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/caltechlibrary/caltechdata_api/89cb1bfb6513c61458e257a66e5e6c6aaf222195/caltechdata_api/pictures-documentation/Step 3(a).png -------------------------------------------------------------------------------- /caltechdata_api/pictures-documentation/Step 3(b).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/caltechdata_api/89cb1bfb6513c61458e257a66e5e6c6aaf222195/caltechdata_api/pictures-documentation/Step 3(b).png -------------------------------------------------------------------------------- /caltechdata_api/pictures-documentation/Step 3(c).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/caltechdata_api/89cb1bfb6513c61458e257a66e5e6c6aaf222195/caltechdata_api/pictures-documentation/Step 3(c).png -------------------------------------------------------------------------------- /caltechdata_api/pictures-documentation/Step 4(a).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/caltechdata_api/89cb1bfb6513c61458e257a66e5e6c6aaf222195/caltechdata_api/pictures-documentation/Step 4(a).png -------------------------------------------------------------------------------- /caltechdata_api/pictures-documentation/Step 4(b).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/caltechdata_api/89cb1bfb6513c61458e257a66e5e6c6aaf222195/caltechdata_api/pictures-documentation/Step 4(b).png -------------------------------------------------------------------------------- /caltechdata_api/pictures-documentation/Step 5.png: -------------------------------------------------------------------------------- 
# Public domain by Mitch McMabers

from typing import List, Union

METRIC_LABELS: List[str] = ["B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"]
BINARY_LABELS: List[str] = ["B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"]
PRECISION_OFFSETS: List[float] = [0.5, 0.05, 0.005, 0.0005]  # PREDEFINED FOR SPEED.
PRECISION_FORMATS: List[str] = [
    "{}{:.0f} {}",
    "{}{:.1f} {}",
    "{}{:.2f} {}",
    "{}{:.3f} {}",
]  # PREDEFINED FOR SPEED.


def humanbytes(num: Union[int, float], metric: bool = True, precision: int = 1) -> str:
    """
    Human-readable formatting of bytes, using binary (powers of 1024)
    or metric (powers of 1000) representation.
    """

    assert isinstance(num, (int, float)), "num must be an int or float"
    assert isinstance(metric, bool), "metric must be a bool"
    assert (
        isinstance(precision, int) and 0 <= precision <= 3
    ), "precision must be an int (range 0-3)"

    labels = METRIC_LABELS if metric else BINARY_LABELS
    step = 1000 if metric else 1024
    # A unit is only kept while the value, once rounded for display, would
    # still print below the step: e.g. 1023.96 B at one decimal would render
    # as "1024.0 B", so it must be promoted to the next unit instead.
    cutoff = step - PRECISION_OFFSETS[precision]

    sign = "-" if num < 0 else ""
    value = abs(num)  # track the sign separately; format it back in at the end

    for label in labels[:-1]:
        if value < cutoff:
            break
        # Each division pushes the accumulated floating-point drift further
        # below the displayed digits, so it never shows up in the output.
        value /= step
    else:
        # Ran out of units: report in the largest one, however large.
        label = labels[-1]

    return PRECISION_FORMATS[precision].format(sign, value, label)


if __name__ == "__main__":
    # Visual smoke test; note metric (powers of 1000) is the default.
    print(humanbytes(2251799813685247))  # "2.3 PB"
    print(humanbytes(2000000000000000, True))  # "2.0 PB"
    print(humanbytes(1099511627776))  # "1.1 TB"
    print(humanbytes(1000000000000, True))  # "1.0 TB"
    print(humanbytes(1000000000, True))  # "1.0 GB"
    print(humanbytes(4318498233, precision=3))  # "4.318 GB"
    print(humanbytes(4318498233, True, 3))  # "4.318 GB"
    print(humanbytes(-4318498233, precision=2))  # "-4.32 GB"
-------------------------------------------------------------------------------- 1 | - id: accepted 2 | props: 3 | datacite: Accepted 4 | title: 5 | en: Accepted 6 | - id: available 7 | props: 8 | datacite: Available 9 | title: 10 | en: Available 11 | - id: collected 12 | props: 13 | datacite: Collected 14 | title: 15 | en: Collected 16 | - id: copyrighted 17 | props: 18 | datacite: Copyrighted 19 | title: 20 | en: Copyrighted 21 | - id: created 22 | props: 23 | datacite: Created 24 | title: 25 | en: Created 26 | - id: issued 27 | props: 28 | datacite: Issued 29 | title: 30 | en: Issued 31 | - id: other 32 | props: 33 | datacite: Other 34 | title: 35 | en: Other 36 | - id: submitted 37 | props: 38 | datacite: Submitted 39 | title: 40 | en: Submitted 41 | - id: updated 42 | props: 43 | datacite: Updated 44 | title: 45 | en: Updated 46 | - id: valid 47 | props: 48 | datacite: Valid 49 | title: 50 | en: Valid 51 | - id: withdrawn 52 | props: 53 | datacite: Withdrawn 54 | title: 55 | en: Withdrawn 56 | -------------------------------------------------------------------------------- /caltechdata_api/vocabularies/description_types.yaml: -------------------------------------------------------------------------------- 1 | - id: abstract 2 | props: 3 | datacite: Abstract 4 | title: 5 | en: Abstract 6 | - id: methods 7 | props: 8 | datacite: Methods 9 | title: 10 | en: Methods 11 | - id: series-information 12 | props: 13 | datacite: SeriesInformation 14 | title: 15 | en: Series information 16 | - id: table-of-contents 17 | props: 18 | datacite: TableOfContents 19 | title: 20 | en: Table of contents 21 | - id: technical-info 22 | props: 23 | datacite: TechnicalInfo 24 | title: 25 | en: Technical info 26 | - id: other 27 | props: 28 | datacite: Other 29 | title: 30 | en: Other 31 | # Not really a datacite mapping, but needed to support passing custom types 32 | - id: files 33 | props: 34 | datacite: files 35 | 
-------------------------------------------------------------------------------- /caltechdata_api/vocabularies/identifier_types.yaml: -------------------------------------------------------------------------------- 1 | - id: ark 2 | props: 3 | datacite: ARK 4 | title: 5 | en: ARK 6 | - id: arxiv 7 | props: 8 | datacite: arXiv 9 | title: 10 | en: arXiv 11 | - id: bibcode 12 | props: 13 | datacite: bibcode 14 | title: 15 | en: Bibcode 16 | - id: doi 17 | props: 18 | datacite: DOI 19 | title: 20 | en: DOI 21 | - id: ean13 22 | props: 23 | datacite: EAN13 24 | title: 25 | en: EAN13 26 | - id: eissn 27 | props: 28 | datacite: EISSN 29 | title: 30 | en: EISSN 31 | - id: handle 32 | props: 33 | datacite: Handle 34 | title: 35 | en: Handle 36 | - id: igsn 37 | props: 38 | datacite: IGSN 39 | title: 40 | en: IGSN 41 | - id: isbn 42 | props: 43 | datacite: ISBN 44 | title: 45 | en: ISBN 46 | - id: issn 47 | props: 48 | datacite: ISSN 49 | title: 50 | en: ISSN 51 | - id: istc 52 | props: 53 | datacite: ISTC 54 | title: 55 | en: ISTC 56 | - id: lissn 57 | props: 58 | datacite: LISSN 59 | title: 60 | en: LISSN 61 | - id: lsid 62 | props: 63 | datacite: LSID 64 | title: 65 | en: LSID 66 | - id: pmid 67 | props: 68 | datacite: PMID 69 | title: 70 | en: PMID 71 | - id: purl 72 | props: 73 | datacite: PURL 74 | title: 75 | en: PURL 76 | - id: upc 77 | props: 78 | datacite: UPC 79 | title: 80 | en: UPC 81 | - id: url 82 | props: 83 | datacite: URL 84 | title: 85 | en: URL 86 | - id: urn 87 | props: 88 | datacite: URN 89 | title: 90 | en: URN 91 | - id: w3id 92 | props: 93 | datacite: w3id 94 | title: 95 | en: W3ID 96 | - id: cdid 97 | props: 98 | datacite: cdid 99 | title: 100 | en: CALTECHDATA_ID 101 | - id: tiltid 102 | props: 103 | datacite: tiltid 104 | title: 105 | en: TILT_SERIES_ID 106 | - id: dsa-110-id 107 | props: 108 | datacite: dsa-110-id 109 | title: 110 | en: DSA_110_ID 111 | -------------------------------------------------------------------------------- 
/caltechdata_api/vocabularies/relation_types.yaml: -------------------------------------------------------------------------------- 1 | - id: iscitedby 2 | props: 3 | datacite: IsCitedBy 4 | title: 5 | en: Is cited by 6 | - id: cites 7 | props: 8 | datacite: Cites 9 | title: 10 | en: Cites 11 | - id: issupplementto 12 | props: 13 | datacite: IsSupplementTo 14 | title: 15 | en: Is supplement to 16 | - id: issupplementedby 17 | props: 18 | datacite: IsSupplementedBy 19 | title: 20 | en: Is supplemented by 21 | - id: iscontinuedby 22 | props: 23 | datacite: IsContinuedBy 24 | title: 25 | en: Is continued by 26 | - id: continues 27 | props: 28 | datacite: Continues 29 | title: 30 | en: Continues 31 | - id: isdescribedby 32 | props: 33 | datacite: IsDescribedBy 34 | title: 35 | en: Is described by 36 | - id: describes 37 | props: 38 | datacite: Describes 39 | title: 40 | en: Describes 41 | - id: hasversion 42 | props: 43 | datacite: HasVersion 44 | title: 45 | en: Has version 46 | - id: isversionof 47 | props: 48 | datacite: IsVersionOf 49 | title: 50 | en: Is version of 51 | - id: isnewversionof 52 | props: 53 | datacite: IsNewVersionOf 54 | title: 55 | en: Is new version of 56 | - id: ispreviousversionof 57 | props: 58 | datacite: IsPreviousVersionOf 59 | title: 60 | en: Is previous version of 61 | - id: ispartof 62 | props: 63 | datacite: IsPartOf 64 | title: 65 | en: Is part of 66 | - id: haspart 67 | props: 68 | datacite: HasPart 69 | title: 70 | en: HasPart 71 | - id: isreferencedby 72 | props: 73 | datacite: IsReferencedBy 74 | title: 75 | en: Is referenced by 76 | - id: references 77 | props: 78 | datacite: References 79 | title: 80 | en: References 81 | - id: isdocumentedby 82 | props: 83 | datacite: IsDocumentedBy 84 | title: 85 | en: Is documented by 86 | - id: documents 87 | props: 88 | datacite: Documents 89 | title: 90 | en: Documents 91 | - id: iscompiledby 92 | props: 93 | datacite: IsCompiledBy 94 | title: 95 | en: Is compiled by 96 | - id: compiles 97 
| props: 98 | datacite: Compiles 99 | title: 100 | en: Compiles 101 | - id: isvariantformof 102 | props: 103 | datacite: IsVariantFormOf 104 | title: 105 | en: Is variant form of 106 | - id: isoriginalformof 107 | props: 108 | datacite: IsOriginalFormOf 109 | title: 110 | en: Is original form of 111 | - id: isidenticalto 112 | props: 113 | datacite: IsIdenticalTo 114 | title: 115 | en: Is identical to 116 | - id: isreviewedby 117 | props: 118 | datacite: IsReviewedBy 119 | title: 120 | en: Is reviewed by 121 | - id: reviews 122 | props: 123 | datacite: Reviews 124 | title: 125 | en: Reviews 126 | - id: isderivedfrom 127 | props: 128 | datacite: IsDerivedFrom 129 | title: 130 | en: Is derived from 131 | - id: issourceof 132 | props: 133 | datacite: IsSourceOf 134 | title: 135 | en: Is source of 136 | - id: isrequiredby 137 | props: 138 | datacite: IsRequiredBy 139 | title: 140 | en: Is required by 141 | - id: requires 142 | props: 143 | datacite: Requires 144 | title: 145 | en: Requires 146 | - id: isobsoletedby 147 | props: 148 | datacite: IsObsoletedBy 149 | title: 150 | en: Is obsoleted by 151 | - id: obsoletes 152 | props: 153 | datacite: Obsoletes 154 | title: 155 | en: Obsoletes 156 | -------------------------------------------------------------------------------- /caltechdata_api/vocabularies/roles.yaml: -------------------------------------------------------------------------------- 1 | - id: contactperson 2 | props: 3 | datacite: ContactPerson 4 | title: 5 | en: Contact person 6 | - id: datacollector 7 | props: 8 | datacite: DataCollector 9 | title: 10 | en: Data collector 11 | - id: datacurator 12 | props: 13 | datacite: DataCurator 14 | title: 15 | en: Data curator 16 | - id: datamanager 17 | props: 18 | datacite: DataManager 19 | title: 20 | en: Data manager 21 | - id: distributor 22 | props: 23 | datacite: Distributor 24 | title: 25 | en: Distributor 26 | - id: editor 27 | props: 28 | datacite: Editor 29 | title: 30 | en: Editor 31 | - id: 
hostinginstitution 32 | props: 33 | datacite: HostingInstitution 34 | title: 35 | en: Hosting institution 36 | - id: producer 37 | props: 38 | datacite: Producer 39 | title: 40 | en: Producer 41 | - id: projectleader 42 | props: 43 | datacite: ProjectLeader 44 | title: 45 | en: Project leader 46 | - id: projectmanager 47 | props: 48 | datacite: ProjectManager 49 | title: 50 | en: Project manager 51 | - id: projectmember 52 | props: 53 | datacite: ProjectMember 54 | title: 55 | en: Project member 56 | - id: registrationagency 57 | props: 58 | datacite: RegistrationAgency 59 | title: 60 | en: Registration agency 61 | - id: registrationauthority 62 | props: 63 | datacite: RegistrationAuthority 64 | title: 65 | en: Registration authority 66 | - id: relatedperson 67 | props: 68 | datacite: RelatedPerson 69 | title: 70 | en: Related person 71 | - id: researcher 72 | props: 73 | datacite: Researcher 74 | title: 75 | en: Researcher 76 | - id: researchgroup 77 | props: 78 | datacite: ResearchGroup 79 | title: 80 | en: Research group 81 | - id: rightsholder 82 | props: 83 | datacite: RightsHolder 84 | title: 85 | en: Rights holder 86 | - id: sponsor 87 | props: 88 | datacite: Sponsor 89 | title: 90 | en: Sponsor 91 | - id: supervisor 92 | props: 93 | datacite: Supervisor 94 | title: 95 | en: Supervisor 96 | - id: workpackageleader 97 | props: 98 | datacite: WorkPackageLeader 99 | title: 100 | en: Work package leader 101 | - id: other 102 | props: 103 | datacite: Other 104 | title: 105 | en: Other 106 | -------------------------------------------------------------------------------- /caltechdata_api/vocabularies/title_types.yaml: -------------------------------------------------------------------------------- 1 | - id: alternative-title 2 | props: 3 | datacite: AlternativeTitle 4 | title: 5 | en: Alternative title 6 | - id: subtitle 7 | props: 8 | datacite: Subtitle 9 | title: 10 | en: Subtitle 11 | - id: translated-title 12 | props: 13 | datacite: TranslatedTitle 14 | title: 
15 | en: Translated title 16 | - id: other 17 | props: 18 | datacite: Other 19 | title: 20 | en: Other 21 | -------------------------------------------------------------------------------- /codemeta.json: -------------------------------------------------------------------------------- 1 | { 2 | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", 3 | "@type": "SoftwareSourceCode", 4 | "description": "Python wrapper for CaltechDATA API.", 5 | "name": "caltechdata_api", 6 | "codeRepository": "https://github.com/caltechlibrary/caltechdata_api", 7 | "issueTracker": "https://github.com/caltechlibrary/caltechdata_api/issues", 8 | "license": "https://data.caltech.edu/license", 9 | "version": "1.10.0", 10 | "author": [ 11 | { 12 | "@type": "Person", 13 | "givenName": "Thomas E", 14 | "familyName": "Morrell", 15 | "affiliation": { 16 | "@type": "Organization", 17 | "name": "Caltech Library" 18 | }, 19 | "email": "tmorrell@caltech.edu", 20 | "@id": "https://orcid.org/0000-0001-9266-5146" 21 | }, 22 | { 23 | "@type": "Person", 24 | "givenName": "Rohan", 25 | "familyName": "Bhattarai", 26 | "affiliation": { 27 | "@type": "Organization", 28 | "name": "Caltech" 29 | }, 30 | "@id": "https://orcid.org/0009-0007-0323-4733" 31 | }, 32 | { 33 | "@type": "Person", 34 | "givenName": "Elizabeth", 35 | "familyName": "Won", 36 | "affiliation": { 37 | "@type": "Organization", 38 | "name": "Caltech" 39 | }, 40 | "@id": "https://orcid.org/0009-0002-2450-6471" 41 | }, 42 | { 43 | "@type": "Person", 44 | "givenName": "Alexander A", 45 | "familyName": "Abakah", 46 | "affiliation": { 47 | "@type": "Organization", 48 | "name": "Caltech Library" 49 | }, 50 | "email": "aabakah@caltech.edu", 51 | "@id": "https://orcid.org/0009-0003-5640-6691" 52 | } 53 | ], 54 | "developmentStatus": "active", 55 | "downloadUrl": "https://github.com/caltechlibrary/caltechdata_api/archive/1.9.1.zip", 56 | "keywords": [ 57 | "GitHub", 58 | "metadata", 59 | "software", 60 | "InvenioRDM" 61 | ], 62 | "maintainer": 
# --- edit.py -----------------------------------------------------------------
# Edit an existing CaltechDATA record: update metadata and/or attach files.

import argparse, os, json
from caltechdata_api import caltechdata_edit

parser = argparse.ArgumentParser(
    description="Write files and a DataCite 4 standard json record\
 to CaltechDATA repository"
)
parser.add_argument(
    "json_file",
    nargs="?",
    default=None,
    help="file name for json DataCite metadata file",
)
parser.add_argument("-id", help="CaltechDATA IDs")
parser.add_argument("-fnames", nargs="*", help="New Files")
parser.add_argument("-flinks", nargs="*", help="New File Links")
parser.add_argument("-schema", default="43", help="Metadata Schema")
parser.add_argument("-authors", action="store_true", help="Edit CaltechAUTHORS")
args = parser.parse_args()

# Get access token set as environment variable with source token.bash
token = os.environ["RDMTOK"]

if args.json_file:
    # BUG FIX: use a context manager so the handle is closed even on a
    # JSON parse error (the old code never closed the file).
    with open(args.json_file, "r") as metaf:
        metadata = json.load(metaf)
else:
    metadata = {}

production = True
publish = True

response = caltechdata_edit(
    args.id,
    metadata,
    token,
    args.fnames,
    production,
    args.schema,
    publish,
    args.flinks,
    authors=args.authors,
)
print(response)

# --- edit_osn.py -------------------------------------------------------------
# Edit a CaltechDATA record by attaching links to OSN-stored pilot files.

import argparse, os, json
import s3fs, requests
from datacite import schema43
from caltechdata_api import caltechdata_edit, get_metadata


parser = argparse.ArgumentParser(
    description="Edits a CaltechDATA record by adding OSN-stored pilot files"
)
parser.add_argument("folder", nargs=1, help="Folder")
parser.add_argument("-id", nargs=1, help="")

args = parser.parse_args()

# Get access token as environment variable
token = os.environ["RDMTOK"]

endpoint = "https://renc.osn.xsede.org/"

# Get metadata and files from bucket
s3 = s3fs.S3FileSystem(anon=True, client_kwargs={"endpoint_url": endpoint})

folder = args.folder[0]

path = "ini210004tommorrell/" + folder

idv = args.id[0]
try:
    metadata = get_metadata(idv, schema="43")
except Exception:
    # BUG FIX: was a bare `except:`, which also swallowed KeyboardInterrupt
    # and SystemExit.  The record may still be a draft, so fall back to
    # fetching the draft metadata directly from the API.
    url = "https://data.caltech.edu/api/records/"

    headers = {
        "accept": "application/vnd.datacite.datacite+json",
        "Authorization": "Bearer %s" % token,
    }

    response = requests.get(url + idv + "/draft", headers=headers)
    if response.status_code != 200:
        raise Exception(response.text)
    metadata = response.json()


def collect_links(entry, depth=0, max_depth=3):
    """Recursively gather file URLs under *entry* in the bucket.

    A path segment containing "." is treated as a file; anything reached at
    *max_depth* is linked as-is.  This reproduces the original copy-pasted
    three-level nested traversal in one documented helper.
    Returns a flat list of endpoint URLs.
    """
    name = entry.split("/")[-1]
    if "." in name or depth == max_depth:
        return [endpoint + entry]
    links = []
    for child in s3.glob(entry + "/*"):
        links.extend(collect_links(child, depth + 1, max_depth))
    return links


# Find the files
file_links = []
for link in s3.glob(path + "/*"):
    file_links.extend(collect_links(link))

production = True

response = caltechdata_edit(
    idv, metadata, token, [], production, "43", publish=False, file_links=file_links
)
print(response)
{ 56 | "date": "2012-05-22/2016-12-21", 57 | "dateType": "Collected" 58 | } 59 | ], 60 | "descriptions": [ 61 | { 62 | "description": "Description", 63 | "descriptionType": "Abstract" 64 | } 65 | ], 66 | "formats": [ 67 | "format" 68 | ], 69 | "fundingReferences": [ 70 | { 71 | "awardTitle": "Measurement of Column-Averaged CO2", 72 | "funderName": "National Aeronautics and Space Administration", 73 | "funderIdentifierType": "GRID", 74 | "funderIdentifier": "grid.238252.c", 75 | "awardNumber": "NAG5-12247" 76 | } 77 | ], 78 | "geoLocations": [ 79 | { 80 | "geoLocationPlace": "Place Name", 81 | "geoLocationPoint": { 82 | "pointLatitude": "34.138", 83 | "pointLongitude": "-118.1258" 84 | } 85 | } 86 | ], 87 | "language": "eng", 88 | "publicationYear": "2017", 89 | "publisher": "Publisher", 90 | "relatedIdentifiers": [ 91 | { 92 | "relatedIdentifier": "http://www.url.org/", 93 | "relatedIdentifierType": "URL", 94 | "relationType": "IsPartOf" 95 | }, 96 | { 97 | "relatedIdentifier": "10.5072/FK2", 98 | "relatedIdentifierType": "DOI", 99 | "relationType": "IsDocumentedBy" 100 | } 101 | ], 102 | "types": { 103 | "resourceTypeGeneral": "Dataset", 104 | "resourceType": "Dataset" 105 | }, 106 | "rightsList": [ 107 | { 108 | "rights": "Rights Name", 109 | "rightsURI": "Rights List" 110 | } 111 | ], 112 | "subjects": [ 113 | { 114 | "subject": "subject1" 115 | }, 116 | { 117 | "subject": "subject2" 118 | } 119 | ], 120 | "titles": [ 121 | { 122 | "title": "Title" 123 | }, 124 | { 125 | "title": "Alternative Title", 126 | "titleType": "AlternativeTitle" 127 | } 128 | ], 129 | "version": "0", 130 | "schemaVersion": "http://datacite.org/schema/kernel-4" 131 | } 132 | -------------------------------------------------------------------------------- /example_custom.json: -------------------------------------------------------------------------------- 1 | { 2 | "identifiers": [ 3 | {"identifier": "10.5281/inveniordm.1234", "identifierType": "DOI"}, 4 | {"identifier": 
"1924MNRAS..84..308E", "identifierType": "bibcode"} 5 | ], 6 | "contributors": [ 7 | { 8 | "nameType": "Personal", 9 | "affiliation": [ 10 | { 11 | "name": "DataCitea", 12 | "affiliationIdentifier": "https://ror.org/04wxnsj81", 13 | "affiliationIdentifierScheme": "ROR" 14 | } 15 | ], 16 | "name": "Contributor Name", 17 | "familyName": "Family Name", 18 | "givenName": "Given Name", 19 | "contributorType": "ContactPerson", 20 | "nameIdentifiers": [ 21 | { 22 | "nameIdentifier": "0000-0002-1825-0097", 23 | "nameIdentifierScheme": "ORCID", 24 | "schemeUri": "https://orcid.org/" 25 | } 26 | ] 27 | } 28 | ], 29 | "creators": [ 30 | { 31 | "nameType": "Personal", 32 | "affiliation": [ 33 | { 34 | "name": "DataCite", 35 | "affiliationIdentifier": "https://ror.org/04wxnsj81", 36 | "affiliationIdentifierScheme": "ROR" 37 | } 38 | ], 39 | "name": "Name", 40 | "familyName": "Family Name", 41 | "givenName": "Given Name", 42 | "nameIdentifiers": [ 43 | { 44 | "nameIdentifier": "0000-0002-1825-0097", 45 | "nameIdentifierScheme": "ORCID", 46 | "schemeUri": "https://orcid.org/" 47 | } 48 | ] 49 | } 50 | ], 51 | "dates": [ 52 | { 53 | "date": "2014-10-01", 54 | "dateType": "Created" 55 | }, 56 | { 57 | "date": "2012-05-22/2016-12-21", 58 | "dateType": "Collected" 59 | } 60 | ], 61 | "descriptions": [ 62 | { 63 | "description": "Description", 64 | "descriptionType": "Abstract" 65 | } 66 | ], 67 | "formats": [ 68 | "format" 69 | ], 70 | "fundingReferences": [ 71 | { 72 | "awardTitle": "Measurement of Column-Averaged CO2", 73 | "funderName": "National Aeronautics and Space Administration", 74 | "funderIdentifierType": "GRID", 75 | "funderIdentifier": "grid.238252.c", 76 | "awardNumber": "NAG5-12247" 77 | } 78 | ], 79 | "geoLocations": [ 80 | { 81 | "geoLocationPlace": "Place Name", 82 | "geoLocationPoint": { 83 | "pointLatitude": "34.138", 84 | "pointLongitude": "-118.1258" 85 | } 86 | } 87 | ], 88 | "language": "eng", 89 | "publicationYear": "2017", 90 | "publisher": "Publisher", 91 | 
"relatedIdentifiers": [ 92 | { 93 | "relatedIdentifier": "http://www.url.org/", 94 | "relatedIdentifierType": "URL", 95 | "relationType": "IsPartOf" 96 | }, 97 | { 98 | "relatedIdentifier": "10.5072/FK2", 99 | "relatedIdentifierType": "DOI", 100 | "relationType": "IsDocumentedBy" 101 | } 102 | ], 103 | "types": { 104 | "resourceTypeGeneral": "Dataset", 105 | "resourceType": "Dataset" 106 | }, 107 | "rightsList": [ 108 | { 109 | "rights": "Rights Name", 110 | "rightsURI": "Rights List" 111 | } 112 | ], 113 | "subjects": [ 114 | { 115 | "subject": "subject1" 116 | }, 117 | { 118 | "subject": "subject2" 119 | } 120 | ], 121 | "titles": [ 122 | { 123 | "title": "Title" 124 | }, 125 | { 126 | "title": "Alternative Title", 127 | "titleType": "AlternativeTitle" 128 | } 129 | ], 130 | "version": "0", 131 | "schemaVersion": "http://datacite.org/schema/kernel-4" 132 | } 133 | -------------------------------------------------------------------------------- /excluded_dois.json: -------------------------------------------------------------------------------- 1 | [] 2 | -------------------------------------------------------------------------------- /fix_names.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import math 3 | from progressbar import progressbar 4 | from caltechdata_api import caltechdata_edit 5 | 6 | 7 | def fix_name(metadata, fixed): 8 | for name in metadata: 9 | if name["nameType"] == "Personal": 10 | if "givenName" not in name: 11 | fixed = True 12 | given = name["name"].split(",")[1] 13 | name["givenName"] = given.strip() 14 | return metadata, fixed 15 | 16 | 17 | url = 'https://data.caltech.edu/api/records?q=-metadata.related_identifiers.identifier%3A"10.25989%2Fes8t-kswe"' 18 | 19 | headers = { 20 | "accept": "application/vnd.datacite.datacite+json", 21 | } 22 | 23 | response = requests.get(f"{url}&search_type=scan&scroll=5m") 24 | 25 | total = response.json()["hits"]["total"] 26 | pages = 
math.ceil(int(total) / 1000) 27 | hits = [] # [{'id':'a7f64-a8k10'}] 28 | print(total) 29 | for c in progressbar(range(1, pages + 1)): 30 | chunkurl = f"{url}&sort=newest&size=1000&page={c}" 31 | response = requests.get(chunkurl) 32 | response = response.json() 33 | hits += response["hits"]["hits"] 34 | 35 | 36 | url = "https://data.caltech.edu/api/records" 37 | 38 | for h in progressbar(hits): 39 | idv = str(h["id"]) 40 | 41 | response = requests.get(f"{url}/{idv}", headers=headers) 42 | if response.status_code != 200: 43 | print(response.text) 44 | exit() 45 | else: 46 | fixed = False 47 | metadata = response.json() 48 | metadata["creators"], fixed = fix_name(metadata["creators"], fixed) 49 | if "contributors" in metadata: 50 | metadata["contributors"], fixed = fix_name(metadata["contributors"], fixed) 51 | if fixed: 52 | print(idv) 53 | caltechdata_edit(idv, metadata, production=True, publish=True) 54 | -------------------------------------------------------------------------------- /inspect_dois.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import math 3 | from progressbar import progressbar 4 | from caltechdata_api import caltechdata_edit 5 | 6 | 7 | def fix_name(metadata, fixed): 8 | for name in metadata: 9 | if name["nameType"] == "Personal": 10 | if "givenName" not in name: 11 | fixed = True 12 | given = name["name"].split(",")[1] 13 | name["givenName"] = given.strip() 14 | return metadata, fixed 15 | 16 | 17 | url = 'https://data.caltech.edu/api/records?q=-metadata.related_identifiers.identifier%3A"10.25989%2Fes8t-kswe"' 18 | 19 | headers = { 20 | "accept": "application/vnd.datacite.datacite+json", 21 | } 22 | 23 | response = requests.get(f"{url}&search_type=scan&scroll=5m") 24 | 25 | total = response.json()["hits"]["total"] 26 | pages = math.ceil(int(total) / 1000) 27 | hits = [] 28 | print(total) 29 | for c in progressbar(range(1, pages + 1)): 30 | chunkurl = 
f"{url}&sort=newest&size=1000&page={c}" 31 | response = requests.get(chunkurl) 32 | response = response.json() 33 | hits += response["hits"]["hits"] 34 | 35 | 36 | url = "https://data.caltech.edu/api/records" 37 | 38 | for h in progressbar(hits): 39 | idv = str(h["id"]) 40 | 41 | doi = h["pids"]["doi"] 42 | 43 | if "client" not in doi: 44 | if "10.22002/" in doi["identifier"]: 45 | response = requests.get(f"{url}/{idv}", headers=headers) 46 | if response.status_code != 200: 47 | print(response.text) 48 | exit() 49 | else: 50 | metadata = response.json() 51 | print(idv) 52 | caltechdata_edit(idv, metadata, production=True, publish=True) 53 | -------------------------------------------------------------------------------- /logo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caltechlibrary/caltechdata_api/89cb1bfb6513c61458e257a66e5e6c6aaf222195/logo.gif -------------------------------------------------------------------------------- /outdated/README.md: -------------------------------------------------------------------------------- 1 | # caltechdata_api outdated functions 2 | 3 | These functions have yet to be updated to the InvenioRDM version of 4 | CaltechDATA. Many will be updated in the future, but for now they are available 5 | here for reference. 6 | 7 | 8 | Get geographic metadata from CaltechDATA with WKT representations in a csv file. 9 | You can import this to a GIS program like QGIS 10 | using a delimited text import and projection epsg:4326. You'll have to do one 11 | import for Geometry type Point and another for Geometry type Polygon. 
import os, requests
from progressbar import progressbar
from caltechdata_api import get_metadata, caltechdata_edit


def get_datacite_dates(prefix):
    """Get submitted (registration) dates for DataCite DOIs with a specific prefix.

    Pages through the DataCite REST API using cursor pagination
    (page[cursor]/page[size], following links.next).

    Args:
        prefix: DOI prefix to query (e.g. "10.14291").

    Returns:
        (doi_dates, doi_urls): dicts keyed by DOI, mapping to the
        registration date (YYYY-MM-DD) and the registered URL respectively.
    """
    doi_dates = {}
    doi_urls = {}
    url = (
        "https://api.datacite.org/dois?query=prefix:"
        + prefix
        + "&page[cursor]=1&page[size]=500"
    )
    next_link = url
    meta = requests.get(next_link).json()["meta"]
    for _ in progressbar(range(meta["totalPages"])):
        data = requests.get(next_link).json()
        for doi in data["data"]:
            # "registered" is an ISO timestamp; keep only the date portion.
            date = doi["attributes"]["registered"].split("T")[0]
            doi_dates[doi["id"]] = date
            doi_urls[doi["id"]] = doi["attributes"]["url"]
        if "next" in data["links"]:
            next_link = data["links"]["next"]
        else:
            # No further pages: stop now. Previously next_link was set to
            # None and the next loop iteration would call requests.get(None).
            break
    return doi_dates, doi_urls


token = os.environ["TINDTOK"]

doi_dates, doi_urls = get_datacite_dates("10.14291")
for doi in doi_urls:
    if "data.caltech.edu" in doi_urls[doi]:
        caltech_id = doi_urls[doi].split("/")[-1]
        # Skip a fixed set of records that must not be touched.
        if caltech_id not in ["252", "253", "254", "255"]:
            metadata = get_metadata(caltech_id, emails=True)
            print(caltech_id)
            for date in metadata["dates"]:
                if date["dateType"] == "Issued":
                    print(date["date"], doi_dates[doi])
                    date["date"] = doi_dates[doi]
            response = caltechdata_edit(token, caltech_id, metadata, production=True)
            print(response)
import json
import os

from requests import session

import boto3
from caltechdata_api import customize_schema


def send_s3(filepath, token, production=False):
    """Upload a local file to the TIND S3 staging area.

    Requests signed-upload credentials from TIND, uploads the file with
    boto3, then asks TIND for the server-side MD5 of the stored object.

    Args:
        filepath: path of the local file to upload.
        token: TIND API bearer token.
        production: use data.caltech.edu when True, the sandbox otherwise.

    Returns:
        dict with "url" (bucket key), "filename", "md5" and "size", in the
        shape expected by the TIND record-creation API.
    """
    if production == True:
        s3surl = "https://data.caltech.edu/tindfiles/sign_s3/"
        chkurl = "https://data.caltech.edu/tindfiles/md5_s3"
    else:
        s3surl = "https://cd-sandbox.tind.io/tindfiles/sign_s3/"
        chkurl = "https://cd-sandbox.tind.io/tindfiles/md5_s3"

    headers = {"Authorization": "Bearer %s" % token}

    c = session()

    response = c.get(s3surl, headers=headers)
    jresp = response.json()
    data = jresp["data"]

    bucket = jresp["bucket"]
    key = data["fields"]["key"]
    policy = data["fields"]["policy"]
    aid = data["fields"]["AWSAccessKeyId"]
    signature = data["fields"]["signature"]
    url = data["url"]

    print(filepath)
    infile = open(filepath, "rb")
    # seek(0, 2) returns the offset at end-of-file, i.e. the file size.
    size = infile.seek(0, 2)
    infile.seek(0, 0)  # reset at beginning

    # BUG FIX: this previously called boto.client("s3"), but the module
    # imports boto3 — the call raised NameError before any upload happened.
    s3 = boto3.client("s3")
    s3.upload_file(filepath, bucket, key)

    response = c.get(chkurl + "/" + bucket + "/" + key, headers=headers)
    print(response)
    exit()

    # NOTE(review): everything below is unreachable because of the exit()
    # above; kept for reference (signed-POST upload path using the
    # credentials fetched earlier).
    s3headers = {
        "Host": bucket + ".s3.amazonaws.com",
        "Date": "date",
        "x-amz-acl": "public-read",
        "Access-Control-Allow-Origin": "*",
    }

    form = (
        ("key", key),
        ("acl", "public-read"),
        ("AWSAccessKeyID", aid),
        ("policy", policy),
        ("signature", signature),
        ("file", infile),
    )

    c = session()
    response = c.post(url, files=form, headers=s3headers)
    if response.text:
        raise Exception(response.text)

    response = c.get(chkurl + "/" + bucket + "/" + key, headers=headers)
    md5 = response.json()["md5"]
    filename = filepath.split("/")[-1]

    fileinfo = {"url": key, "filename": filename, "md5": md5, "size": size}

    return fileinfo


def caltechdata_write(metadata, token, files=[], production=False):
    """Create a new CaltechDATA (TIND) record with optional attached files.

    Args:
        metadata: DataCite-style metadata dict (run through customize_schema).
        token: TIND API bearer token.
        files: file path or list of file paths to upload via send_s3.
        production: use data.caltech.edu when True, the sandbox otherwise.

    Returns:
        The raw response text from the TIND record-creation endpoint.
    """
    # If files is a string - change to single value array
    if isinstance(files, str) == True:
        files = [files]

    fileinfo = []

    for f in files:
        fileinfo.append(send_s3(f, token, production))

    if production == True:
        url = "https://data.caltech.edu/submit/api/create/"
    else:
        url = "https://cd-sandbox.tind.io/submit/api/create/"

    headers = {"Authorization": "Bearer %s" % token, "Content-type": "application/json"}

    newdata = customize_schema.customize_schema(metadata)
    newdata["files"] = fileinfo
    if "doi" not in newdata:
        # We want tind to generate the identifier
        newdata["final_actions"] = [
            {"type": "create_doi", "parameters": {"type": "records", "field": "doi"}}
        ]

    dat = json.dumps({"record": newdata})

    c = session()
    response = c.post(url, headers=headers, data=dat)
    return response.text
json.load(metaf) 19 | 20 | production = False 21 | 22 | ids = range(1, 717) 23 | response = caltechdata_edit(token, ids, metadata, args.fnames, {"pdf"}, production) 24 | print(response) 25 | -------------------------------------------------------------------------------- /outdated/edit_all_geo.py: -------------------------------------------------------------------------------- 1 | import argparse, os, json, requests, csv, dataset 2 | from caltechdata_api import caltechdata_edit, decustomize_schema 3 | 4 | # Get access token from TIND sed as environment variable with source token.bash 5 | token = os.environ["TINDTOK"] 6 | 7 | collection = "data/CaltechTHESIS.ds" 8 | 9 | production = True 10 | 11 | if production == True: 12 | url = "https://data.caltech.edu/api/records" 13 | else: 14 | url = "https://cd-sandbox.tind.io/api/records" 15 | 16 | response = requests.get(url + "/?size=1000&q=subjects:gps") 17 | hits = response.json() 18 | 19 | # Set up dictionary of links between resolver and thesis IDs 20 | available = os.path.isfile("data/record_list.csv") 21 | if available == False: 22 | print("You need to run update_thesis_file.py") 23 | exit() 24 | else: 25 | record_list = {} 26 | reader = csv.reader(open("data/record_list.csv")) 27 | for row in reader: 28 | record_list[row[1]] = row[0] 29 | 30 | for h in hits["hits"]["hits"]: 31 | rid = str(h["id"]) 32 | print(rid) 33 | record = decustomize_schema(h["metadata"], True) 34 | if "relatedIdentifiers" in record: 35 | for r in record["relatedIdentifiers"]: 36 | if ( 37 | r["relationType"] == "IsSupplementTo" 38 | and r["relatedIdentifierType"] == "URL" 39 | ): 40 | idv = record_list[r["relatedIdentifier"]] 41 | thesis_metadata, err = dataset.read(collection, idv) 42 | pub_date = thesis_metadata["date"] 43 | dates = [{"date": pub_date, "dateType": "Issued"}] 44 | for date in record["dates"]: 45 | if date["dateType"] == "Issued": 46 | dates.append({"date": date["date"], "dateType": "Updated"}) 47 | elif date["dateType"] == 
"Updated": 48 | pass 49 | elif date["dateType"] != "Submitted": 50 | dates.append(date) 51 | print(dates) 52 | metadata = {"dates": dates} 53 | response = caltechdata_edit(token, rid, metadata, {}, {}, production) 54 | print(response) 55 | -------------------------------------------------------------------------------- /outdated/edit_all_github.py: -------------------------------------------------------------------------------- 1 | import argparse, os, json, requests 2 | from caltechdata_api import caltechdata_edit, decustomize_schema 3 | 4 | # Get access token from TIND sed as environment variable with source token.bash 5 | token = os.environ["TINDTOK"] 6 | 7 | production = True 8 | 9 | if production == True: 10 | url = "https://data.caltech.edu/api/records" 11 | else: 12 | url = "https://cd-sandbox.tind.io/api/records" 13 | 14 | response = requests.get(url + "/?size=2000&q=cal_resource_type=software") 15 | hits = response.json() 16 | 17 | for h in hits["hits"]["hits"]: 18 | rid = h["id"] 19 | print(rid) 20 | record = decustomize_schema(h["metadata"], True) 21 | replace = False 22 | # to_update =\ 23 | # [288,269,295,291,279,284,266,281,286,278,280,293,283,287,210,274,276,290,300,285,270,268,267,302,744,282,272,289] 24 | # if rid in to_update: 25 | # Find just GitHub records by title 26 | if "/" in record["titles"][0]["title"]: 27 | add = True 28 | for s in record["subjects"]: 29 | subject = s["subject"] 30 | if subject == "Github": 31 | add = False 32 | if subject == "GitHub": 33 | add = False 34 | if subject == "Bitbucket": 35 | add = False 36 | if add == True: 37 | record["subjects"].append({"subject": "GitHub"}) 38 | print(record["titles"][0]["title"]) 39 | response = caltechdata_edit(token, rid, record, {}, {}, production) 40 | print(response) 41 | -------------------------------------------------------------------------------- /outdated/edit_all_tccon.py: -------------------------------------------------------------------------------- 1 | import argparse, 
os, json, requests 2 | from caltechdata_api import caltechdata_edit, decustomize_schema 3 | 4 | # Get access token from TIND sed as environment variable with source token.bash 5 | token = os.environ["TINDTOK"] 6 | 7 | production = True 8 | 9 | if production == True: 10 | url = "https://data.caltech.edu/api/records" 11 | else: 12 | url = "https://cd-sandbox.tind.io/api/records" 13 | 14 | response = requests.get(url + "/?size=1000&q=subjects:TCCON") 15 | hits = response.json() 16 | 17 | wiki1 = "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description" 18 | new1 = "https://tccon-wiki.caltech.edu/Main/DataDescription" 19 | wiki2 = "https://tccon-wiki.caltech.edu/Sites" 20 | new2 = "https://tccon-wiki.caltech.edu/Main/TCCONSites" 21 | site = "http://tccondata.org/" 22 | new3 = "https://tccondata.org" 23 | exsite = "http://tccondata.org" 24 | 25 | for h in hits["hits"]["hits"]: 26 | rid = h["id"] 27 | print(rid) 28 | record = decustomize_schema(h["metadata"], True) 29 | updated = {} 30 | if "relatedIdentifiers" in record: 31 | for related in record["relatedIdentifiers"]: 32 | if related["relatedIdentifier"] == wiki1: 33 | related["relatedIdentifier"] = new1 34 | if related["relatedIdentifier"] == wiki2: 35 | related["relatedIdentifier"] = new2 36 | if related["relatedIdentifier"] == site: 37 | related["relatedIdentifier"] = new3 38 | if related["relatedIdentifier"] == exsite: 39 | related["relatedIdentifier"] = new3 40 | updated["relatedIdentifiers"] = record["relatedIdentifiers"] 41 | response = caltechdata_edit(rid, updated, token, {}, {}, production) 42 | print(response) 43 | -------------------------------------------------------------------------------- /outdated/edit_files.py: -------------------------------------------------------------------------------- 1 | import argparse, os, json 2 | from caltechdata_api import caltechdata_edit 3 | 4 | parser = argparse.ArgumentParser( 5 | description="Write files and a DataCite 4 standard json record\ 
6 | to CaltechDATA repository" 7 | ) 8 | parser.add_argument("-ids", nargs="*", help="CaltechDATA IDs") 9 | parser.add_argument("-fnames", nargs="*", help="New Files") 10 | parser.add_argument("-delete", nargs="*", help="Files To Delete") 11 | args = parser.parse_args() 12 | 13 | # Get access token from TIND sed as environment variable with source token.bash 14 | token = os.environ["TINDTOK"] 15 | 16 | production = True 17 | 18 | print(args.delete) 19 | 20 | response = caltechdata_edit(token, args.ids, {}, args.fnames, args.delete, production) 21 | print(response) 22 | -------------------------------------------------------------------------------- /outdated/edit_tccon.py: -------------------------------------------------------------------------------- 1 | import sys, os, json, requests 2 | from caltechdata_api import caltechdata_edit, decustomize_schema 3 | 4 | # USAGE: python edit_tccon.py tccon.ggg2014.darwin01.R0.json 269 0 griffith@uow.edu.au 5 | 6 | # Get access token from TIND sed as environment variable with source token.bash 7 | token = os.environ["TINDTOK"] 8 | 9 | production = True 10 | 11 | if production == True: 12 | url = "https://data.caltech.edu/api/records" 13 | else: 14 | url = "https://cd-sandbox.tind.io/api/records" 15 | 16 | response = requests.get(url + "/?size=1000&q=subjects:TCCON") 17 | hits = response.json() 18 | 19 | infile = open(sys.argv[1], "r") 20 | record = json.load(infile) 21 | 22 | rid = sys.argv[2] 23 | 24 | group = {"contributorName": "TCCON", "contributorType": "ResearchGroup"} 25 | new = "" 26 | for c in record["contributors"]: 27 | print(c["contributorType"]) 28 | if c["contributorType"] == "HostingInstitution": 29 | print("YES") 30 | c["contributorName"] = "California Institute of Techonolgy, Pasadena, CA (US)" 31 | c["nameIdentifiers"] = [ 32 | {"nameIdentifier": "grid.20861.3d", "nameIdentifierScheme": "GRID"} 33 | ] 34 | v = record["contributors"] 35 | v.append(group) 36 | contact = record["creators"][int(sys.argv[3])] 37 
| contact["contributorName"] = contact.pop("creatorName") 38 | contact["contributorEmail"] = sys.argv[4] 39 | contact["contributorType"] = "ContactPerson" 40 | v.append(contact) 41 | new = {"contributors": v} 42 | print(new) 43 | response = caltechdata_edit(token, rid, new, {}, {}, production) 44 | print(response) 45 | -------------------------------------------------------------------------------- /outdated/example_download_and_upload.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from caltechdata_api import download_file, caltechdata_write\n", 10 | "import json" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "data": { 20 | "application/vnd.jupyter.widget-view+json": { 21 | "model_id": "0f9182c455d94474ae1845c7047b4e0a", 22 | "version_major": 2, 23 | "version_minor": 0 24 | }, 25 | "text/plain": [ 26 | "HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15990.0), HTML(value='')))" 27 | ] 28 | }, 29 | "metadata": {}, 30 | "output_type": "display_data" 31 | } 32 | ], 33 | "source": [ 34 | "#By default will download to file named 10.22002-D1.1098\n", 35 | "#Can provide filename of interest using fname option\n", 36 | "filen = download_file('10.22002/D1.1098')" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 2, 42 | "metadata": {}, 43 | "outputs": [ 44 | { 45 | "name": "stdout", 46 | "output_type": "stream", 47 | "text": [ 48 | "Incorrect access token: \n" 49 | ] 50 | }, 51 | { 52 | "ename": "UnboundLocalError", 53 | "evalue": "local variable 'jresp' referenced before assignment", 54 | "output_type": "error", 55 | "traceback": [ 56 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 57 | "\u001b[0;31mUnboundLocalError\u001b[0m 
Traceback (most recent call last)", 58 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0mproduction\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 11\u001b[0;31m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcaltechdata_write\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmetadata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtoken\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfilen\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mproduction\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mschema\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'43'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 12\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresponse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 59 | "\u001b[0;32m~/Documents/caltechdata_api/caltechdata_api/caltechdata_write.py\u001b[0m in \u001b[0;36mcaltechdata_write\u001b[0;34m(metadata, token, files, production, schema)\u001b[0m\n\u001b[1;32m 88\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mfiles\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 89\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mf\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mfiles\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 90\u001b[0;31m \u001b[0mfileinfo\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msend_s3\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtoken\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mproduction\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 91\u001b[0m 
\u001b[0mnewdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"files\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfileinfo\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 92\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 60 | "\u001b[0;32m~/Documents/caltechdata_api/caltechdata_api/caltechdata_write.py\u001b[0m in \u001b[0;36msend_s3\u001b[0;34m(filepath, token, production)\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"Incorrect access token: {response}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 33\u001b[0;31m \u001b[0mbucket\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mjresp\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"bucket\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 34\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"fields\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"key\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 35\u001b[0m \u001b[0mpolicy\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"fields\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"policy\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 61 | "\u001b[0;31mUnboundLocalError\u001b[0m: local variable 'jresp' referenced before assignment" 62 | ] 63 | } 64 | ], 65 | "source": [ 66 | "#Now write a file to CaltechDATA test instance (cd-sandbox.tind.io)\n", 67 | "\n", 68 | "token = 'TOKEN'\n", 69 | "\n", 70 | "metaf = open('example43.json', 'r')\n", 71 | "metadata = json.load(metaf)\n", 72 | "filen = 'logo.gif'\n", 73 | "\n", 74 | "production = False\n", 75 | "\n", 76 | "response = caltechdata_write(metadata, 
token, filen, production, schema='43')\n", 77 | "print(response)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [] 86 | } 87 | ], 88 | "metadata": { 89 | "kernelspec": { 90 | "display_name": "Python 3", 91 | "language": "python", 92 | "name": "python3" 93 | }, 94 | "language_info": { 95 | "codemirror_mode": { 96 | "name": "ipython", 97 | "version": 3 98 | }, 99 | "file_extension": ".py", 100 | "mimetype": "text/x-python", 101 | "name": "python", 102 | "nbconvert_exporter": "python", 103 | "pygments_lexer": "ipython3", 104 | "version": "3.8.5" 105 | } 106 | }, 107 | "nbformat": 4, 108 | "nbformat_minor": 4 109 | } 110 | -------------------------------------------------------------------------------- /outdated/get_geo.py: -------------------------------------------------------------------------------- 1 | import os, json, csv, argparse 2 | import requests 3 | 4 | if __name__ == "__main__": 5 | parser = argparse.ArgumentParser( 6 | description="get_metadata queries the caltechDATA (Invenio 3) API\ 7 | and returns DataCite-compatable metadata" 8 | ) 9 | parser.add_argument("output", help="Output file name") 10 | parser.add_argument("-keywords", nargs="*") 11 | 12 | args = parser.parse_args() 13 | 14 | url = "https://data.caltech.edu/api/records/?size=5000" 15 | 16 | search = "" 17 | if args.keywords: 18 | for key in args.keywords: 19 | if search == "": 20 | search = f'&q=subjects:"{key}"' 21 | else: 22 | search = search + f'+"{key}"' 23 | url = url + search 24 | 25 | response = requests.get(url) 26 | hits = response.json() 27 | 28 | outfile = open(args.output, "w") 29 | writer = csv.writer(outfile) 30 | writer.writerow(["wkt", "name", "year", "doi"]) 31 | 32 | for h in hits["hits"]["hits"]: 33 | metadata = decustomize_schema(h["metadata"]) 34 | if "geoLocations" in metadata: 35 | doi = "https://doi.org/" + metadata["identifier"]["identifier"] 36 | title = 
metadata["titles"][0]["title"].split(":")[0] 37 | geo = metadata["geoLocations"] 38 | year = metadata["publicationYear"] 39 | for g in geo: 40 | if "geoLocationBox" in g: 41 | box = g["geoLocationBox"] 42 | p1 = f"{box['eastBoundLongitude']} {box['northBoundLatitude']}" 43 | p2 = f"{box['westBoundLongitude']} {box['northBoundLatitude']}" 44 | p3 = f"{box['westBoundLongitude']} {box['southBoundLatitude']}" 45 | p4 = f"{box['eastBoundLongitude']} {box['southBoundLatitude']}" 46 | wkt = f"POLYGON (({p1}, {p2}, {p3}, {p4}, {p1}))" 47 | writer.writerow([wkt, title, year, doi]) 48 | 49 | if "geoLocationPoint" in g: 50 | point = g["geoLocationPoint"] 51 | wkt = f"POINT ({point['pointLongitude']} {point['pointLatitude']})" 52 | writer.writerow([wkt, title, year, doi]) 53 | -------------------------------------------------------------------------------- /outdated/test.py: -------------------------------------------------------------------------------- 1 | from datacite import schema43 2 | import io, json 3 | from os.path import dirname, join 4 | 5 | 6 | def load_json_path(path): 7 | """Helper method for loading a JSON example file from a path.""" 8 | path_base = dirname(__file__) 9 | with io.open(join(path_base, path), encoding="utf-8") as file: 10 | content = file.read() 11 | return json.loads(content) 12 | 13 | 14 | metadata = load_json_path("example43.json") 15 | 16 | valid = schema43.validate(metadata) 17 | if valid == False: 18 | v = schema43.validator.validate(metadata) 19 | errors = sorted(v.iter_errors(instance), key=lambda e: e.path) 20 | for error in errors: 21 | print(error.message) 22 | -------------------------------------------------------------------------------- /outdated/test_community.py: -------------------------------------------------------------------------------- 1 | import requests, os 2 | 3 | token = os.environ["RDMTOK"] 4 | 5 | url = "https://data.caltechlibrary.dev/" 6 | 7 | headers = { 8 | "Authorization": "Bearer %s" % token, 9 | "Content-type": 
"application/json", 10 | } 11 | 12 | data = {"payload": {"content": "I want this record to be in!", "format": "html"}} 13 | 14 | result = requests.post( 15 | url + "/api/records/cxc6m-bef55/draft/actions/submit-review", 16 | headers=headers, 17 | json=data, 18 | ) 19 | 20 | print(result.status_code) 21 | print(result.text) 22 | # if result.status_code != 201: 23 | # print(result.text) 24 | # exit() 25 | -------------------------------------------------------------------------------- /outdated/test_file.py: -------------------------------------------------------------------------------- 1 | import os, json 2 | from requests import session 3 | from caltechdata_api import customize_schema 4 | 5 | # fileinfo = [ {"url": , "filename": filename, "md5": md5, "size": size}] 6 | 7 | token = os.environ["TINDTOK"] 8 | 9 | metaf = open("test_file.json", "r") 10 | metadata = json.load(metaf) 11 | 12 | url = "https://cd-sandbox.tind.io/submit/api/create/" 13 | 14 | headers = {"Authorization": "Bearer %s" % token, "Content-type": "application/json"} 15 | 16 | newdata = customize_schema(metadata) 17 | # if "doi" not in newdata: 18 | # # We want tind to generate the identifier 19 | # newdata["final_actions"] = [ 20 | # {"type": "create_doi", "parameters": {"type": "records", "field": "doi"}} 21 | # ] 22 | 23 | dat = json.dumps({"record": newdata}) 24 | 25 | c = session() 26 | response = c.post(url, headers=headers, data=dat) 27 | print(response.text) 28 | -------------------------------------------------------------------------------- /outdated/unembargo.py: -------------------------------------------------------------------------------- 1 | import argparse, os, json 2 | from caltechdata_api import caltechdata_unembargo 3 | 4 | parser = argparse.ArgumentParser( 5 | description="Write files and a DataCite 4 standard json record\ 6 | to CaltechDATA repository" 7 | ) 8 | parser.add_argument("-ids", nargs="*", help="CaltechDATA IDs") 9 | args = parser.parse_args() 10 | 11 | # Get 
access token from TIND set as environment variable with source token.bash 12 | token = os.environ["TINDTOK"] 13 | 14 | production = False 15 | 16 | response = caltechdata_unembargo(token, args.ids, production) 17 | print(response) 18 | -------------------------------------------------------------------------------- /outdated/update_thesis_file.py: -------------------------------------------------------------------------------- 1 | import os, subprocess, json, csv 2 | import dataset 3 | from ames.harvesters import get_caltechfeed 4 | 5 | if os.path.isdir("data") == False: 6 | os.mkdir("data") 7 | os.chdir("data") 8 | 9 | get_caltechfeed("thesis") 10 | 11 | record_list = {} 12 | collection = "CaltechTHESIS.ds" 13 | keys = dataset.keys(collection) 14 | count = 0 15 | for k in keys: 16 | count = count + 1 17 | if count % 100 == 0: 18 | print(count) 19 | metadata, err = dataset.read(collection, k) 20 | if err != "": 21 | print("Error on read ", err) 22 | exit() 23 | if metadata != {}: 24 | if "official_url" in metadata: 25 | record_list[k] = metadata["official_url"] 26 | else: 27 | print("Missing URL", metadata) 28 | else: 29 | print("Bad Record: " + k) 30 | print(metadata) 31 | with open("record_list.csv", "w") as f: 32 | w = csv.writer(f) 33 | w.writerows(record_list.items()) 34 | -------------------------------------------------------------------------------- /outdated/write_pilot_phase1.py: -------------------------------------------------------------------------------- 1 | import argparse, os, json 2 | import s3fs 3 | from datacite import schema43 4 | from caltechdata_api import caltechdata_write 5 | 6 | parser = argparse.ArgumentParser( 7 | description="Adds S3-stored pilot files and a DataCite 4.3 standard json record\ 8 | to CaltechDATA repository" 9 | ) 10 | parser.add_argument("folder", nargs=1, help="Folder") 11 | parser.add_argument( 12 | "json_file", nargs=1, help="file name for json DataCite metadata file" 13 | ) 14 | 15 | args = parser.parse_args() 16 | 
17 | # Get access token as environment variable 18 | token = os.environ["TINDTOK"] 19 | 20 | endpoint = "https://renc.osn.xsede.org/" 21 | 22 | # Get metadata and files from bucket 23 | s3 = s3fs.S3FileSystem(anon=True, client_kwargs={"endpoint_url": endpoint}) 24 | 25 | 26 | path = "ini210004tommorrell/" + args.folder[0] + "/" 27 | meta_path = path + args.json_file[0] 28 | metaf = s3.open(meta_path, "rb") 29 | metadata = json.load(metaf) 30 | 31 | # Find the files 32 | files = s3.glob(path + "/*.nc") 33 | 34 | description_string = f"Files available via S3 at {endpoint}{path}
" 35 | for link in files: 36 | fname = link.split("/")[-1] 37 | link = endpoint + link 38 | description_string += f"""{fname} 40 | Download
""" 41 | 42 | metadata["descriptions"].append( 43 | {"description": description_string, "descriptionType": "Other"} 44 | ) 45 | 46 | # valid = schema43.validate(metadata) 47 | # if not valid: 48 | # v = schema43.validator.validate(metadata) 49 | # errors = sorted(v.iter_errors(instance), key=lambda e: e.path) 50 | # for error in errors: 51 | # print(error.message) 52 | # exit() 53 | 54 | print(metadata) 55 | 56 | production = True 57 | 58 | response = caltechdata_write(metadata, token, [], production, "43") 59 | print(response) 60 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61.0", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | -------------------------------------------------------------------------------- /rdm.json: -------------------------------------------------------------------------------- 1 | { 2 | "pids": { 3 | }, 4 | "metadata": { 5 | "resource_type": {"id": "dataset"}, 6 | "creators": [ 7 | { 8 | "person_or_org": { 9 | "name": "Nielsen, Lars Holm", 10 | "type": "personal", 11 | "given_name": "Lars Holm", 12 | "family_name": "Nielsen", 13 | "identifiers": [ 14 | {"scheme": "orcid", "identifier": "0000-0001-8135-3489"} 15 | ] 16 | }, 17 | "affiliations": [{"name": "free-text"}] 18 | } 19 | ], 20 | "title": "InvenioRDM", 21 | "additional_titles": [ 22 | { 23 | "title": "a research data management platform", 24 | "type": {"id": "subtitle"}, 25 | "lang": {"id": "eng"} 26 | } 27 | ], 28 | "publisher": "InvenioRDM", 29 | "publication_date": "2018/2020-09", 30 | "subjects": [ 31 | {"subject": "custom"} 32 | ], 33 | "contributors": [ 34 | { 35 | "person_or_org": { 36 | "name": "Nielsen, Lars Holm", 37 | "type": "personal", 38 | "given_name": "Lars Holm", 39 | "family_name": "Nielsen", 40 | "identifiers": [ 41 | {"scheme": "orcid", "identifier": "0000-0001-8135-3489"} 42 | 
] 43 | }, 44 | "role": {"id": "other"} 45 | } 46 | ], 47 | "dates": [ 48 | {"date": "1939/1945", "type": {"id": "other"}, "description": "A date"} 49 | ], 50 | "languages": [{"id": "dan"}, {"id": "eng"}], 51 | "identifiers": [{"identifier": "1924MNRAS..84..308E", "scheme": "bibcode"}], 52 | "related_identifiers": [ 53 | { 54 | "identifier": "10.1234/foo.bar", 55 | "scheme": "doi", 56 | "relation_type": {"id": "iscitedby"}, 57 | "resource_type": {"id": "dataset"} 58 | } 59 | ], 60 | "sizes": ["11 pages"], 61 | "formats": ["application/pdf"], 62 | "version": "v1.0", 63 | "rights": [ 64 | { 65 | "title": {"en": "A custom license"}, 66 | "description": {"en": "A description"}, 67 | "link": "https://customlicense.org/licenses/by/4.0/" 68 | }, 69 | {"id": "cc-by-4.0"} 70 | ], 71 | "description": "

A description

with HTML tags

", 72 | "additional_descriptions": [ 73 | { 74 | "description": "Bla bla bla", 75 | "type": {"id": "methods"}, 76 | "lang": {"id": "eng"} 77 | } 78 | ], 79 | "locations": { 80 | "features": [ 81 | { 82 | "geometry": { 83 | "type": "Point", 84 | "coordinates": [-32.94682, -60.63932] 85 | }, 86 | "place": "test location place", 87 | "description": "test location description", 88 | "identifiers": [ 89 | {"identifier": "12345abcde", "scheme": "wikidata"}, 90 | {"identifier": "12345abcde", "scheme": "geonames"} 91 | ] 92 | } 93 | ] 94 | } 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /run-tests.sh: -------------------------------------------------------------------------------- 1 | pytest tests -vv 2 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=64.0","wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [metadata] 6 | name = caltechdata_api 7 | version = 1.10.0 8 | author = Thomas E Morrell, Rohan Bhattarai, Elizabeth Won, Alexander A Abakah 9 | author_email = tmorrell@caltech.edu, aabakah@caltech.edu 10 | description = Python wrapper for CaltechDATA API. 
11 | long_description = file: README.md 12 | long_description_content_type = text/markdown 13 | url = https://github.com/caltechlibrary/caltechdata_api 14 | license = MIT 15 | classifiers = 16 | License :: OSI Approved :: MIT License 17 | Programming Language :: Python :: 3 18 | Programming Language :: Python :: 3.7 19 | Programming Language :: Python :: 3.8 20 | Programming Language :: Python :: 3.9 21 | Programming Language :: Python :: 3.10 22 | Programming Language :: Python :: Implementation :: CPython 23 | Operating System :: OS Independent 24 | 25 | [options] 26 | packages = find: 27 | python_requires = >=3.6.0 28 | install_requires = 29 | requests 30 | datacite>1.1.0 31 | tqdm>=4.62.3 32 | pyyaml 33 | s3fs 34 | cryptography 35 | s3cmd 36 | include_package_data = True 37 | 38 | [options.packages.find] 39 | exclude = tests 40 | 41 | [options.package_data] 42 | caltechdata_api = vocabularies.yaml, vocabularies/* 43 | 44 | [options.entry_points] 45 | console_scripts = 46 | caltechdata_api=caltechdata_api.cli:main 47 | 48 | [tool:pytest] 49 | addopts = --verbose 50 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup() 4 | -------------------------------------------------------------------------------- /templates/README.md: -------------------------------------------------------------------------------- 1 | # This is the title of your submission to CaltechDATA 2 | 3 | ## Creators 4 | - Name Type: Personal 5 | - Affiliation: [https://ror.org/04wxnsj81](https://ror.org/04wxnsj81) 6 | - Name: Name 7 | - Family Name: Family Name 8 | - Given Name: Given Name 9 | - Name Identifiers: [https://orcid.org/0000-0002-1825-0097](https://orcid.org/0000-0002-1825-0097) 10 | 11 | - Name Type: Personal 12 | - Affiliation: [https://ror.org/04wxnsj81](https://ror.org/04wxnsj81) 13 | - Name: Name2 14 | - Family 
Name: Family Name 2 15 | - Given Name: Given Name 2 16 | - Name Identifiers: [https://orcid.org/0000-0002-1825-0097](https://orcid.org/0000-0002-1825-0097) 17 | 18 | ## Descriptions 19 | - Description: Description 20 | - Description Type: Abstract 21 | 22 | ## Types 23 | - Resource Type General: Dataset 24 | - Resource Type: Dataset 25 | 26 | ## Rights List 27 | - Rights: Creative Commons Zero v1.0 Universal 28 | - Rights URI: https://creativecommons.org/publicdomain/zero/1.0/legalcode 29 | 30 | ## Publication Year 31 | - Publication Year: 2024 32 | 33 | ## Publisher 34 | - Publisher: CaltechDATA 35 | 36 | ## Dates 37 | - Date: 2014-10-01 38 | - Date Type: Created 39 | - Date: 2012-05-22/2016-12-21 40 | - Date Type: Collected 41 | 42 | ## Subjects 43 | - Subject: subject1 44 | - Subject: subject2 45 | 46 | ## Funding References 47 | - Award Title: Measurement of Column-Averaged CO2 48 | - Funder Name: National Aeronautics and Space Administration 49 | - Funder Identifier Type: ROR 50 | - Funder Identifier: https://ror.org/027ka1x80 51 | - Award Number: NAG5-12247 52 | 53 | ## Related Identifiers 54 | - Related Identifier: [http://www.url.org/](http://www.url.org/) 55 | - Related Identifier Type: URL 56 | - Relation Type: IsPartOf 57 | - Related Identifier: 10.5072/FK2 58 | - Related Identifier Type: DOI 59 | - Relation Type: IsDocumentedBy 60 | 61 | ## Version 62 | - Version: 1 63 | 64 | ## Identifiers 65 | - Identifier: 1924MNRAS..84..308E 66 | - Identifier Type: bibcode 67 | 68 | ## Contributors 69 | - Name Type: Personal 70 | - Affiliation: [https://ror.org/04wxnsj81](https://ror.org/04wxnsj81) 71 | - Name: Contributor Name 72 | - Family Name: Family Name 73 | - Given Name: Given Name 74 | - Contributor Type: ContactPerson 75 | - Name Identifiers: [https://orcid.org/0000-0002-1825-0097](https://orcid.org/0000-0002-1825-0097) 76 | -------------------------------------------------------------------------------- /tests/bot.py: 
-------------------------------------------------------------------------------- 1 | import subprocess 2 | import time 3 | from unittest.mock import patch 4 | import sys 5 | import os 6 | import json 7 | import requests 8 | from datetime import datetime 9 | import pytest 10 | from customize_schema import validate_metadata as validator43 # Import validator 11 | 12 | 13 | class CaltechDataTester: 14 | def __init__(self): 15 | self.test_dir = "caltech_test_data" 16 | self.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") 17 | if not os.path.exists(self.test_dir): 18 | os.makedirs(self.test_dir) 19 | 20 | # Create test data directory with timestamp 21 | self.test_run_dir = os.path.join(self.test_dir, f"test_run_{self.timestamp}") 22 | os.makedirs(self.test_run_dir) 23 | 24 | # Initialize logging 25 | self.log_file = os.path.join(self.test_run_dir, "test_log.txt") 26 | 27 | def log(self, message): 28 | """Log message to both console and file""" 29 | print(message) 30 | with open(self.log_file, "a") as f: 31 | f.write(f"{datetime.now()}: {message}\n") 32 | 33 | def create_test_files(self): 34 | """Create necessary test files""" 35 | # Create a dummy CSV file 36 | csv_path = os.path.join(self.test_run_dir, "test_data.csv") 37 | with open(csv_path, "w") as f: 38 | f.write("date,temperature,humidity\n") 39 | f.write("2023-01-01,25.5,60\n") 40 | f.write("2023-01-02,26.0,62\n") 41 | f.write("2023-01-03,24.8,65\n") 42 | 43 | self.log(f"Created test CSV file: {csv_path}") 44 | return csv_path 45 | 46 | def generate_test_responses(self): 47 | """Generate test responses for CLI prompts""" 48 | return { 49 | "Do you want to create or edit a CaltechDATA record? (create/edit): ": "create", 50 | "Do you want to use metadata from an existing file or create new metadata? 
(existing/create): ": "create", 51 | "Enter the title of the dataset: ": f"Test Dataset {self.timestamp}", 52 | "Enter the abstract or description of the dataset: ": "This is an automated test dataset containing sample climate data for validation purposes.", 53 | "Enter the number corresponding to the desired license: ": "1", 54 | "Enter your ORCID identifier: ": "0000-0002-1825-0097", 55 | "How many funding entries do you want to provide? ": "1", 56 | "Enter the award number for funding: ": "NSF-1234567", 57 | "Enter the award title for funding: ": "Automated Testing Grant", 58 | "Enter the funder ROR (https://ror.org): ": "021nxhr62", 59 | "Do you want to upload or link data files? (upload/link/n): ": "upload", 60 | "Enter the filename to upload as a supporting file (or 'n' to finish): ": "test_data.csv", 61 | "Do you want to add more files? (y/n): ": "n", 62 | "Do you want to send this record to CaltechDATA? (y/n): ": "y", 63 | } 64 | 65 | def extract_record_id(self, output_text): 66 | """Extract record ID from CLI output""" 67 | try: 68 | for line in output_text.split("\n"): 69 | if "uploads/" in line: 70 | return line.strip().split("/")[-1] 71 | except Exception as e: 72 | self.log(f"Error extracting record ID: {e}") 73 | return None 74 | 75 | def download_and_validate_record(self, record_id): 76 | """Download and validate the record""" 77 | try: 78 | # Wait for record to be available 79 | time.sleep(5) 80 | 81 | # Download metadata 82 | url = f"https://data.caltechlibrary.dev/records/{record_id}/export/datacite-json" 83 | response = requests.get(url) 84 | response.raise_for_status() 85 | 86 | # Save metadata 87 | json_path = os.path.join(self.test_run_dir, f"{record_id}.json") 88 | with open(json_path, "w") as f: 89 | json.dump(response.json(), f, indent=2) 90 | 91 | self.log(f"Downloaded metadata to: {json_path}") 92 | 93 | # Validate metadata using the imported validator 94 | validation_errors = validator43(response.json()) 95 | 96 | if validation_errors: 
97 | self.log("❌ Validation errors found:") 98 | for error in validation_errors: 99 | self.log(f" - {error}") 100 | return False 101 | else: 102 | self.log("✅ Validation passed successfully") 103 | return True 104 | 105 | except Exception as e: 106 | self.log(f"Error in download and validation: {e}") 107 | return False 108 | 109 | def run_test_submission(self): 110 | """Run the complete test submission process""" 111 | try: 112 | self.log("Starting test submission process...") 113 | 114 | # Create test files 115 | test_csv = self.create_test_files() 116 | 117 | # Generate responses 118 | responses = self.generate_test_responses() 119 | 120 | # Setup output capture 121 | class OutputCapture: 122 | def __init__(self): 123 | self.output = [] 124 | 125 | def write(self, text): 126 | self.output.append(text) 127 | sys.__stdout__.write(text) 128 | 129 | def flush(self): 130 | pass 131 | 132 | def get_output(self): 133 | return "".join(self.output) 134 | 135 | output_capture = OutputCapture() 136 | sys.stdout = output_capture 137 | 138 | # Mock input and run CLI 139 | def mock_input(prompt): 140 | self.log(f"Prompt: {prompt}") 141 | if prompt in responses: 142 | response = responses[prompt] 143 | self.log(f"Response: {response}") 144 | return response 145 | return "" 146 | 147 | with patch("builtins.input", side_effect=mock_input): 148 | try: 149 | import cli 150 | 151 | cli.main() 152 | except Exception as e: 153 | self.log(f"Error during CLI execution: {e}") 154 | return False 155 | 156 | # Restore stdout 157 | sys.stdout = sys.__stdout__ 158 | 159 | # Get output and extract record ID 160 | cli_output = output_capture.get_output() 161 | record_id = self.extract_record_id(cli_output) 162 | 163 | if not record_id: 164 | self.log("Failed to extract record ID") 165 | return False 166 | 167 | self.log(f"Successfully created record with ID: {record_id}") 168 | 169 | # Validate the record 170 | return self.download_and_validate_record(record_id) 171 | 172 | except Exception as 
e: 173 | self.log(f"Error in test submission: {e}") 174 | return False 175 | finally: 176 | # Cleanup 177 | if os.path.exists(test_csv): 178 | os.remove(test_csv) 179 | self.log("Test files cleaned up") 180 | 181 | 182 | def main(): 183 | tester = CaltechDataTester() 184 | 185 | success = tester.run_test_submission() 186 | 187 | if success: 188 | tester.log("\n🎉 Test submission and validation completed successfully!") 189 | else: 190 | tester.log("\n❌ Test submission or validation failed - check logs for details") 191 | 192 | tester.log(f"\nTest logs available at: {tester.log_file}") 193 | 194 | 195 | if __name__ == "__main__": 196 | main() 197 | -------------------------------------------------------------------------------- /tests/bot_yaml.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import time 3 | from unittest.mock import patch 4 | import sys 5 | import os 6 | import json 7 | import requests 8 | from datetime import datetime 9 | import pytest 10 | import importlib.util 11 | import traceback 12 | 13 | 14 | class CaltechDataTester: 15 | def __init__(self): 16 | # Use GitHub Actions environment or create a local test directory 17 | self.test_dir = os.environ.get( 18 | "GITHUB_WORKSPACE", os.path.join(os.getcwd(), "caltech_test_data") 19 | ) 20 | self.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") 21 | 22 | # Ensure test directory exists 23 | os.makedirs(self.test_dir, exist_ok=True) 24 | 25 | # Create test run directory 26 | self.test_run_dir = os.path.join(self.test_dir, f"test_run_{self.timestamp}") 27 | os.makedirs(self.test_run_dir, exist_ok=True) 28 | 29 | # Initialize logging 30 | self.log_file = os.path.join(self.test_run_dir, "test_log.txt") 31 | 32 | def log(self, message): 33 | """Log message to both console and file""" 34 | print(message) 35 | with open(self.log_file, "a") as f: 36 | f.write(f"{datetime.now()}: {message}\n") 37 | 38 | def create_test_files(self): 39 | """Create 
necessary test files""" 40 | csv_path = os.path.join(self.test_run_dir, "test_data.csv") 41 | with open(csv_path, "w") as f: 42 | f.write("date,temperature,humidity\n") 43 | f.write("2023-01-01,25.5,60\n") 44 | f.write("2023-01-02,26.0,62\n") 45 | f.write("2023-01-03,24.8,65\n") 46 | 47 | self.log(f"Created test CSV file: {csv_path}") 48 | return csv_path 49 | 50 | def import_cli_module(self): 51 | """Dynamically import cli module from the correct path""" 52 | cli_path = os.path.join( 53 | os.environ.get("GITHUB_WORKSPACE", os.getcwd()), "caltechdata_api", "cli.py" 54 | ) 55 | spec = importlib.util.spec_from_file_location("cli", cli_path) 56 | cli_module = importlib.util.module_from_spec(spec) 57 | spec.loader.exec_module(cli_module) 58 | return cli_module 59 | 60 | def generate_test_responses(self): 61 | """Generate test responses for CLI prompts""" 62 | return { 63 | "What would you like to do? (create/edit/profile/exit): ": "create", 64 | "Do you want to use metadata from an existing file or create new metadata? (existing/create): ": "create", 65 | "Enter the title of the dataset: ": f"Test Dataset {self.timestamp}", 66 | "Enter the abstract or description of the dataset: ": "This is an automated test dataset containing sample climate data for validation purposes.", 67 | "Enter the number corresponding to the desired license: ": "1", 68 | "Use saved profile? (y/n): ": "n", 69 | "Enter your ORCID identifier: ": os.environ.get( 70 | "TEST_ORCID", "0000-0002-1825-0097" 71 | ), 72 | "How many funding entries do you want to provide? ": "1", 73 | "Enter the award number for funding: ": "NSF-1234567", 74 | "Enter the award title for funding: ": "Automated Testing Grant", 75 | "Enter the funder ROR (https://ror.org): ": "021nxhr62", 76 | "Do you want to upload or link data files? (upload/link/n): ": "upload", 77 | "Enter the filename to upload as a supporting file (or 'n' to finish): ": "test_data.csv", 78 | "Do you want to add more files? 
(y/n): ": "n", 79 | "Do you want to send this record to CaltechDATA? (y/n): ": "y", 80 | } 81 | 82 | def run_test_submission(self): 83 | """Run the complete test submission process""" 84 | try: 85 | self.log("Starting test submission process...") 86 | 87 | # Create test files 88 | test_csv = self.create_test_files() 89 | 90 | # Dynamically import cli module 91 | cli_module = self.import_cli_module() 92 | 93 | # Generate responses 94 | responses = self.generate_test_responses() 95 | 96 | # Setup output capture 97 | class OutputCapture: 98 | def __init__(self): 99 | self.output = [] 100 | 101 | def write(self, text): 102 | self.output.append(text) 103 | sys.__stdout__.write(text) 104 | 105 | def flush(self): 106 | pass 107 | 108 | def get_output(self): 109 | return "".join(self.output) 110 | 111 | output_capture = OutputCapture() 112 | sys.stdout = output_capture 113 | 114 | # Mock input and run CLI 115 | def mock_input(prompt): 116 | self.log(f"Prompt: {prompt}") 117 | if prompt in responses: 118 | response = responses[prompt] 119 | self.log(f"Response: {response}") 120 | return response 121 | return "" 122 | 123 | with patch("builtins.input", side_effect=mock_input): 124 | # Use -test flag to use test mode 125 | sys.argv = [sys.argv[0], "-test"] 126 | cli_module.main() 127 | 128 | # Restore stdout 129 | sys.stdout = sys.__stdout__ 130 | 131 | return True 132 | 133 | except Exception as e: 134 | self.log(f"Error in test submission: {e}") 135 | traceback.print_exc() 136 | return False 137 | finally: 138 | # Cleanup 139 | if "test_csv" in locals() and os.path.exists(test_csv): 140 | os.remove(test_csv) 141 | self.log("Test files cleaned up") 142 | 143 | 144 | def main(): 145 | tester = CaltechDataTester() 146 | 147 | success = tester.run_test_submission() 148 | 149 | if success: 150 | tester.log("\n🎉 Test submission completed successfully!") 151 | sys.exit(0) 152 | else: 153 | tester.log("\n❌ Test submission failed - check logs for details") 154 | sys.exit(1) 155 | 
156 | 157 | if __name__ == "__main__": 158 | main() 159 | -------------------------------------------------------------------------------- /tests/data/caltechdata/1235.json: -------------------------------------------------------------------------------- 1 | { 2 | "created": "2019-04-29T20:13:59.728273+00:00", 3 | "id": 1235, 4 | "links": { 5 | "self": "http://data.caltech.edu/api/record/1235" 6 | }, 7 | "metadata": { 8 | "_form_uuid": "beae3039-29ed-4e20-bd21-6ed6e994afa5", 9 | "alternateIdentifiers": [ 10 | { 11 | "alternateIdentifier": "1235", 12 | "alternateIdentifierType": "CaltechDATA_Identifier" 13 | } 14 | ], 15 | "authors": [ 16 | { 17 | "authorAffiliation": [ 18 | "Caltech Library" 19 | ], 20 | "authorIdentifiers": [ 21 | { 22 | "authorIdentifier": "0000-0001-9266-5146", 23 | "authorIdentifierScheme": "ORCID" 24 | } 25 | ], 26 | "authorName": "Morrell, Thomas E" 27 | } 28 | ], 29 | "control_number": "1235", 30 | "descriptions": [ 31 | { 32 | "descriptionType": "Abstract", 33 | "descriptionValue": "First included in ames, this notebook dynamically shows how many records are in CaltechDATA and where they come from (GitHub, Deposit Form, or API). This repository is set to work with MyBinder so you can easily reproduce the plot and include new records. " 34 | }, 35 | { 36 | "descriptionType": "Other", 37 | "descriptionValue": "
Cite this record as:
Morrell, T. E. (2019, April 29). caltechlibrary/caltechdata_usage: First release of CaltechDATA Usage notebook (Version v0.0.1). CaltechDATA. https://doi.org/10.22002/d1.1235
or choose a different citation style.
Download Citation
" 38 | }, 39 | { 40 | "descriptionType": "Other", 41 | "descriptionValue": "
Unique Views: 4
Unique Downloads: 1
between April 29, 2019 and July 02, 2020
More info on how stats are collected
" 42 | } 43 | ], 44 | "doi": "10.22002/D1.1235", 45 | "electronic_location_and_access": [ 46 | { 47 | "access_method": "HTTP", 48 | "electronic_name": [ 49 | "caltechlibrary_caltechdata_usage-v0.0.1.zip" 50 | ], 51 | "embargo_status": "{{embargo_status}}", 52 | "file_size": "87521", 53 | "uniform_resource_identifier": "https://data.caltech.edu/tindfiles/serve/9762705a-1de2-4f2c-9553-5150ba6e98e1/" 54 | } 55 | ], 56 | "files": [ 57 | { 58 | "id": "9762705a-1de2-4f2c-9553-5150ba6e98e1", 59 | "path": "https://data.caltech.edu/tindfiles/serve/9762705a-1de2-4f2c-9553-5150ba6e98e1/" 60 | } 61 | ], 62 | "id": "1235", 63 | "owners": [ 64 | 2 65 | ], 66 | "pid_value": "1235", 67 | "publicationDate": "2019-04-29", 68 | "publishers": [ 69 | { 70 | "publisherName": "CaltechDATA" 71 | } 72 | ], 73 | "relatedIdentifiers": [ 74 | { 75 | "relatedIdentifier": "https://github.com/caltechlibrary/caltechdata_usage/releases/tag/v0.0.1", 76 | "relatedIdentifierRelation": "IsIdenticalTo", 77 | "relatedIdentifierScheme": "URL" 78 | } 79 | ], 80 | "relevantDates": [ 81 | { 82 | "relevantDateType": "Issued", 83 | "relevantDateValue": "2019-04-29" 84 | } 85 | ], 86 | "resourceType": { 87 | "resourceTypeGeneral": "Software" 88 | }, 89 | "rightsList": { 90 | "rights": "license", 91 | "rightsURI": "https://data.caltech.edu/license" 92 | }, 93 | "subjects": [ 94 | "CaltechDATA", 95 | "reporitory", 96 | "usage", 97 | "Jupyter", 98 | "GitHub" 99 | ], 100 | "title": "caltechlibrary/caltechdata_usage: First release of CaltechDATA Usage notebook", 101 | "version": "v0.0.1" 102 | }, 103 | "updated": "2020-07-02T20:40:41.944666+00:00" 104 | } -------------------------------------------------------------------------------- /tests/data/caltechdata/1250.json: -------------------------------------------------------------------------------- 1 | { 2 | "created": "2019-06-19T16:27:52.460707+00:00", 3 | "id": 1250, 4 | "links": { 5 | "self": "http://data.caltech.edu/api/record/1250" 6 | }, 7 | "metadata": { 8 
| "_form_uuid": "beae3039-29ed-4e20-bd21-6ed6e994afa5", 9 | "alternateIdentifiers": [ 10 | { 11 | "alternateIdentifier": "1250", 12 | "alternateIdentifierType": "CaltechDATA_Identifier" 13 | } 14 | ], 15 | "authors": [ 16 | { 17 | "authorAffiliation": [ 18 | "Caltech Library" 19 | ], 20 | "authorIdentifiers": [ 21 | { 22 | "authorIdentifier": "0000-0001-9266-5146", 23 | "authorIdentifierScheme": "ORCID" 24 | } 25 | ], 26 | "authorName": "Morrell, Thomas E" 27 | } 28 | ], 29 | "control_number": "1250", 30 | "descriptions": [ 31 | { 32 | "descriptionType": "Abstract", 33 | "descriptionValue": "This release includes two months more data and has some dependency updates." 34 | }, 35 | { 36 | "descriptionType": "Other", 37 | "descriptionValue": "Jupyter notebooks highlighting usage of CaltechDATA" 38 | }, 39 | { 40 | "descriptionType": "Other", 41 | "descriptionValue": "
Click to run this software:
" 42 | }, 43 | { 44 | "descriptionType": "Other", 45 | "descriptionValue": "
Cite this record as:
Morrell, T. E. (2019, June 19). caltechlibrary/caltechdata_usage: Jupyter notebook with visualization of submissions to CaltechDATA (Version v0.0.2). CaltechDATA. https://doi.org/10.22002/d1.1250
or choose a different citation style.
Download Citation
" 46 | }, 47 | { 48 | "descriptionType": "Other", 49 | "descriptionValue": "
Unique Views: 85
Unique Downloads: 2
between June 19, 2019 and July 02, 2020
More info on how stats are collected
" 50 | } 51 | ], 52 | "doi": "10.22002/D1.1250", 53 | "electronic_location_and_access": [ 54 | { 55 | "access_method": "HTTP", 56 | "electronic_name": [ 57 | "caltechlibrary_caltechdata_usage-v0.0.2.zip" 58 | ], 59 | "embargo_status": "{{embargo_status}}", 60 | "file_size": "90421", 61 | "uniform_resource_identifier": "https://data.caltech.edu/tindfiles/serve/45bc4db6-7d54-4cb7-b98a-ad9de15b0e29/" 62 | } 63 | ], 64 | "files": [ 65 | { 66 | "id": "45bc4db6-7d54-4cb7-b98a-ad9de15b0e29", 67 | "path": "https://data.caltech.edu/tindfiles/serve/45bc4db6-7d54-4cb7-b98a-ad9de15b0e29/" 68 | } 69 | ], 70 | "id": "1250", 71 | "owners": [ 72 | 2 73 | ], 74 | "pid_value": "1250", 75 | "publicationDate": "2019-06-19", 76 | "publishers": [ 77 | { 78 | "publisherName": "CaltechDATA" 79 | } 80 | ], 81 | "relatedIdentifiers": [ 82 | { 83 | "relatedIdentifier": "https://github.com/caltechlibrary/caltechdata_usage/releases/tag/v0.0.2", 84 | "relatedIdentifierRelation": "IsIdenticalTo", 85 | "relatedIdentifierScheme": "URL" 86 | } 87 | ], 88 | "relevantDates": [ 89 | { 90 | "relevantDateType": "Issued", 91 | "relevantDateValue": "2019-06-19" 92 | } 93 | ], 94 | "resourceType": { 95 | "resourceTypeGeneral": "Software" 96 | }, 97 | "rightsList": { 98 | "rights": "license", 99 | "rightsURI": "https://data.caltech.edu/license" 100 | }, 101 | "subjects": [ 102 | "CaltechDATA", 103 | "reporitory", 104 | "usage", 105 | "Jupyter", 106 | "GitHub" 107 | ], 108 | "title": "caltechlibrary/caltechdata_usage: Jupyter notebook with visualization of submissions to CaltechDATA", 109 | "version": "v0.0.2" 110 | }, 111 | "updated": "2020-07-02T20:38:38.355371+00:00" 112 | } -------------------------------------------------------------------------------- /tests/data/caltechdata/1259.json: -------------------------------------------------------------------------------- 1 | { 2 | "created": "2019-07-16T22:53:14.663052+00:00", 3 | "id": 1259, 4 | "links": { 5 | "self": 
"http://data.caltech.edu/api/record/1259" 6 | }, 7 | "metadata": { 8 | "_form_uuid": "beae3039-29ed-4e20-bd21-6ed6e994afa5", 9 | "alternateIdentifiers": [ 10 | { 11 | "alternateIdentifier": "1259", 12 | "alternateIdentifierType": "CaltechDATA_Identifier" 13 | } 14 | ], 15 | "authors": [ 16 | { 17 | "authorAffiliation": [ 18 | "Caltech Library" 19 | ], 20 | "authorIdentifiers": [ 21 | { 22 | "authorIdentifier": "0000-0001-9266-5146", 23 | "authorIdentifierScheme": "ORCID" 24 | } 25 | ], 26 | "authorName": "Morrell, Thomas E" 27 | } 28 | ], 29 | "control_number": "1259", 30 | "descriptions": [ 31 | { 32 | "descriptionType": "Abstract", 33 | "descriptionValue": "This release includes a new notebook that determines the use of ORCID iDs across Caltech Library DOIs. It also updates all notebooks to use the latest version of ames and streamlines dependencies." 34 | }, 35 | { 36 | "descriptionType": "Other", 37 | "descriptionValue": "Jupyter notebooks highlighting usage of CaltechDATA" 38 | }, 39 | { 40 | "descriptionType": "Other", 41 | "descriptionValue": "
Click to run this software:
" 42 | }, 43 | { 44 | "descriptionType": "Other", 45 | "descriptionValue": "
Cite this record as:
Morrell, T. E. (2019, July 16). caltechlibrary/caltechdata_usage: Addition of ORCID analysis notebook and update for new ames version (Version v0.1.0). CaltechDATA. https://doi.org/10.22002/d1.1259
or choose a different citation style.
Download Citation
" 46 | }, 47 | { 48 | "descriptionType": "Other", 49 | "descriptionValue": "
Unique Views: 86
Unique Downloads: 1
between July 16, 2019 and July 02, 2020
More info on how stats are collected
" 50 | } 51 | ], 52 | "doi": "10.22002/D1.1259", 53 | "electronic_location_and_access": [ 54 | { 55 | "access_method": "HTTP", 56 | "electronic_name": [ 57 | "caltechlibrary_caltechdata_usage-v0.1.0.zip" 58 | ], 59 | "embargo_status": "{{embargo_status}}", 60 | "file_size": "101985", 61 | "uniform_resource_identifier": "https://data.caltech.edu/tindfiles/serve/96b518fe-8f28-4ff6-9a4c-d5cc59f4644e/" 62 | } 63 | ], 64 | "files": [ 65 | { 66 | "id": "96b518fe-8f28-4ff6-9a4c-d5cc59f4644e", 67 | "path": "https://data.caltech.edu/tindfiles/serve/96b518fe-8f28-4ff6-9a4c-d5cc59f4644e/" 68 | } 69 | ], 70 | "id": "1259", 71 | "owners": [ 72 | 2 73 | ], 74 | "pid_value": "1259", 75 | "publicationDate": "2019-07-16", 76 | "publishers": [ 77 | { 78 | "publisherName": "CaltechDATA" 79 | } 80 | ], 81 | "relatedIdentifiers": [ 82 | { 83 | "relatedIdentifier": "https://github.com/caltechlibrary/caltechdata_usage/releases/tag/v0.1.0", 84 | "relatedIdentifierRelation": "IsIdenticalTo", 85 | "relatedIdentifierScheme": "URL" 86 | } 87 | ], 88 | "relevantDates": [ 89 | { 90 | "relevantDateType": "Issued", 91 | "relevantDateValue": "2019-07-16" 92 | } 93 | ], 94 | "resourceType": { 95 | "resourceTypeGeneral": "Software" 96 | }, 97 | "rightsList": { 98 | "rights": "license", 99 | "rightsURI": "https://data.caltech.edu/license" 100 | }, 101 | "subjects": [ 102 | "CaltechDATA", 103 | "reporitory", 104 | "usage", 105 | "Jupyter", 106 | "GitHub" 107 | ], 108 | "title": "caltechlibrary/caltechdata_usage: Addition of ORCID analysis notebook and update for new ames version", 109 | "version": "v0.1.0" 110 | }, 111 | "updated": "2020-07-02T20:37:16.358971+00:00" 112 | } -------------------------------------------------------------------------------- /tests/data/caltechdata/293.json: -------------------------------------------------------------------------------- 1 | { 2 | "created": "2017-09-13T18:03:12.224037+00:00", 3 | "id": 293, 4 | "links": { 5 | "self": 
"http://data.caltech.edu/api/record/293" 6 | }, 7 | "metadata": { 8 | "_form_uuid": "beae3039-29ed-4e20-bd21-6ed6e994afa5", 9 | "_oai": { 10 | "id": "oai:data.caltech.edu:recid/293", 11 | "updated": "2017-09-13T18:03:12Z" 12 | }, 13 | "access_right": "open", 14 | "alternateIdentifiers": [ 15 | { 16 | "alternateIdentifier": "293", 17 | "alternateIdentifierType": "CaltechDATA_Identifier" 18 | } 19 | ], 20 | "authors": [ 21 | { 22 | "authorAffiliation": [ 23 | "TCCON Consortium" 24 | ], 25 | "authorName": "Total Carbon Column Observing Network (TCCON) Team" 26 | } 27 | ], 28 | "contributors": [ 29 | { 30 | "contributorAffiliation": [ 31 | "California Institute of Technology, Pasadena, CA, U.S.A." 32 | ], 33 | "contributorEmail": "dwunch@atmosp.physics.utoronto.ca", 34 | "contributorIdentifiers": [ 35 | { 36 | "contributorIdentifier": "0000-0002-4924-0377", 37 | "contributorIdentifierScheme": "ORCID" 38 | } 39 | ], 40 | "contributorName": "Wunch, Debra", 41 | "contributorType": "ContactPerson" 42 | }, 43 | { 44 | "contributorAffiliation": [ 45 | "California Institute of Technology, Pasadena, CA (US)" 46 | ], 47 | "contributorEmail": "wennberg@caltech.edu", 48 | "contributorIdentifiers": [ 49 | { 50 | "contributorIdentifier": "0000-0002-6126-3854", 51 | "contributorIdentifierScheme": "ORCID" 52 | } 53 | ], 54 | "contributorName": "Wennberg, P. O. ", 55 | "contributorType": "ContactPerson" 56 | }, 57 | { 58 | "contributorAffiliation": [ 59 | "Centre for Atmospheric Chemistry, School of Chemistry, University of Wollongong, Wollongong, NSW (AU)" 60 | ], 61 | "contributorEmail": "griffith@uow.edu.au", 62 | "contributorIdentifiers": [ 63 | { 64 | "contributorIdentifier": " 0000-0002-7986-1924", 65 | "contributorIdentifierScheme": "ORCID" 66 | } 67 | ], 68 | "contributorName": "Griffith, D. 
W.T.", 69 | "contributorType": "ContactPerson" 70 | }, 71 | { 72 | "contributorAffiliation": [ 73 | " Institute of Environmental Physics, University of Bremen, Bremen (DE), Centre for Atmospheric Chemistry, School of Chemistry, University of Wollongong, Wollongong, NSW (AU) " 74 | ], 75 | "contributorEmail": "n_deutscher@iup.physik.uni-bremen.de", 76 | "contributorIdentifiers": [ 77 | { 78 | "contributorIdentifier": "0000-0002-2906-2577", 79 | "contributorIdentifierScheme": "ORCID" 80 | } 81 | ], 82 | "contributorName": "Deutscher, N. M.", 83 | "contributorType": "ContactPerson" 84 | }, 85 | { 86 | "contributorAffiliation": [ 87 | "Max Planck Institute for Biogeochemistry, Jena (DE)" 88 | ], 89 | "contributorEmail": "dfeist@bgc-jena.mpg.de", 90 | "contributorIdentifiers": [ 91 | { 92 | "contributorIdentifier": "0000-0002-5890-6687", 93 | "contributorIdentifierScheme": "ORCID" 94 | } 95 | ], 96 | "contributorName": "Feist, D. G.", 97 | "contributorType": "ContactPerson" 98 | }, 99 | { 100 | "contributorAffiliation": [ 101 | "Institute of Environmental Physics, University of Bremen, Bremen (DE)" 102 | ], 103 | "contributorEmail": "jnotholt@iup.physik.uni-bremen.de", 104 | "contributorIdentifiers": [ 105 | { 106 | "contributorIdentifier": "0000-0002-3324-885X", 107 | "contributorIdentifierScheme": "ORCID" 108 | } 109 | ], 110 | "contributorName": "Notholt, J.", 111 | "contributorType": "ContactPerson" 112 | } 113 | ], 114 | "control_number": "293", 115 | "descriptions": [ 116 | { 117 | "descriptionType": "Other", 118 | "descriptionValue": "The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This is the 2014 data release." 
119 | }, 120 | { 121 | "descriptionType": "Other", 122 | "descriptionValue": "
Unique Views: 953
Unique Downloads: 98
between September 13, 2017 and July 02, 2020
More info on how stats are collected
" 123 | }, 124 | { 125 | "descriptionType": "Other", 126 | "descriptionValue": "
Cite this record as:
Total Carbon Column Observing Network (TCCON) Team. (2017). 2014 TCCON Data Release (Version GGG2014) [Data set]. CaltechDATA. https://doi.org/10.14291/TCCON.GGG2014
or choose a different citation style.
Download Citation
" 127 | } 128 | ], 129 | "doi": "10.14291/TCCON.GGG2014", 130 | "electronic_location_and_access": [ 131 | { 132 | "access_method": "HTTP", 133 | "electronic_name": [ 134 | "tccon.latest.public.tgz" 135 | ], 136 | "embargo_status": "open", 137 | "file_size": "236307805", 138 | "uniform_resource_identifier": "https://data.caltech.edu/tindfiles/serve/d739803e-d069-4a4e-9070-a85ed6ddb07d/" 139 | }, 140 | { 141 | "access_method": "HTTP", 142 | "electronic_name": [ 143 | "LICENSE.txt" 144 | ], 145 | "embargo_status": "open", 146 | "file_size": "11436", 147 | "uniform_resource_identifier": "https://data.caltech.edu/tindfiles/serve/24d2401d-d2b7-42e1-83b1-1ee01839d84d/" 148 | } 149 | ], 150 | "files": [ 151 | { 152 | "id": "d739803e-d069-4a4e-9070-a85ed6ddb07d", 153 | "path": "https://data.caltech.edu/tindfiles/serve/d739803e-d069-4a4e-9070-a85ed6ddb07d/" 154 | }, 155 | { 156 | "id": "24d2401d-d2b7-42e1-83b1-1ee01839d84d", 157 | "path": "https://data.caltech.edu/tindfiles/serve/24d2401d-d2b7-42e1-83b1-1ee01839d84d/" 158 | } 159 | ], 160 | "format": [ 161 | ".tgz", 162 | ".nc" 163 | ], 164 | "id": "293", 165 | "language": "eng", 166 | "license": "other-license", 167 | "owners": [ 168 | 2 169 | ], 170 | "pid_value": "293", 171 | "publicationDate": "2017-09-13", 172 | "publishers": { 173 | "publisherName": "CaltechDATA" 174 | }, 175 | "relatedIdentifiers": [ 176 | { 177 | "relatedIdentifier": "10.14291/TCCON.GGG2014.DOCUMENTATION.R0/1221662", 178 | "relatedIdentifierRelation": "IsDocumentedBy", 179 | "relatedIdentifierScheme": "DOI" 180 | }, 181 | { 182 | "relatedIdentifier": "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description", 183 | "relatedIdentifierRelation": "IsDocumentedBy", 184 | "relatedIdentifierScheme": "URL" 185 | }, 186 | { 187 | "relatedIdentifier": "https://tccon-wiki.caltech.edu/Sites", 188 | "relatedIdentifierRelation": "IsDocumentedBy", 189 | "relatedIdentifierScheme": "DOI" 190 | } 191 | ], 192 | "relevantDates": [ 193 | { 194 | 
"relevantDateType": "Updated", 195 | "relevantDateValue": "2020-07-01" 196 | }, 197 | { 198 | "relevantDateType": "Submitted", 199 | "relevantDateValue": "2017-09-13" 200 | } 201 | ], 202 | "resourceType": { 203 | "resourceTypeGeneral": "Dataset" 204 | }, 205 | "rightsList": { 206 | "rights": "TCCON Data Use Policy", 207 | "rightsURI": "https://data.caltech.edu/tindfiles/serve/24d2401d-d2b7-42e1-83b1-1ee01839d84d/" 208 | }, 209 | "subjects": [ 210 | "atmospheric trace gases", 211 | " CO2", 212 | " CH4", 213 | " CO", 214 | " N2O", 215 | " column-averaged dry-air mole fractions", 216 | " remote sensing", 217 | " FTIR spectroscopy", 218 | " TCCON" 219 | ], 220 | "title": "2014 TCCON Data Release", 221 | "version": "GGG2014" 222 | }, 223 | "updated": "2020-07-03T10:56:19.627073+00:00" 224 | } -------------------------------------------------------------------------------- /tests/data/datacite43/4yxbs-4mj38.json: -------------------------------------------------------------------------------- 1 | { 2 | "creators": [ 3 | { 4 | "affiliation": [ 5 | { 6 | "affiliationIdentifier": "05dxps055", 7 | "affiliationIdentifierScheme": "ROR", 8 | "name": "California Institute of Technology" 9 | } 10 | ], 11 | "familyName": "Law", 12 | "givenName": "Casey", 13 | "name": "Casey Law", 14 | "nameIdentifiers": [ 15 | { 16 | "nameIdentifier": "0000-0002-4119-9963", 17 | "nameIdentifierScheme": "ORCID" 18 | } 19 | ], 20 | "nameType": "Personal" 21 | } 22 | ], 23 | "dates": [ 24 | { 25 | "date": "2023", 26 | "dateType": "Issued" 27 | }, 28 | { 29 | "date": "2023-04-21", 30 | "dateType": "Created" 31 | } 32 | ], 33 | "descriptions": [ 34 | { 35 | "description": "Data associated with DSA-110 candidate transient. Each filterbank is saved at maximum native resolution (32.7 microseconds, 30.4 kHz) and contains ~0.669696 seconds (20480 samples) around the burst across the full DSA-110 187 MHz (6144 channels) frequency band. 
The Stokes parameters have been calibrated using observations of the 3C48 and 3C286 Very Large Array (VLA) calibrators as described in Sherman et al., 2024 (https://doi.org/10.3847/1538-4357/ad275e ; see Appendices D and E). Note that there may be minute differences between the data contained here and that reported in Sherman et al., 2024 due to being calibrated at maximum time resolution, rather than being downsampled first.", 36 | "descriptionType": "Abstract" 37 | } 38 | ], 39 | "formats": [ 40 | "png" 41 | ], 42 | "fundingReferences": [ 43 | { 44 | "funderIdentifier": "grid.431093.c", 45 | "funderIdentifierType": "GRID", 46 | "funderName": "National Science Foundation" 47 | } 48 | ], 49 | "geoLocations": [ 50 | { 51 | "geoLocationPlace": "OVRO", 52 | "geoLocationPoint": { 53 | "pointLatitude": 37.2339, 54 | "pointLongitude": -118.282 55 | } 56 | } 57 | ], 58 | "identifiers": [ 59 | { 60 | "identifier": "10.22002/4yxbs-4mj38", 61 | "identifierType": "DOI" 62 | }, 63 | { 64 | "identifier": "oai:data.caltech.edu:4yxbs-4mj38", 65 | "identifierType": "oai" 66 | }, 67 | { 68 | "identifier": "220506aabd", 69 | "identifierType": "dsa-110-id" 70 | }, 71 | { 72 | "identifier": "byyt8-y6a26", 73 | "identifierType": "cdid" 74 | } 75 | ], 76 | "publicationYear": "2023", 77 | "publisher": "Caltech Data", 78 | "relatedIdentifiers": [ 79 | { 80 | "relatedIdentifier": "http://deepsynoptic.org", 81 | "relatedIdentifierType": "URL", 82 | "relationType": "IsDocumentedBy" 83 | } 84 | ], 85 | "rightsList": [ 86 | { 87 | "rights": "cc-by-4.0" 88 | } 89 | ], 90 | "schemaVersion": "http://datacite.org/schema/kernel-4", 91 | "subjects": [ 92 | { 93 | "subject": "OVRO" 94 | }, 95 | { 96 | "subject": "Astrophysics" 97 | }, 98 | { 99 | "subject": "Fast Radio Bursts" 100 | } 101 | ], 102 | "titles": [ 103 | { 104 | "title": "DSA-110 Data for Candidate Fast Radio Burst 220506aabd" 105 | } 106 | ], 107 | "types": { 108 | "resourceType": "", 109 | "resourceTypeGeneral": "Dataset" 110 | }, 111 
| "version": "2.0" 112 | } -------------------------------------------------------------------------------- /tests/data/datacite43/asjw8-cd908.json: -------------------------------------------------------------------------------- 1 | { 2 | "creators": [ 3 | { 4 | "familyName": "Sloan", 5 | "givenName": "Julia", 6 | "name": "Sloan, Julia", 7 | "nameIdentifiers": [ 8 | { 9 | "nameIdentifier": "0000-0003-0200-063X", 10 | "nameIdentifierScheme": "ORCID" 11 | } 12 | ], 13 | "nameType": "Personal" 14 | } 15 | ], 16 | "dates": [ 17 | { 18 | "date": "2024-10-25", 19 | "dateType": "Issued" 20 | }, 21 | { 22 | "date": "2024-10-31", 23 | "dateInformation": "Correct file added", 24 | "dateType": "Updated" 25 | } 26 | ], 27 | "descriptions": [ 28 | { 29 | "description": "This artifact contains two datasets: one corresponding to a simulation solving Richards Equation in clay, and another solving it in sand. These experiments were conducted in Gordon Bonan's \"Climate Change and Terrestrial Ecosystem Modeling\" textbook, Chapter 8 supplementary program 1.\nFull citation: Bonan, Gordon. Climate Change and Terrestrial Ecosystem Modeling. 
Cambridge University Press, 2019.", 30 | "descriptionType": "Abstract" 31 | } 32 | ], 33 | "identifiers": [ 34 | { 35 | "identifier": "10.22002/asjw8-cd908", 36 | "identifierType": "DOI" 37 | }, 38 | { 39 | "identifier": "oai:data.caltech.edu:asjw8-cd908", 40 | "identifierType": "oai" 41 | } 42 | ], 43 | "publicationYear": "2024", 44 | "publisher": "CaltechDATA", 45 | "rightsList": [ 46 | { 47 | "rights": "Creative Commons Zero v1.0 Universal", 48 | "rightsIdentifier": "cc0-1.0", 49 | "rightsIdentifierScheme": "spdx", 50 | "rightsUri": "https://creativecommons.org/publicdomain/zero/1.0/legalcode" 51 | } 52 | ], 53 | "schemaVersion": "http://datacite.org/schema/kernel-4", 54 | "titles": [ 55 | { 56 | "title": "Bonan 2019 Richards Eqn Data" 57 | } 58 | ], 59 | "types": { 60 | "resourceType": "", 61 | "resourceTypeGeneral": "Dataset" 62 | } 63 | } -------------------------------------------------------------------------------- /tests/data/datacite43/b2jqz-qdw65.json: -------------------------------------------------------------------------------- 1 | { 2 | "contributors": [ 3 | { 4 | "affiliation": [ 5 | { 6 | "name": "Department of Biological Sciences, Tata Institute of Fundamental Research, Mumbai, Maharashtra, India" 7 | } 8 | ], 9 | "contributorType": "ContactPerson", 10 | "familyName": "Koushika", 11 | "givenName": "Sandhya P.", 12 | "name": "Koushika, Sandhya P.", 13 | "nameIdentifiers": [ 14 | { 15 | "nameIdentifier": "0000-0002-1742-7356", 16 | "nameIdentifierScheme": "ORCID" 17 | } 18 | ], 19 | "nameType": "Personal" 20 | } 21 | ], 22 | "creators": [ 23 | { 24 | "affiliation": [ 25 | { 26 | "name": "Department of Biological Sciences, Tata Institute of Fundamental Research, Mumbai, Maharashtra, India" 27 | } 28 | ], 29 | "familyName": "Vasudevan", 30 | "givenName": "Amruta", 31 | "name": "Vasudevan, Amruta", 32 | "nameIdentifiers": [ 33 | { 34 | "nameIdentifier": "0000-0002-5777-9508", 35 | "nameIdentifierScheme": "ORCID" 36 | } 37 | ], 38 | "nameType": 
"Personal" 39 | } 40 | ], 41 | "dates": [ 42 | { 43 | "date": "2024-06-25", 44 | "dateType": "Issued" 45 | }, 46 | { 47 | "date": "2024-06-24", 48 | "dateType": "Accepted" 49 | } 50 | ], 51 | "descriptions": [ 52 | { 53 | "description": "Raw data for figure 1", 54 | "descriptionType": "Abstract" 55 | } 56 | ], 57 | "fundingReferences": [ 58 | { 59 | "funderName": "The authors gratefully acknowledge support from the Department of Atomic Energy, Government of India (DAE) grants 12-R\\&D-IMS-5.02-0202 and 1303/2/2019/R\\&DII/DAE/2079 (dated 11.02.2020 to S.P.K.), the Howard Hughes Medical Institute (HHMI) International Early Career Scientist (IECS) grant 55007425 (to S.P.K.), CSIR (to S.P.K.), and funding from the PRISM project at the Institute of Mathematical Sciences (to S.P.K.) for research costs. The authors gratefully acknowledge salary support from TIFR-DAE (for A.V.)." 60 | } 61 | ], 62 | "identifiers": [ 63 | { 64 | "identifier": "10.22002/b2jqz-qdw65", 65 | "identifierType": "DOI" 66 | }, 67 | { 68 | "identifier": "oai:data.caltech.edu:b2jqz-qdw65", 69 | "identifierType": "oai" 70 | } 71 | ], 72 | "language": "eng", 73 | "publicationYear": "2024", 74 | "publisher": "CaltechDATA", 75 | "relatedIdentifiers": [ 76 | { 77 | "relatedIdentifier": "10.17912/micropub.biology.001204", 78 | "relatedIdentifierType": "DOI", 79 | "relationType": "IsPartOf", 80 | "resourceTypeGeneral": "Text" 81 | } 82 | ], 83 | "rightsList": [ 84 | { 85 | "rights": "Creative Commons Attribution 4.0 International", 86 | "rightsIdentifier": "cc-by-4.0", 87 | "rightsIdentifierScheme": "spdx", 88 | "rightsUri": "https://creativecommons.org/licenses/by/4.0/legalcode" 89 | } 90 | ], 91 | "schemaVersion": "http://datacite.org/schema/kernel-4", 92 | "subjects": [ 93 | { 94 | "subject": "c. 
elegans" 95 | } 96 | ], 97 | "titles": [ 98 | { 99 | "title": "Dataset: Physical presence of chemical synapses is necessary for turning behavior of anterograde synaptic vesicles at the branch point of PLM neurons in C. elegans" 100 | } 101 | ], 102 | "types": { 103 | "resourceType": "", 104 | "resourceTypeGeneral": "Dataset" 105 | }, 106 | "version": "1.0" 107 | } -------------------------------------------------------------------------------- /tests/data/datacite43/d7mk4-f8t44.json: -------------------------------------------------------------------------------- 1 | { 2 | "contributors": [ 3 | { 4 | "contributorType": "DataCurator", 5 | "familyName": "Civilini", 6 | "givenName": "Francesco", 7 | "name": "Civilini, Francesco", 8 | "nameIdentifiers": [ 9 | { 10 | "nameIdentifier": "0000-0003-0669-0404", 11 | "nameIdentifierScheme": "ORCID" 12 | } 13 | ], 14 | "nameType": "Personal" 15 | }, 16 | { 17 | "affiliation": [ 18 | { 19 | "affiliationIdentifier": "05dxps055", 20 | "affiliationIdentifierScheme": "ROR", 21 | "name": "California Institute of Technology" 22 | } 23 | ], 24 | "contributorType": "Other", 25 | "familyName": "Husker", 26 | "givenName": "Allen", 27 | "name": "Husker, Allen", 28 | "nameIdentifiers": [ 29 | { 30 | "nameIdentifier": "0000-0003-1139-0502", 31 | "nameIdentifierScheme": "ORCID" 32 | } 33 | ], 34 | "nameType": "Personal" 35 | }, 36 | { 37 | "contributorType": "Other", 38 | "familyName": "Weber", 39 | "givenName": "Renee", 40 | "name": "Weber, Renee", 41 | "nameIdentifiers": [ 42 | { 43 | "nameIdentifier": "0000-0002-1649-483X", 44 | "nameIdentifierScheme": "ORCID" 45 | } 46 | ], 47 | "nameType": "Personal" 48 | } 49 | ], 50 | "creators": [ 51 | { 52 | "familyName": "Civilini", 53 | "givenName": "Francesco", 54 | "name": "Civilini, Francesco", 55 | "nameIdentifiers": [], 56 | "nameType": "Personal" 57 | } 58 | ], 59 | "dates": [ 60 | { 61 | "date": "2024-10-24", 62 | "dateType": "Issued" 63 | } 64 | ], 65 | "descriptions": [ 66 | { 67 | 
"description": "This dataset contains data from the Lunar Seismic Profiling Experiment as well as results from the JGR: Planets publication \"Thermal moonquake characterization and cataloging using frequency-based algorithms and stochastic gradient descent\".\u00a0\nThe code to compute the results can be found in the GitHub here:\nhttps://github.com/civilinifr/thermal_mq_analysis\nalso published through Zenodo here:\nhttp://doi.org/10.5281/zenodo.8025056\n\u00a0\nv2: Fixed files in LSPE_sac_hourly.zip to contain correct start and end times.\u00a0\n\u00a0\nIncludes:\n\nThermal moonquake catalog of Grade-A LSPE events\nDaily ASCII files in units of decompressed volts (filenames daily_ascii_YYYYMMDD_YYYYMMDD.zip)\nHourly SAC files in units of decompressed volts (filename LSPE_sac_hourly.zip)\nHourly SAC files in units of nm/s (filenames LSPE_sac_hourly_phys_p1.zip and LSPE_sac_hourly_phys_p2.zip)\nAnalysis results from the manuscript (filename lunar_output.zip)", 68 | "descriptionType": "Abstract" 69 | } 70 | ], 71 | "identifiers": [ 72 | { 73 | "identifier": "10.22002/d7mk4-f8t44", 74 | "identifierType": "DOI" 75 | }, 76 | { 77 | "identifier": "oai:data.caltech.edu:d7mk4-f8t44", 78 | "identifierType": "oai" 79 | } 80 | ], 81 | "publicationYear": "2024", 82 | "publisher": "CaltechDATA", 83 | "rightsList": [ 84 | { 85 | "rights": "Creative Commons Zero v1.0 Universal", 86 | "rightsIdentifier": "cc0-1.0", 87 | "rightsIdentifierScheme": "spdx", 88 | "rightsUri": "https://creativecommons.org/publicdomain/zero/1.0/legalcode" 89 | } 90 | ], 91 | "schemaVersion": "http://datacite.org/schema/kernel-4", 92 | "titles": [ 93 | { 94 | "title": "Apollo 17 Lunar Seismic Profiling Experiment Seismic Data and Thermal Moonquake Catalog" 95 | } 96 | ], 97 | "types": { 98 | "resourceType": "", 99 | "resourceTypeGeneral": "Dataset" 100 | }, 101 | "version": "v2.0" 102 | } -------------------------------------------------------------------------------- 
/tests/data/datacite43/dks9f-mj878.json: -------------------------------------------------------------------------------- 1 | { 2 | "creators": [ 3 | { 4 | "affiliation": [ 5 | { 6 | "affiliationIdentifier": "05dxps055", 7 | "affiliationIdentifierScheme": "ROR", 8 | "name": "California Institute of Technology" 9 | } 10 | ], 11 | "familyName": "Gray", 12 | "givenName": "Robert M.", 13 | "name": "Gray, Robert M.", 14 | "nameIdentifiers": [ 15 | { 16 | "nameIdentifier": "0000-0001-5980-8774", 17 | "nameIdentifierScheme": "ORCID" 18 | } 19 | ], 20 | "nameType": "Personal" 21 | }, 22 | { 23 | "affiliation": [ 24 | { 25 | "affiliationIdentifier": "05dxps055", 26 | "affiliationIdentifierScheme": "ROR", 27 | "name": "California Institute of Technology" 28 | } 29 | ], 30 | "familyName": "Liu", 31 | "givenName": "Mingchen", 32 | "name": "Liu, Mingchen", 33 | "nameIdentifiers": [], 34 | "nameType": "Personal" 35 | }, 36 | { 37 | "affiliation": [ 38 | { 39 | "affiliationIdentifier": "05dxps055", 40 | "affiliationIdentifierScheme": "ROR", 41 | "name": "California Institute of Technology" 42 | } 43 | ], 44 | "familyName": "Zhou", 45 | "givenName": "Selina", 46 | "name": "Zhou, Selina", 47 | "nameIdentifiers": [], 48 | "nameType": "Personal" 49 | } 50 | ], 51 | "dates": [ 52 | { 53 | "date": "2024-07-20", 54 | "dateType": "Issued" 55 | } 56 | ], 57 | "descriptions": [ 58 | { 59 | "description": "Data and processing code corresponding to the manuscript, \"Quadratic-soliton-enhanced mid-IR molecular sensing.\"", 60 | "descriptionType": "Abstract" 61 | } 62 | ], 63 | "identifiers": [ 64 | { 65 | "identifier": "10.22002/dks9f-mj878", 66 | "identifierType": "DOI" 67 | }, 68 | { 69 | "identifier": "oai:data.caltech.edu:dks9f-mj878", 70 | "identifierType": "oai" 71 | } 72 | ], 73 | "publicationYear": "2024", 74 | "publisher": "CaltechDATA", 75 | "relatedIdentifiers": [ 76 | { 77 | "relatedIdentifier": "arXiv:2301.07826", 78 | "relatedIdentifierType": "arXiv", 79 | "relationType": 
"IsDescribedBy", 80 | "resourceTypeGeneral": "Text" 81 | } 82 | ], 83 | "rightsList": [ 84 | { 85 | "rights": "Creative Commons Attribution 4.0 International", 86 | "rightsIdentifier": "cc-by-4.0", 87 | "rightsIdentifierScheme": "spdx", 88 | "rightsUri": "https://creativecommons.org/licenses/by/4.0/legalcode" 89 | } 90 | ], 91 | "schemaVersion": "http://datacite.org/schema/kernel-4", 92 | "titles": [ 93 | { 94 | "title": "Data for \"Quadratic-soliton-enhanced mid-IR molecular sensing\"" 95 | } 96 | ], 97 | "types": { 98 | "resourceType": "", 99 | "resourceTypeGeneral": "Dataset" 100 | } 101 | } -------------------------------------------------------------------------------- /tests/data/datacite43/ep884-g0v97.json: -------------------------------------------------------------------------------- 1 | { 2 | "creators": [ 3 | { 4 | "familyName": "Chen", 5 | "givenName": "Zibo", 6 | "name": "Chen, Zibo", 7 | "nameIdentifiers": [], 8 | "nameType": "Personal" 9 | } 10 | ], 11 | "dates": [ 12 | { 13 | "date": "2024-06-19", 14 | "dateType": "Issued" 15 | } 16 | ], 17 | "identifiers": [ 18 | { 19 | "identifier": "10.22002/ep884-g0v97", 20 | "identifierType": "DOI" 21 | }, 22 | { 23 | "identifier": "oai:data.caltech.edu:ep884-g0v97", 24 | "identifierType": "oai" 25 | } 26 | ], 27 | "publicationYear": "2024", 28 | "publisher": "CaltechDATA", 29 | "rightsList": [ 30 | { 31 | "rights": "Creative Commons Zero v1.0 Universal", 32 | "rightsIdentifier": "cc0-1.0", 33 | "rightsIdentifierScheme": "spdx", 34 | "rightsUri": "https://creativecommons.org/publicdomain/zero/1.0/legalcode" 35 | } 36 | ], 37 | "schemaVersion": "http://datacite.org/schema/kernel-4", 38 | "titles": [ 39 | { 40 | "title": "A synthetic protein-level neural network in mammalian cells" 41 | } 42 | ], 43 | "types": { 44 | "resourceType": "", 45 | "resourceTypeGeneral": "Dataset" 46 | }, 47 | "version": "2.0" 48 | } -------------------------------------------------------------------------------- 
/tests/data/datacite43/f40da-hww21.json: -------------------------------------------------------------------------------- 1 | { 2 | "creators": [ 3 | { 4 | "affiliation": [ 5 | { 6 | "affiliationIdentifier": "035a68863", 7 | "affiliationIdentifierScheme": "ROR", 8 | "name": "United States Geological Survey" 9 | }, 10 | { 11 | "affiliationIdentifier": "05dxps055", 12 | "affiliationIdentifierScheme": "ROR", 13 | "name": "California Institute of Technology" 14 | } 15 | ], 16 | "familyName": "Atterholt", 17 | "givenName": "James", 18 | "name": "Atterholt, James", 19 | "nameIdentifiers": [ 20 | { 21 | "nameIdentifier": "0000-0003-1603-5518", 22 | "nameIdentifierScheme": "ORCID" 23 | } 24 | ], 25 | "nameType": "Personal" 26 | }, 27 | { 28 | "affiliation": [ 29 | { 30 | "affiliationIdentifier": "05dxps055", 31 | "affiliationIdentifierScheme": "ROR", 32 | "name": "California Institute of Technology" 33 | } 34 | ], 35 | "familyName": "Wilding", 36 | "givenName": "John", 37 | "name": "Wilding, John", 38 | "nameIdentifiers": [ 39 | { 40 | "nameIdentifier": "0000-0002-0914-2078", 41 | "nameIdentifierScheme": "ORCID" 42 | } 43 | ], 44 | "nameType": "Personal" 45 | }, 46 | { 47 | "affiliation": [ 48 | { 49 | "affiliationIdentifier": "05dxps055", 50 | "affiliationIdentifierScheme": "ROR", 51 | "name": "California Institute of Technology" 52 | } 53 | ], 54 | "familyName": "Ross", 55 | "givenName": "Zachary", 56 | "name": "Ross, Zachary", 57 | "nameIdentifiers": [ 58 | { 59 | "nameIdentifier": "0000-0002-6343-8400", 60 | "nameIdentifierScheme": "ORCID" 61 | } 62 | ], 63 | "nameType": "Personal" 64 | } 65 | ], 66 | "dates": [ 67 | { 68 | "date": "2025-01-01", 69 | "dateType": "Issued" 70 | }, 71 | { 72 | "date": "2025-01-01", 73 | "dateType": "Available" 74 | } 75 | ], 76 | "descriptions": [ 77 | { 78 | "description": "There are two catalogs in this dataset produced for the study by Atterholt, Wilding, & Ross (2025):\n\nA relocated earthquake hypocenter location catalog made using 
PhaseNO (Sun et al., 2023) for phase picking, GaMMA (Zhu et al., 2022) for phase association, HypoSVI (Smith et al., 2021) for absolute location, and GrowClust (Trugman & Shearer, 2017) for cross-correlation-based relative relocation.\nA corresponding earthquake moment tensor catalog made using the picks from the hypocenter catalog and the Bayesian framework outlined in Wilding & Ross (2024).\nCitations:\n\nAtterholt, J., Wilding, J. D., & Ross., Z. E. (2025). The evolution of fault orientation in the 2019 Ridgecrest earthquake sequence with a new long-term catalogue of seismicity and moment tensors. Geophysical Journal International, 240(3), 1579\u20131592. https://doi.org/10.1093/gji/ggaf001\nSun, H., Ross, Z. E., Zhu, W., & Azizzadenesheli, K. (2023). Phase Neural Operator for Multi\u2010Station Picking of Seismic Arrivals. Geophysical Research Letters, 50(24), e2023GL106434. https://doi.org/10.1029/2023GL106434\nZhu, W., McBrearty, I. W., Mousavi, S. M., Ellsworth, W. L., & Beroza, G. C. (2022). Earthquake Phase Association Using a Bayesian Gaussian Mixture Model. Journal of Geophysical Research: Solid Earth, 127(5), e2021JB023249. https://doi.org/10.1029/2021JB023249\nSmith, J. D., Ross, Z. E., Azizzadenesheli, K., & Muir, J. B. (2021). HypoSVI: Hypocentre inversion with Stein variational inference and physics informed neural networks. Geophysical Journal International, 228(1), 698\u2013710. https://doi.org/10.1093/gji/ggab309\nTrugman, D. T., & Shearer, P. M. (2017). GrowClust: A Hierarchical Clustering Algorithm for Relative Earthquake Relocation, with Application to the Spanish Springs and Sheldon, Nevada, Earthquake Sequences. Seismological Research Letters, 88(2A), 379\u2013391. 
https://doi.org/10.1785/0220160188", 79 | "descriptionType": "Abstract" 80 | } 81 | ], 82 | "fundingReferences": [ 83 | { 84 | "awardNumber": "1745301", 85 | "awardTitle": "Graduate Research Fellowship", 86 | "funderIdentifier": "grid.431093.c", 87 | "funderIdentifierType": "GRID", 88 | "funderName": "National Science Foundation" 89 | } 90 | ], 91 | "identifiers": [ 92 | { 93 | "identifier": "10.22002/f40da-hww21", 94 | "identifierType": "DOI" 95 | }, 96 | { 97 | "identifier": "oai:data.caltech.edu:f40da-hww21", 98 | "identifierType": "oai" 99 | } 100 | ], 101 | "publicationYear": "2025", 102 | "publisher": "CaltechDATA", 103 | "rightsList": [ 104 | { 105 | "rights": "Creative Commons Zero v1.0 Universal", 106 | "rightsIdentifier": "cc0-1.0", 107 | "rightsIdentifierScheme": "spdx", 108 | "rightsUri": "https://creativecommons.org/publicdomain/zero/1.0/legalcode" 109 | } 110 | ], 111 | "schemaVersion": "http://datacite.org/schema/kernel-4", 112 | "titles": [ 113 | { 114 | "title": "2019 Ridgecrest Earthquake Sequence Long-Term Hypocenter and Moment Tensor Catalog" 115 | } 116 | ], 117 | "types": { 118 | "resourceType": "", 119 | "resourceTypeGeneral": "Dataset" 120 | } 121 | } -------------------------------------------------------------------------------- /tests/data/datacite43/hevaf-20f84.json: -------------------------------------------------------------------------------- 1 | { 2 | "creators": [ 3 | { 4 | "affiliation": [ 5 | { 6 | "affiliationIdentifier": "05dxps055", 7 | "affiliationIdentifierScheme": "ROR", 8 | "name": "California Institute of Technology" 9 | } 10 | ], 11 | "familyName": "Law", 12 | "givenName": "Casey", 13 | "name": "Casey Law", 14 | "nameIdentifiers": [ 15 | { 16 | "nameIdentifier": "0000-0002-4119-9963", 17 | "nameIdentifierScheme": "ORCID" 18 | } 19 | ], 20 | "nameType": "Personal" 21 | } 22 | ], 23 | "dates": [ 24 | { 25 | "date": "2024", 26 | "dateType": "Issued" 27 | }, 28 | { 29 | "date": "2024-04-25", 30 | "dateType": "Created" 31 | } 
32 | ], 33 | "descriptions": [ 34 | { 35 | "description": "Data associated with DSA-110 candidate transient.", 36 | "descriptionType": "Abstract" 37 | } 38 | ], 39 | "formats": [ 40 | "png" 41 | ], 42 | "fundingReferences": [ 43 | { 44 | "funderIdentifier": "grid.431093.c", 45 | "funderIdentifierType": "GRID", 46 | "funderName": "National Science Foundation" 47 | } 48 | ], 49 | "geoLocations": [ 50 | { 51 | "geoLocationPlace": "OVRO", 52 | "geoLocationPoint": { 53 | "pointLatitude": 37.2339, 54 | "pointLongitude": -118.282 55 | } 56 | } 57 | ], 58 | "identifiers": [ 59 | { 60 | "identifier": "10.25800/t9jd-fh86", 61 | "identifierType": "DOI" 62 | }, 63 | { 64 | "identifier": "oai:data.caltech.edu:hevaf-20f84", 65 | "identifierType": "oai" 66 | }, 67 | { 68 | "identifier": "221116aaab", 69 | "identifierType": "dsa-110-id" 70 | }, 71 | { 72 | "identifier": "hevaf-20f84", 73 | "identifierType": "cdid" 74 | } 75 | ], 76 | "publicationYear": "2024", 77 | "publisher": "Caltech Data", 78 | "relatedIdentifiers": [ 79 | { 80 | "relatedIdentifier": "http://deepsynoptic.org", 81 | "relatedIdentifierType": "URL", 82 | "relationType": "IsDocumentedBy" 83 | } 84 | ], 85 | "rightsList": [ 86 | { 87 | "rights": "cc-by-4.0" 88 | } 89 | ], 90 | "schemaVersion": "http://datacite.org/schema/kernel-4", 91 | "subjects": [ 92 | { 93 | "subject": "OVRO" 94 | }, 95 | { 96 | "subject": "Astrophysics" 97 | }, 98 | { 99 | "subject": "Fast Radio Bursts" 100 | } 101 | ], 102 | "titles": [ 103 | { 104 | "title": "DSA-110 Data for Candidate Fast Radio Burst 221116aaab" 105 | } 106 | ], 107 | "types": { 108 | "resourceType": "", 109 | "resourceTypeGeneral": "Dataset" 110 | }, 111 | "version": "0.1" 112 | } -------------------------------------------------------------------------------- /tests/data/datacite43/hhg7x-hgm42.json: -------------------------------------------------------------------------------- 1 | { 2 | "creators": [ 3 | { 4 | "affiliation": [ 5 | { 6 | "affiliationIdentifier": 
"05dxps055", 7 | "affiliationIdentifierScheme": "ROR", 8 | "name": "California Institute of Technology" 9 | } 10 | ], 11 | "familyName": "Atterholt", 12 | "givenName": "James", 13 | "name": "Atterholt, James", 14 | "nameIdentifiers": [ 15 | { 16 | "nameIdentifier": "0000-0003-1603-5518", 17 | "nameIdentifierScheme": "ORCID" 18 | } 19 | ], 20 | "nameType": "Personal" 21 | }, 22 | { 23 | "affiliation": [ 24 | { 25 | "affiliationIdentifier": "05dxps055", 26 | "affiliationIdentifierScheme": "ROR", 27 | "name": "California Institute of Technology" 28 | } 29 | ], 30 | "familyName": "Zhan", 31 | "givenName": "Zhongwen", 32 | "name": "Zhan, Zhongwen", 33 | "nameIdentifiers": [ 34 | { 35 | "nameIdentifier": "0000-0002-5586-2607", 36 | "nameIdentifierScheme": "ORCID" 37 | } 38 | ], 39 | "nameType": "Personal" 40 | } 41 | ], 42 | "dates": [ 43 | { 44 | "date": "2024-06-26", 45 | "dateType": "Issued" 46 | } 47 | ], 48 | "descriptions": [ 49 | { 50 | "description": "These are h5 files that contain events with PmP observations used in the publication \"Fine Scale Southern California Moho Structure Uncovered with Distributed Acoustic Sensing.\" Events are descriptively named.", 51 | "descriptionType": "Abstract" 52 | } 53 | ], 54 | "fundingReferences": [ 55 | { 56 | "awardNumber": "1848166", 57 | "awardTitle": "CAREER:Potential of fiber acoustic sensing in the next-generation seismic networks", 58 | "funderIdentifier": "grid.431093.c", 59 | "funderIdentifierType": "GRID", 60 | "funderName": "National Science Foundation" 61 | }, 62 | { 63 | "funderIdentifier": "grid.452959.6", 64 | "funderIdentifierType": "GRID", 65 | "funderName": "Gordon and Betty Moore Foundation" 66 | } 67 | ], 68 | "identifiers": [ 69 | { 70 | "identifier": "10.22002/hhg7x-hgm42", 71 | "identifierType": "DOI" 72 | }, 73 | { 74 | "identifier": "oai:data.caltech.edu:hhg7x-hgm42", 75 | "identifierType": "oai" 76 | } 77 | ], 78 | "publicationYear": "2024", 79 | "publisher": "CaltechDATA", 80 | "rightsList": [ 81 
| { 82 | "rights": "Creative Commons Zero v1.0 Universal", 83 | "rightsIdentifier": "cc0-1.0", 84 | "rightsIdentifierScheme": "spdx", 85 | "rightsUri": "https://creativecommons.org/publicdomain/zero/1.0/legalcode" 86 | } 87 | ], 88 | "schemaVersion": "http://datacite.org/schema/kernel-4", 89 | "titles": [ 90 | { 91 | "title": "Catalog of Events with PmP Phase" 92 | } 93 | ], 94 | "types": { 95 | "resourceType": "", 96 | "resourceTypeGeneral": "Dataset" 97 | } 98 | } -------------------------------------------------------------------------------- /tests/data/datacite43/kxjgj-tfk18.json: -------------------------------------------------------------------------------- 1 | { 2 | "creators": [ 3 | { 4 | "affiliation": [ 5 | { 6 | "affiliationIdentifier": "05dxps055", 7 | "affiliationIdentifierScheme": "ROR", 8 | "name": "California Institute of Technology" 9 | } 10 | ], 11 | "familyName": "Ding", 12 | "givenName": "Xiaozhe", 13 | "name": "Ding, Xiaozhe", 14 | "nameIdentifiers": [ 15 | { 16 | "nameIdentifier": "0000-0002-0267-0791", 17 | "nameIdentifierScheme": "ORCID" 18 | } 19 | ], 20 | "nameType": "Personal" 21 | } 22 | ], 23 | "dates": [ 24 | { 25 | "date": "2024-04-16", 26 | "dateType": "Issued" 27 | } 28 | ], 29 | "descriptions": [ 30 | { 31 | "description": "Raw data for Ding, X., Chen, X., Sullivan, E.E., Shay, T.F., and Gradinaru, V. (2024). Fast, accurate ranking of engineered proteins by target binding propensity using structure modeling. Molecular Therapy. 
https://doi.org/10.1016/j.ymthe.2024.04.003", 32 | "descriptionType": "Abstract" 33 | } 34 | ], 35 | "identifiers": [ 36 | { 37 | "identifier": "10.22002/kxjgj-tfk18", 38 | "identifierType": "DOI" 39 | }, 40 | { 41 | "identifier": "oai:data.caltech.edu:kxjgj-tfk18", 42 | "identifierType": "oai" 43 | } 44 | ], 45 | "publicationYear": "2024", 46 | "publisher": "CaltechDATA", 47 | "relatedIdentifiers": [ 48 | { 49 | "relatedIdentifier": "10.1016/j.ymthe.2024.04.003", 50 | "relatedIdentifierType": "DOI", 51 | "relationType": "IsSupplementTo", 52 | "resourceTypeGeneral": "Text" 53 | } 54 | ], 55 | "rightsList": [ 56 | { 57 | "rights": "Creative Commons Zero v1.0 Universal", 58 | "rightsIdentifier": "cc0-1.0", 59 | "rightsIdentifierScheme": "spdx", 60 | "rightsUri": "https://creativecommons.org/publicdomain/zero/1.0/legalcode" 61 | } 62 | ], 63 | "schemaVersion": "http://datacite.org/schema/kernel-4", 64 | "titles": [ 65 | { 66 | "title": "Data for Fast, accurate ranking of engineered proteins by target-binding propensity using structure modeling" 67 | } 68 | ], 69 | "types": { 70 | "resourceType": "", 71 | "resourceTypeGeneral": "Dataset" 72 | } 73 | } -------------------------------------------------------------------------------- /tests/data/datacite43/kxtar-bm759.json: -------------------------------------------------------------------------------- 1 | { 2 | "creators": [ 3 | { 4 | "affiliation": [ 5 | { 6 | "affiliationIdentifier": "05dxps055", 7 | "affiliationIdentifierScheme": "ROR", 8 | "name": "California Institute of Technology" 9 | } 10 | ], 11 | "familyName": "Bird", 12 | "givenName": "Eli", 13 | "name": "Bird, Eli", 14 | "nameIdentifiers": [ 15 | { 16 | "nameIdentifier": "0000-0002-9428-0650", 17 | "nameIdentifierScheme": "ORCID" 18 | } 19 | ], 20 | "nameType": "Personal" 21 | }, 22 | { 23 | "affiliation": [ 24 | { 25 | "affiliationIdentifier": "05dxps055", 26 | "affiliationIdentifierScheme": "ROR", 27 | "name": "California Institute of Technology" 28 | } 29 
| ], 30 | "familyName": "Zhan", 31 | "givenName": "Zhongwen", 32 | "name": "Zhan, Zhongwen", 33 | "nameIdentifiers": [ 34 | { 35 | "nameIdentifier": "0000-0002-5586-2607", 36 | "nameIdentifierScheme": "ORCID" 37 | } 38 | ], 39 | "nameType": "Personal" 40 | } 41 | ], 42 | "dates": [ 43 | { 44 | "date": "2024-09-23", 45 | "dateType": "Issued" 46 | } 47 | ], 48 | "identifiers": [ 49 | { 50 | "identifier": "10.22002/kxtar-bm759", 51 | "identifierType": "DOI" 52 | }, 53 | { 54 | "identifier": "oai:data.caltech.edu:kxtar-bm759", 55 | "identifierType": "oai" 56 | } 57 | ], 58 | "publicationYear": "2024", 59 | "publisher": "CaltechDATA", 60 | "rightsList": [ 61 | { 62 | "rights": "Creative Commons Zero v1.0 Universal", 63 | "rightsIdentifier": "cc0-1.0", 64 | "rightsIdentifierScheme": "spdx", 65 | "rightsUri": "https://creativecommons.org/publicdomain/zero/1.0/legalcode" 66 | } 67 | ], 68 | "schemaVersion": "http://datacite.org/schema/kernel-4", 69 | "titles": [ 70 | { 71 | "title": "Ambient Noise Cross-Correlation Data associated with Constraining Dike Opening Models With Seismic Velocity Changes Associated with the 2023-2024 Eruption Sequence on the Reykjanes Peninsula\" by Bird et al." 
72 | } 73 | ], 74 | "types": { 75 | "resourceType": "", 76 | "resourceTypeGeneral": "Dataset" 77 | } 78 | } -------------------------------------------------------------------------------- /tests/data/datacite43/n0y4x-xx706.json: -------------------------------------------------------------------------------- 1 | { 2 | "contributors": [ 3 | { 4 | "affiliation": [ 5 | { 6 | "name": "Department of Biological Sciences, California State University, Sacramento" 7 | } 8 | ], 9 | "contributorType": "ContactPerson", 10 | "familyName": "Gleason", 11 | "givenName": "Lani U.", 12 | "name": "Gleason, Lani U.", 13 | "nameIdentifiers": [], 14 | "nameType": "Personal" 15 | } 16 | ], 17 | "creators": [ 18 | { 19 | "affiliation": [ 20 | { 21 | "name": "Department of Biological Sciences, California State University, Sacramento" 22 | } 23 | ], 24 | "familyName": "Gleason", 25 | "givenName": "Lani U.", 26 | "name": "Gleason, Lani U.", 27 | "nameIdentifiers": [], 28 | "nameType": "Personal" 29 | } 30 | ], 31 | "dates": [ 32 | { 33 | "date": "2025-01-24", 34 | "dateType": "Issued" 35 | }, 36 | { 37 | "date": "2025-01-22", 38 | "dateType": "Accepted" 39 | } 40 | ], 41 | "descriptions": [ 42 | { 43 | "description": "Genes identified to be significantly differentially expressed for each component of the Venn diagram in Figure 1B. The annotation information, RPKM expression value for each individual in each of the four treatments, and the average RPKM expression value per treatment are provided for each gene.", 44 | "descriptionType": "Abstract" 45 | } 46 | ], 47 | "fundingReferences": [ 48 | { 49 | "funderName": "This work was supported by a California State University, Sacramento Research & Creative Activity (RCA) Award and a Biological Sciences Genes to Ecosystems (G2E) Award to Lani Gleason. The California State University (CSU) Council on Ocean Affairs, Science, and Technology (COAST) also provided an Undergraduate Research Award to support Hanna Franklin." 
50 | } 51 | ], 52 | "identifiers": [ 53 | { 54 | "identifier": "10.22002/n0y4x-xx706", 55 | "identifierType": "DOI" 56 | }, 57 | { 58 | "identifier": "oai:data.caltech.edu:n0y4x-xx706", 59 | "identifierType": "oai" 60 | } 61 | ], 62 | "language": "eng", 63 | "publicationYear": "2025", 64 | "publisher": "CaltechDATA", 65 | "relatedIdentifiers": [ 66 | { 67 | "relatedIdentifier": "10.17912/micropub.biology.001473", 68 | "relatedIdentifierType": "DOI", 69 | "relationType": "IsPartOf", 70 | "resourceTypeGeneral": "Text" 71 | } 72 | ], 73 | "rightsList": [ 74 | { 75 | "rights": "Creative Commons Attribution 4.0 International", 76 | "rightsIdentifier": "cc-by-4.0", 77 | "rightsIdentifierScheme": "spdx", 78 | "rightsUri": "https://creativecommons.org/licenses/by/4.0/legalcode" 79 | } 80 | ], 81 | "schemaVersion": "http://datacite.org/schema/kernel-4", 82 | "subjects": [ 83 | { 84 | "subject": "haliotis rufescens" 85 | } 86 | ], 87 | "titles": [ 88 | { 89 | "title": "Dataset: Heat Stress, Starvation, and Heat Stress Plus Starvation Cause Unique Transcriptomic Responses in the Economically Important Red Abalone Haliotis rufescens" 90 | } 91 | ], 92 | "types": { 93 | "resourceType": "", 94 | "resourceTypeGeneral": "Dataset" 95 | }, 96 | "version": "1.0" 97 | } -------------------------------------------------------------------------------- /tests/data/datacite43/n13wc-zwc92.json: -------------------------------------------------------------------------------- 1 | { 2 | "creators": [ 3 | { 4 | "affiliation": [ 5 | { 6 | "affiliationIdentifier": "05dxps055", 7 | "affiliationIdentifierScheme": "ROR", 8 | "name": "California Institute of Technology" 9 | } 10 | ], 11 | "familyName": "Silevitch", 12 | "givenName": "Daniel", 13 | "name": "Silevitch, Daniel", 14 | "nameIdentifiers": [ 15 | { 16 | "nameIdentifier": "0000-0002-6347-3513", 17 | "nameIdentifierScheme": "ORCID" 18 | } 19 | ], 20 | "nameType": "Personal" 21 | }, 22 | { 23 | "affiliation": [ 24 | { 25 | 
"affiliationIdentifier": "05dxps055", 26 | "affiliationIdentifierScheme": "ROR", 27 | "name": "California Institute of Technology" 28 | } 29 | ], 30 | "familyName": "Armstrong", 31 | "givenName": "Stephen", 32 | "name": "Armstrong, Stephen", 33 | "nameIdentifiers": [], 34 | "nameType": "Personal" 35 | } 36 | ], 37 | "dates": [ 38 | { 39 | "date": "2025-01-30", 40 | "dateType": "Issued" 41 | }, 42 | { 43 | "date": "2025-01-30", 44 | "dateType": "Submitted" 45 | } 46 | ], 47 | "descriptions": [ 48 | { 49 | "description": "Magnetic susceptibility and specific heat data for LiErF4.\u00a0", 50 | "descriptionType": "Abstract" 51 | } 52 | ], 53 | "identifiers": [ 54 | { 55 | "identifier": "10.22002/n13wc-zwc92", 56 | "identifierType": "DOI" 57 | }, 58 | { 59 | "identifier": "oai:data.caltech.edu:n13wc-zwc92", 60 | "identifierType": "oai" 61 | } 62 | ], 63 | "publicationYear": "2025", 64 | "publisher": "CaltechDATA", 65 | "rightsList": [ 66 | { 67 | "rights": "Creative Commons Attribution 4.0 International", 68 | "rightsIdentifier": "cc-by-4.0", 69 | "rightsIdentifierScheme": "spdx", 70 | "rightsUri": "https://creativecommons.org/licenses/by/4.0/legalcode" 71 | } 72 | ], 73 | "schemaVersion": "http://datacite.org/schema/kernel-4", 74 | "subjects": [ 75 | { 76 | "subject": "Physical sciences" 77 | } 78 | ], 79 | "titles": [ 80 | { 81 | "title": "LiErF4 susceptibility and specific heat" 82 | } 83 | ], 84 | "types": { 85 | "resourceType": "", 86 | "resourceTypeGeneral": "Dataset" 87 | } 88 | } -------------------------------------------------------------------------------- /tests/data/datacite43/nbtw5-37m55.json: -------------------------------------------------------------------------------- 1 | { 2 | "creators": [ 3 | { 4 | "affiliation": [ 5 | { 6 | "affiliationIdentifier": "05dxps055", 7 | "affiliationIdentifierScheme": "ROR", 8 | "name": "California Institute of Technology" 9 | } 10 | ], 11 | "familyName": "Law", 12 | "givenName": "Casey", 13 | "name": "Casey Law", 14 | 
"nameIdentifiers": [ 15 | { 16 | "nameIdentifier": "0000-0002-4119-9963", 17 | "nameIdentifierScheme": "ORCID" 18 | } 19 | ], 20 | "nameType": "Personal" 21 | } 22 | ], 23 | "dates": [ 24 | { 25 | "date": "2024", 26 | "dateType": "Issued" 27 | }, 28 | { 29 | "date": "2024-04-12", 30 | "dateType": "Created" 31 | } 32 | ], 33 | "descriptions": [ 34 | { 35 | "description": "Data associated with DSA-110 candidate transient.", 36 | "descriptionType": "Abstract" 37 | } 38 | ], 39 | "formats": [ 40 | "png" 41 | ], 42 | "fundingReferences": [ 43 | { 44 | "funderIdentifier": "grid.431093.c", 45 | "funderIdentifierType": "GRID", 46 | "funderName": "National Science Foundation" 47 | } 48 | ], 49 | "geoLocations": [ 50 | { 51 | "geoLocationPlace": "OVRO", 52 | "geoLocationPoint": { 53 | "pointLatitude": 37.2339, 54 | "pointLongitude": -118.282 55 | } 56 | } 57 | ], 58 | "identifiers": [ 59 | { 60 | "identifier": "10.25800/3ghe-8e93", 61 | "identifierType": "DOI" 62 | }, 63 | { 64 | "identifier": "oai:data.caltech.edu:nbtw5-37m55", 65 | "identifierType": "oai" 66 | }, 67 | { 68 | "identifier": "231120aabi", 69 | "identifierType": "dsa-110-id" 70 | }, 71 | { 72 | "identifier": "nbtw5-37m55", 73 | "identifierType": "cdid" 74 | } 75 | ], 76 | "publicationYear": "2024", 77 | "publisher": "Caltech Data", 78 | "relatedIdentifiers": [ 79 | { 80 | "relatedIdentifier": "http://deepsynoptic.org", 81 | "relatedIdentifierType": "URL", 82 | "relationType": "IsDocumentedBy" 83 | } 84 | ], 85 | "rightsList": [ 86 | { 87 | "rights": "cc-by-4.0" 88 | } 89 | ], 90 | "schemaVersion": "http://datacite.org/schema/kernel-4", 91 | "subjects": [ 92 | { 93 | "subject": "OVRO" 94 | }, 95 | { 96 | "subject": "Astrophysics" 97 | }, 98 | { 99 | "subject": "Fast Radio Bursts" 100 | } 101 | ], 102 | "titles": [ 103 | { 104 | "title": "DSA-110 Data for Candidate Fast Radio Burst 231120aabi" 105 | } 106 | ], 107 | "types": { 108 | "resourceType": "", 109 | "resourceTypeGeneral": "Dataset" 110 | } 111 | } 
-------------------------------------------------------------------------------- /tests/data/datacite43/rmzp9-9yx96.json: -------------------------------------------------------------------------------- 1 | { 2 | "creators": [ 3 | { 4 | "familyName": "Devey", 5 | "givenName": "Sean", 6 | "name": "Devey, Sean", 7 | "nameIdentifiers": [ 8 | { 9 | "nameIdentifier": "0000-0002-8937-939X", 10 | "nameIdentifierScheme": "ORCID" 11 | } 12 | ], 13 | "nameType": "Personal" 14 | } 15 | ], 16 | "dates": [ 17 | { 18 | "date": "2024-06-07", 19 | "dateType": "Issued" 20 | }, 21 | { 22 | "date": "2023-11-09", 23 | "dateInformation": "DPIV, CTA data collection", 24 | "dateType": "Collected" 25 | }, 26 | { 27 | "date": "2023-10-22", 28 | "dateInformation": "No FMS DPIV data collected", 29 | "dateType": "Collected" 30 | }, 31 | { 32 | "date": "2023-11-16", 33 | "dateInformation": "Cylinder wake dye visualizations collected", 34 | "dateType": "Collected" 35 | }, 36 | { 37 | "date": "2024-05-13", 38 | "dateInformation": "Shear layer dye visualization", 39 | "dateType": "Collected" 40 | }, 41 | { 42 | "date": "2024-06-07", 43 | "dateInformation": "date of upload", 44 | "dateType": "Submitted" 45 | } 46 | ], 47 | "descriptions": [ 48 | { 49 | "description": "DPIV, CTA measurements and dye visualizations demonstrating flow quality of the Free-surface, Low turbulence, Optically accessible, Water TUnnel in a Box (FLOWTUB) developed at GALCIT 2022-2024.", 50 | "descriptionType": "Abstract" 51 | } 52 | ], 53 | "fundingReferences": [ 54 | { 55 | "awardNumber": "DGE-1745301", 56 | "awardTitle": "Graduate Research Fellowships Program (GRFP)", 57 | "funderIdentifier": "grid.431093.c", 58 | "funderIdentifierType": "GRID", 59 | "funderName": "National Science Foundation" 60 | } 61 | ], 62 | "identifiers": [ 63 | { 64 | "identifier": "10.22002/rmzp9-9yx96", 65 | "identifierType": "DOI" 66 | }, 67 | { 68 | "identifier": "oai:data.caltech.edu:rmzp9-9yx96", 69 | "identifierType": "oai" 70 | } 71 | ], 
72 | "language": "eng", 73 | "publicationYear": "2024", 74 | "publisher": "CaltechDATA", 75 | "rightsList": [ 76 | { 77 | "rights": "Creative Commons Attribution 4.0 International", 78 | "rightsIdentifier": "cc-by-4.0", 79 | "rightsIdentifierScheme": "spdx", 80 | "rightsUri": "https://creativecommons.org/licenses/by/4.0/legalcode" 81 | } 82 | ], 83 | "schemaVersion": "http://datacite.org/schema/kernel-4", 84 | "titles": [ 85 | { 86 | "title": "FLOWTUB Water Tunnel Validation Data" 87 | } 88 | ], 89 | "types": { 90 | "resourceType": "", 91 | "resourceTypeGeneral": "Dataset" 92 | }, 93 | "version": "1" 94 | } -------------------------------------------------------------------------------- /tests/data/datacite43/t15w6-x9q23.json: -------------------------------------------------------------------------------- 1 | { 2 | "contributors": [ 3 | { 4 | "affiliation": [ 5 | { 6 | "name": "National Eye Institute, National Institutes of Health, Bethesda, Maryland, United States" 7 | }, 8 | { 9 | "name": "Intramural Research Program, National Library of Medicine, National Institutes of Health, Bethesda, Maryland, United States" 10 | } 11 | ], 12 | "contributorType": "ContactPerson", 13 | "familyName": "Pal", 14 | "givenName": "Soumitra", 15 | "name": "Pal, Soumitra", 16 | "nameIdentifiers": [ 17 | { 18 | "nameIdentifier": "0000-0003-4840-3944", 19 | "nameIdentifierScheme": "ORCID" 20 | } 21 | ], 22 | "nameType": "Personal" 23 | } 24 | ], 25 | "creators": [ 26 | { 27 | "affiliation": [ 28 | { 29 | "name": "National Eye Institute, National Institutes of Health, Bethesda, Maryland, United States" 30 | }, 31 | { 32 | "name": "Intramural Research Program, National Library of Medicine, National Institutes of Health, Bethesda, Maryland, United States" 33 | } 34 | ], 35 | "familyName": "Pal", 36 | "givenName": "Soumitra", 37 | "name": "Pal, Soumitra", 38 | "nameIdentifiers": [ 39 | { 40 | "nameIdentifier": "0000-0003-4840-3944", 41 | "nameIdentifierScheme": "ORCID" 42 | } 43 | ], 44 | 
"nameType": "Personal" 45 | } 46 | ], 47 | "dates": [ 48 | { 49 | "date": "2025-02-21", 50 | "dateType": "Issued" 51 | } 52 | ], 53 | "descriptions": [ 54 | { 55 | "description": "This MS-Excel workbook contains spreadsheets detailing the FCA datasets, clustering resolutions, and the results of our analysis at both the cell and cluster levels.", 56 | "descriptionType": "Abstract" 57 | } 58 | ], 59 | "fundingReferences": [ 60 | { 61 | "funderName": "This research was supported in part by the Intramural Research Program of the National Institutes of Health, USA: The National Institute of Diabetes and Digestive and Kidney Diseases (NIDDK) Grant No. ZIADK015600 to B.O. and National Library of Medicine (NLM) Grant No. LM200887 to T.M.P." 62 | } 63 | ], 64 | "identifiers": [ 65 | { 66 | "identifier": "10.22002/t15w6-x9q23", 67 | "identifierType": "DOI" 68 | }, 69 | { 70 | "identifier": "oai:data.caltech.edu:t15w6-x9q23", 71 | "identifierType": "oai" 72 | } 73 | ], 74 | "language": "eng", 75 | "publicationYear": "2025", 76 | "publisher": "CaltechDATA", 77 | "relatedIdentifiers": [ 78 | { 79 | "relatedIdentifier": "10.17912/micropub.biology.001501", 80 | "relatedIdentifierType": "DOI", 81 | "relationType": "IsPartOf", 82 | "resourceTypeGeneral": "Text" 83 | } 84 | ], 85 | "rightsList": [ 86 | { 87 | "rights": "Creative Commons Attribution 4.0 International", 88 | "rightsIdentifier": "cc-by-4.0", 89 | "rightsIdentifierScheme": "spdx", 90 | "rightsUri": "https://creativecommons.org/licenses/by/4.0/legalcode" 91 | } 92 | ], 93 | "schemaVersion": "http://datacite.org/schema/kernel-4", 94 | "subjects": [ 95 | { 96 | "subject": "drosophila" 97 | } 98 | ], 99 | "titles": [ 100 | { 101 | "title": "Dataset: Cell-Type Specific Variation in X-Chromosome Dosage Compensation in Drosophila" 102 | } 103 | ], 104 | "types": { 105 | "resourceType": "", 106 | "resourceTypeGeneral": "Dataset" 107 | }, 108 | "version": "1.0" 109 | } 
-------------------------------------------------------------------------------- /tests/data/datacite43/wbty9-bqy29.json: -------------------------------------------------------------------------------- 1 | { 2 | "contributors": [ 3 | { 4 | "affiliation": [ 5 | { 6 | "name": "Molecular Biophysics & Biochemistry, Yale University, New Haven, Connecticut, United States" 7 | }, 8 | { 9 | "name": "Cell Biology, Yale University School of Medicine" 10 | } 11 | ], 12 | "contributorType": "ContactPerson", 13 | "familyName": "Berro", 14 | "givenName": "Julien", 15 | "name": "Berro, Julien", 16 | "nameIdentifiers": [ 17 | { 18 | "nameIdentifier": "0000-0002-9560-8646", 19 | "nameIdentifierScheme": "ORCID" 20 | } 21 | ], 22 | "nameType": "Personal" 23 | } 24 | ], 25 | "creators": [ 26 | { 27 | "affiliation": [ 28 | { 29 | "name": "Molecular Biophysics & Biochemistry, Yale University, New Haven, Connecticut, United States" 30 | }, 31 | { 32 | "name": "Cell Biology, Yale University School of Medicine" 33 | } 34 | ], 35 | "familyName": "Berro", 36 | "givenName": "Julien", 37 | "name": "Berro, Julien", 38 | "nameIdentifiers": [ 39 | { 40 | "nameIdentifier": "0000-0002-9560-8646", 41 | "nameIdentifierScheme": "ORCID" 42 | } 43 | ], 44 | "nameType": "Personal" 45 | } 46 | ], 47 | "dates": [ 48 | { 49 | "date": "2024-05-08", 50 | "dateType": "Issued" 51 | }, 52 | { 53 | "date": "2024-05-03", 54 | "dateType": "Accepted" 55 | } 56 | ], 57 | "descriptions": [ 58 | { 59 | "description": "Primers used in this study", 60 | "descriptionType": "Abstract" 61 | } 62 | ], 63 | "fundingReferences": [ 64 | { 65 | "funderName": "This work was partly supported by the National Institutes of Health (R01 GM11563601)." 
66 | } 67 | ], 68 | "identifiers": [ 69 | { 70 | "identifier": "10.22002/wbty9-bqy29", 71 | "identifierType": "DOI" 72 | }, 73 | { 74 | "identifier": "oai:data.caltech.edu:wbty9-bqy29", 75 | "identifierType": "oai" 76 | } 77 | ], 78 | "language": "eng", 79 | "publicationYear": "2024", 80 | "publisher": "CaltechDATA", 81 | "relatedIdentifiers": [ 82 | { 83 | "relatedIdentifier": "10.17912/micropub.biology.001191", 84 | "relatedIdentifierType": "DOI", 85 | "relationType": "IsPartOf", 86 | "resourceTypeGeneral": "Text" 87 | } 88 | ], 89 | "rightsList": [ 90 | { 91 | "rights": "Creative Commons Attribution 4.0 International", 92 | "rightsIdentifier": "cc-by-4.0", 93 | "rightsIdentifierScheme": "spdx", 94 | "rightsUri": "https://creativecommons.org/licenses/by/4.0/legalcode" 95 | } 96 | ], 97 | "schemaVersion": "http://datacite.org/schema/kernel-4", 98 | "subjects": [ 99 | { 100 | "subject": "s. pombe" 101 | } 102 | ], 103 | "titles": [ 104 | { 105 | "title": "Dataset: CRISPR-Cas9 editing efficiency in fission yeast is not limited by homology search and is improved by combining gap-repair with fluoride selection" 106 | } 107 | ], 108 | "types": { 109 | "resourceType": "", 110 | "resourceTypeGeneral": "Dataset" 111 | }, 112 | "version": "1.0" 113 | } -------------------------------------------------------------------------------- /tests/data/invalid_datacite43/invalid_metadata_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "creators": [ 3 | { 4 | "name": "John Doe" 5 | } 6 | ], 7 | "publisher": "Caltech", 8 | "publicationYear": "2023", 9 | "types": { 10 | "resourceTypeGeneral": "Dataset" 11 | } 12 | } -------------------------------------------------------------------------------- /tests/data/invalid_datacite43/invalid_metadata_10.json: -------------------------------------------------------------------------------- 1 | { 2 | "titles": [ 3 | { 4 | "title": "Sample Title" 5 | } 6 | ], 7 | "creators": [ 8 | { 9 | "name": 
"John Doe" 10 | } 11 | ], 12 | "version": 1, 13 | "publisher": "Caltech", 14 | "publicationYear": "2023", 15 | "types": { 16 | "resourceTypeGeneral": "Dataset" 17 | } 18 | } -------------------------------------------------------------------------------- /tests/data/invalid_datacite43/invalid_metadata_2.json: -------------------------------------------------------------------------------- 1 | { 2 | "titles": [], 3 | "creators": [ 4 | { 5 | "name": "John Doe" 6 | } 7 | ], 8 | "publisher": "Caltech", 9 | "publicationYear": "2023", 10 | "types": { 11 | "resourceTypeGeneral": "Dataset" 12 | } 13 | } -------------------------------------------------------------------------------- /tests/data/invalid_datacite43/invalid_metadata_3.json: -------------------------------------------------------------------------------- 1 | { 2 | "titles": [ 3 | { 4 | "title": "Sample Title" 5 | } 6 | ], 7 | "publisher": "Caltech", 8 | "publicationYear": "2023", 9 | "types": { 10 | "resourceTypeGeneral": "Dataset" 11 | } 12 | } -------------------------------------------------------------------------------- /tests/data/invalid_datacite43/invalid_metadata_4.json: -------------------------------------------------------------------------------- 1 | { 2 | "titles": [ 3 | { 4 | "title": "Sample Title" 5 | } 6 | ], 7 | "creators": [ 8 | { 9 | "name": "John Doe" 10 | } 11 | ], 12 | "contributors": [ 13 | {} 14 | ], 15 | "publisher": "Caltech", 16 | "publicationYear": "2023", 17 | "types": { 18 | "resourceTypeGeneral": "Dataset" 19 | } 20 | } -------------------------------------------------------------------------------- /tests/data/invalid_datacite43/invalid_metadata_5.json: -------------------------------------------------------------------------------- 1 | { 2 | "titles": [ 3 | { 4 | "title": "Sample Title" 5 | } 6 | ], 7 | "creators": [ 8 | { 9 | "name": "John Doe" 10 | } 11 | ], 12 | "descriptions": [ 13 | { 14 | "description": "Sample Description" 15 | } 16 | ], 17 | "publisher": "Caltech", 18 
| "publicationYear": "2023", 19 | "types": { 20 | "resourceTypeGeneral": "Dataset" 21 | } 22 | } -------------------------------------------------------------------------------- /tests/data/invalid_datacite43/invalid_metadata_6.json: -------------------------------------------------------------------------------- 1 | { 2 | "titles": [ 3 | { 4 | "title": "Sample Title" 5 | } 6 | ], 7 | "creators": [ 8 | { 9 | "name": "John Doe" 10 | } 11 | ], 12 | "fundingReferences": [ 13 | { 14 | "funderIdentifier": "1234" 15 | } 16 | ], 17 | "publisher": "Caltech", 18 | "publicationYear": "2023", 19 | "types": { 20 | "resourceTypeGeneral": "Dataset" 21 | } 22 | } -------------------------------------------------------------------------------- /tests/data/invalid_datacite43/invalid_metadata_7.json: -------------------------------------------------------------------------------- 1 | { 2 | "titles": [ 3 | { 4 | "title": "Sample Title" 5 | } 6 | ], 7 | "creators": [ 8 | { 9 | "name": "John Doe" 10 | } 11 | ], 12 | "identifiers": [ 13 | {} 14 | ], 15 | "publisher": "Caltech", 16 | "publicationYear": "2023", 17 | "types": { 18 | "resourceTypeGeneral": "Dataset" 19 | } 20 | } -------------------------------------------------------------------------------- /tests/data/invalid_datacite43/invalid_metadata_8.json: -------------------------------------------------------------------------------- 1 | { 2 | "titles": [ 3 | { 4 | "title": "Sample Title" 5 | } 6 | ], 7 | "creators": [ 8 | { 9 | "name": "John Doe" 10 | } 11 | ], 12 | "dates": [ 13 | {} 14 | ], 15 | "publisher": "Caltech", 16 | "publicationYear": "2023", 17 | "types": { 18 | "resourceTypeGeneral": "Dataset" 19 | } 20 | } -------------------------------------------------------------------------------- /tests/data/invalid_datacite43/invalid_metadata_9.json: -------------------------------------------------------------------------------- 1 | { 2 | "titles": [ 3 | { 4 | "title": "Sample Title" 5 | } 6 | ], 7 | "creators": [ 8 | { 9 | 
"name": "John Doe" 10 | } 11 | ], 12 | "publicationYear": "2023", 13 | "types": { 14 | "resourceTypeGeneral": "Dataset" 15 | } 16 | } -------------------------------------------------------------------------------- /tests/helpers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # This file is part of DataCite. 4 | # 5 | # Copyright (C) 2015, 2016 CERN. 6 | # 7 | # DataCite is free software; you can redistribute it and/or modify it 8 | # under the terms of the Revised BSD License; see LICENSE file for 9 | # more details. 10 | 11 | """Test helpers.""" 12 | 13 | from __future__ import absolute_import, print_function 14 | 15 | import io 16 | import json 17 | import os 18 | from os.path import dirname, join 19 | 20 | 21 | def load_json_path(path): 22 | """Helper method for loading a JSON example file from a path.""" 23 | path_base = dirname(__file__) 24 | with io.open(join(path_base, path), encoding="utf-8") as file: 25 | content = file.read() 26 | return json.loads(content) 27 | 28 | 29 | def write_json_path(path, metadata): 30 | """Helper method for writing a JSON example file to a path.""" 31 | path_base = dirname(__file__) 32 | path_full = join(path_base, path) 33 | print(path_full) 34 | print(metadata) 35 | with io.open(path_full, "w", encoding="utf-8") as file: 36 | json.dump(metadata, file) 37 | -------------------------------------------------------------------------------- /tests/test_download.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # This file is part of caltechdata_api. 4 | # 5 | # Copyright (C) 2020 Caltech. 6 | # 7 | # caltechdata_api is free software; you can redistribute it and/or modify it 8 | # under the terms of the Revised BSD License; see LICENSE file for 9 | # more details. 
10 | 11 | """Tests for format transformations.""" 12 | 13 | import pytest 14 | 15 | from caltechdata_api import download_url, download_file 16 | 17 | 18 | @pytest.mark.skip(reason="works, don't want to do unnecessary downloads") 19 | def test_download(): 20 | """Test that downloads from the DataCite Media API work.""" 21 | example_doi = "10.22002/D1.1098" 22 | expected_url = ( 23 | "https://data.caltech.edu/tindfiles/serve/293d37c5-73f2-4016-bcd5-76cf353ff9d8/" 24 | ) 25 | assert expected_url == download_url(example_doi) 26 | filen = download_file(example_doi) 27 | assert filen == "10.22002-D1.1098" 28 | -------------------------------------------------------------------------------- /tests/test_rdm.py: -------------------------------------------------------------------------------- 1 | from caltechdata_api import ( 2 | customize_schema, 3 | caltechdata_write, 4 | caltechdata_edit, 5 | get_metadata, 6 | ) 7 | import json 8 | import os 9 | 10 | 11 | def test_datacite_rdm_conversion(full_datacite43_record, full_rdm_record): 12 | 13 | # Remove DOI from full_datacite43_record 14 | # since it's prcessed by caltechdata_write or caltechdata_edit 15 | identifiers = [] 16 | for identifier in full_datacite43_record["identifiers"]: 17 | if identifier["identifierType"] != "DOI": 18 | identifiers.append(identifier) 19 | full_datacite43_record["identifiers"] = identifiers 20 | 21 | converted = customize_schema(full_datacite43_record, schema="43") 22 | 23 | assert converted == full_rdm_record 24 | 25 | 26 | def test_datacite_rdm_create_edit(full_datacite43_record): 27 | env_token = os.environ.get("RDMTOK") 28 | 29 | # Remove DOI from full_datacite43_record 30 | # since we want the test system to create one 31 | identifiers = [] 32 | for identifier in full_datacite43_record["identifiers"]: 33 | if identifier["identifierType"] != "DOI": 34 | identifiers.append(identifier) 35 | full_datacite43_record["identifiers"] = identifiers 36 | 37 | recid = caltechdata_write( 38 | 
def test_datacite_rdm_create_edit(full_datacite43_record):
    """Round-trip against the test server: create a record, then edit it twice."""
    env_token = os.environ.get("RDMTOK")

    # Drop any DOI identifier so the test system mints a fresh one.
    full_datacite43_record["identifiers"] = [
        ident
        for ident in full_datacite43_record["identifiers"]
        if ident["identifierType"] != "DOI"
    ]

    # First create: metadata only, no files.
    recid = caltechdata_write(
        full_datacite43_record,
        schema="43",
        production=False,
        publish=True,
        token=env_token,
    )
    assert len(recid) == 11

    # Second create: same metadata with a file attached.
    recid = caltechdata_write(
        full_datacite43_record,
        schema="43",
        production=False,
        files=["helpers.py"],
        publish=True,
        token=env_token,
    )
    assert len(recid) == 11

    # Metadata-only edit keeps the record but must return a test-prefix DOI.
    full_datacite43_record["publisher"] = "Edited"
    doi = caltechdata_edit(
        recid,
        full_datacite43_record,
        schema="43",
        production=False,
        publish=True,
        token=env_token,
    )
    assert doi.startswith("10.33569")

    # Validate is false until geolocation points are fixed/we move to 4.6
    new_metadata = get_metadata(recid, production=False, validate=False)
    assert new_metadata["publisher"] == "Edited"

    # Edit with a new file: a new version (and therefore a new DOI) is minted.
    full_datacite43_record["publisher"] = "Again!"
    new_doi = caltechdata_edit(
        recid,
        full_datacite43_record,
        files=["helpers.py"],
        schema="43",
        production=False,
        publish=True,
        token=env_token,
    )
    assert new_doi != doi

    recid = new_doi.split("/")[1]

    # Validate is false until geolocation points are fixed/we move to 4.6
    new_metadata = get_metadata(recid, production=False, validate=False)
    assert new_metadata["publisher"] == "Again!"
# Function to get all JSON files in the directory
def get_all_json_files(directory):
    """Return the full path of every .json file directly inside *directory*."""
    json_paths = []
    for entry in os.listdir(directory):
        if entry.endswith(".json"):
            json_paths.append(os.path.join(directory, entry))
    return json_paths
@pytest.mark.parametrize(
    "missing_field_file",
    [
        {
            # These fixtures live in data/invalid_datacite43/, not data/;
            # building the path with DATACITE43_DIR made the existence
            # check below fail, so every case was silently skipped.
            "file": os.path.join(INVALID_DATACITE43_DIR, "missing_creators.json"),
            "missing_field": "creators",
        },
        {
            "file": os.path.join(INVALID_DATACITE43_DIR, "missing_titles.json"),
            "missing_field": "titles",
        },
    ],
)
def test_missing_required_fields(missing_field_file):
    """Test that JSON files missing required fields fail validation."""
    print(
        f"\nTesting missing field: {missing_field_file['missing_field']} in file: {missing_field_file['file']}"
    )

    # Skip the test if the file doesn't exist
    if not os.path.exists(missing_field_file["file"]):
        pytest.skip(f"Test file not found: {missing_field_file['file']}")

    json_data = load_json_path(missing_field_file["file"])
    with pytest.raises(
        ValueError,
        match=f"Missing required metadata field: {missing_field_file['missing_field']}",
    ):
        validator43(json_data)
def test_multiple_errors():
    """Test JSON file with multiple issues to check all errors are raised."""
    # This fixture lives in data/invalid_datacite43/, not data/; the old
    # DATACITE43_DIR-based path never existed, so the test always skipped.
    multiple_errors_file = os.path.join(INVALID_DATACITE43_DIR, "multiple_errors.json")

    # Skip the test if the file doesn't exist
    if not os.path.exists(multiple_errors_file):
        pytest.skip(f"Test file not found: {multiple_errors_file}")

    json_data = load_json_path(multiple_errors_file)
    with pytest.raises(ValueError, match="Multiple validation errors"):
        validator43(json_data)
"""Upload a DataCite 4.x JSON record (plus optional files) to the
CaltechDATA test instance via caltechdata_write."""

import argparse
import json
import os

from caltechdata_api import caltechdata_write

parser = argparse.ArgumentParser(
    description="Write files and a DataCite 4 standard json record\
    to CaltechDATA repository"
)
parser.add_argument(
    "json_file", nargs=1, help="file name for json DataCite metadata file"
)
parser.add_argument("-fnames", nargs="*", help="New Files")
parser.add_argument("-schema", default="43", help="Metadata Schema")

args = parser.parse_args()

# Get access token as environment variable
token = os.environ["RDMTOK"]

# Context manager closes the metadata file promptly; the original handle
# was opened with open() and never closed.
with open(args.json_file[0], "r") as metaf:
    metadata = json.load(metaf)

production = False
publish = True

response = caltechdata_write(
    metadata, token, args.fnames, production, args.schema, publish
)
print(response)
"""Upload a DataCite 4.x JSON record (plus optional files) to the
CaltechDATA production instance as an unpublished draft in the
CaltechAUTHORS community."""

import argparse
import json
import os

from caltechdata_api import caltechdata_write

parser = argparse.ArgumentParser(
    description="Write files and a DataCite 4 standard json record\
    to CaltechDATA repository"
)
parser.add_argument(
    "json_file", nargs=1, help="file name for json DataCite metadata file"
)
parser.add_argument("-fnames", nargs="*", help="New Files")
parser.add_argument("-schema", default="43", help="Metadata Schema")

args = parser.parse_args()

# Get access token as environment variable
token = os.environ["RDMTOK"]

# Context manager closes the metadata file promptly; the original handle
# was opened with open() and never closed.
with open(args.json_file[0], "r") as metaf:
    metadata = json.load(metaf)

production = True
publish = False
authors = True
# CaltechAUTHORS community id on the production instance.
community = "669e5e57-7d9e-4d19-8ab5-9c6158562fb3"

response = caltechdata_write(
    metadata,
    token,
    args.fnames,
    production,
    args.schema,
    publish,
    community=community,
    authors=authors,
)
print(response)
# Records already uploaded (completed) or deliberately skipped (excluded)
# are tracked in JSON files so the script can resume after interruption.
with open("completed_dois.json", "r") as infile:
    completed = json.load(infile)

for doi in completed:
    if doi in records:
        records.remove(doi)
    else:
        # Completed DOI that no longer appears in the bucket listing.
        print(doi)

with open("excluded_dois.json", "r") as infile:
    excluded = json.load(infile)

for doi in excluded:
    records.remove(doi)

for record in tqdm(records):
    base = record.split("/")[1]
    meta_path = path + base + "/metadata.json"
    metadata = None
    files = s3.ls(path + base)
    if len(files) == 0:
        # Nothing to upload; persist the exclusion so future runs skip it.
        excluded.append(record)
        print(f"No files available {record}")
        with open("excluded_dois.json", "w") as outfile:
            json.dump(excluded, outfile)
    else:
        try:
            metaf = s3.open(meta_path, "rb")
            metadata = json.load(metaf)
        except Exception:  # narrowed from a bare `except:`
            print(files)
            excluded.append(record)
            print(f"Missing metadata {record}")
            # Persist the exclusion BEFORE exiting; the original called
            # exit() first, which made this write unreachable.
            with open("excluded_dois.json", "w") as outfile:
                json.dump(excluded, outfile)
            exit()

    if metadata:
        metadata["identifiers"] = [{"identifier": record, "identifierType": "DOI"}]

        # Collect direct S3 links for the zip file or files.
        file_links = []
        for link in s3.glob(path + base + "/*.zip"):
            file_links.append(endpoint + link)

        metadata["types"] = {"resourceType": "", "resourceTypeGeneral": "Dataset"}
        metadata["schemaVersion"] = "http://datacite.org/schema/kernel-4"
        metadata["publicationYear"] = str(metadata["publicationYear"])
        metadata["rightsList"] = [
            {
                "rights": "cc-by-sa-4.0",
                "rightsUri": "http://creativecommons.org/licenses/by-sa/4.0/",
            }
        ]
        # Relations shared by every record in this collection.
        static = [
            {
                "relatedIdentifier": "10.25989/es8t-kswe",
                "relationType": "IsPartOf",
                "relatedIdentifierType": "DOI",
            },
            {
                "relatedIdentifier": "10.1038/s41524-019-0216-x",
                "relationType": "IsDocumentedBy",
                "relatedIdentifierType": "DOI",
            },
        ]
        if "relatedIdentifiers" in metadata:
            metadata["relatedIdentifiers"] += static
        else:
            metadata["relatedIdentifiers"] = static
        metadata["fundingReferences"] = [
            {
                "funderName": "Office of Science of the U.S. Department of Energy",
                "awardTitle": "Energy Innovation Hub Renewal - Fuels from Sunlight",
                "awardNumber": "DE-SC0004993",
            }
        ]

        if "descriptions" not in metadata:
            metadata["descriptions"] = [
                {"description": abstract, "descriptionType": "Abstract"}
            ]
        else:
            # Unexpected: the source metadata already carries descriptions;
            # stop so a human can decide how to merge them.
            print(metadata["descriptions"])
            exit()

        # Drop empty-list fields, which fail DataCite validation.
        for meta in metadata.copy():
            if metadata[meta] == []:
                metadata.pop(meta)
        # .get() guards against records without a contributors field.
        for contributor in metadata.get("contributors", []):
            if contributor["affiliation"] == []:
                contributor.pop("affiliation")
        new_cre = []
        for creator in metadata["creators"]:
            if creator["affiliation"] == []:
                creator.pop("affiliation")
            if creator["name"] != "Contributors":
                new_cre.append(creator)
        metadata["creators"] = new_cre

        doi = metadata["doi"].lower()
        # Fields DataCite returns on read but rejects on write.
        unnecessary = [
            "id",
            "doi",
            "container",
            "providerId",
            "clientId",
            "agency",
            "state",
        ]
        for un in unnecessary:
            if un in metadata:
                metadata.pop(un)
        if "dates" in metadata:
            for d in metadata["dates"]:
                d["date"] = str(d["date"])
        valid = schema43.validate(metadata)
        if not valid:
            # The original called iter_errors on an undefined name
            # (`instance`) after validator.validate() had already raised;
            # iterate the errors for `metadata` directly instead.
            errors = sorted(
                schema43.validator.iter_errors(metadata), key=lambda e: e.path
            )
            for error in errors:
                print(error.message)
            exit()

        # pop with a default: not every record carries a language field.
        metadata.pop("language", None)
        community = "d0de1569-0a01-498f-b6bd-4bc75d54012f"

        production = True

        new_id = caltechdata_write(
            metadata,
            schema="43",
            publish=True,
            production=True,
            file_links=file_links,
            s3=s3,
            community=community,
        )
        url = f"https://data.caltech.edu/records/{new_id}"

        # Register a brand-new DOI, or repoint an existing one at the record.
        result = requests.get(f"https://api.datacite.org/dois/{doi}")
        if result.status_code != 200:
            doi = datacite.public_doi(doi=record, metadata=metadata, url=url)
        else:
            doi = datacite.update_doi(doi=record, metadata=metadata, url=url)["doi"]
        completed.append(doi)
        with open("completed_dois.json", "w") as outfile:
            json.dump(completed, outfile)