├── excluded_dois.json
├── run-tests.sh
├── token.bash
├── setup.py
├── logo.gif
├── caltechdata_api
├── pictures-documentation
│ ├── README.md
│ ├── Step 1.png
│ ├── Step 2.png
│ ├── Step 5.png
│ ├── Step 3(a).png
│ ├── Step 3(b).png
│ ├── Step 3(c).png
│ ├── Step 4(a).png
│ ├── Step 4(b).png
│ ├── Step 6(a).png
│ ├── Step 6(b).png
│ ├── Interact CLI Step 1(a).png
│ ├── Interact CLI Step 1(b).png
│ ├── Interact CLI Step 2(a).png
│ ├── Interact CLI Step 2(b).png
│ ├── Interact CLI Step 2(c).png
│ ├── Interact CLI Step 1(c) Test Instance.png
│ └── Interact CLI Step 1(d) Test Instance.png
├── vocabularies
│ ├── .DS_Store
│ ├── title_types.yaml
│ ├── description_types.yaml
│ ├── date_types.yaml
│ ├── identifier_types.yaml
│ ├── roles.yaml
│ └── relation_types.yaml
├── __init__.py
├── vocabularies.yaml
├── get_files.py
├── download_file.py
├── utils.py
├── get_metadata.py
└── md_to_json.py
├── pyproject.toml
├── .gitignore
├── .github
└── workflows
│ ├── black.yaml
│ ├── pypi-publish.yaml
│ ├── bot.yaml
│ ├── codemeta2cff.yml
│ └── iga.yaml
├── tests
├── data
│ ├── invalid_datacite43
│ │ ├── invalid_metadata_1.json
│ │ ├── invalid_metadata_3.json
│ │ ├── invalid_metadata_2.json
│ │ ├── invalid_metadata_9.json
│ │ ├── invalid_metadata_10.json
│ │ ├── invalid_metadata_8.json
│ │ ├── invalid_metadata_4.json
│ │ ├── invalid_metadata_7.json
│ │ ├── invalid_metadata_6.json
│ │ └── invalid_metadata_5.json
│ ├── datacite43
│ │ ├── ep884-g0v97.json
│ │ ├── asjw8-cd908.json
│ │ ├── kxjgj-tfk18.json
│ │ ├── kxtar-bm759.json
│ │ ├── n13wc-zwc92.json
│ │ ├── rmzp9-9yx96.json
│ │ ├── nbtw5-37m55.json
│ │ ├── hevaf-20f84.json
│ │ ├── hhg7x-hgm42.json
│ │ ├── dks9f-mj878.json
│ │ ├── n0y4x-xx706.json
│ │ ├── wbty9-bqy29.json
│ │ ├── 4yxbs-4mj38.json
│ │ ├── b2jqz-qdw65.json
│ │ ├── t15w6-x9q23.json
│ │ ├── d7mk4-f8t44.json
│ │ └── f40da-hww21.json
│ └── caltechdata
│ │ ├── 1235.json
│ │ ├── 1250.json
│ │ └── 1259.json
├── test_download.py
├── helpers.py
├── test_rdm.py
├── test_unit.py
├── bot_yaml.py
└── bot.py
├── accept.py
├── outdated
├── unembargo.py
├── test_community.py
├── test.py
├── README.md
├── edit_files.py
├── edit_all.py
├── test_file.py
├── update_thesis_file.py
├── edit_all_github.py
├── edit_tccon.py
├── edit_all_tccon.py
├── add_doi_minting_date.py
├── write_pilot_phase1.py
├── edit_all_geo.py
├── get_geo.py
├── caltechdata_multipart.py
└── example_download_and_upload.ipynb
├── write.py
├── CITATION.cff
├── write_authors.py
├── update_osn_links.py
├── edit.py
├── add_files_authors.py
├── setup.cfg
├── LICENSE
├── inspect_dois.py
├── fix_names.py
├── templates
└── README.md
├── edit_osn.py
├── update_descriptions.py
├── README.md
├── rdm.json
├── CODE_OF_CONDUCT.md
├── codemeta.json
├── example.json
├── example_custom.json
└── write_hte.py
/excluded_dois.json:
--------------------------------------------------------------------------------
1 | []
2 |
--------------------------------------------------------------------------------
/run-tests.sh:
--------------------------------------------------------------------------------
1 | pytest tests -vv
2 |
--------------------------------------------------------------------------------
/token.bash:
--------------------------------------------------------------------------------
1 | export RDMTOK="token"
2 |
3 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 |
3 | setup()
4 |
--------------------------------------------------------------------------------
/logo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caltechlibrary/caltechdata_api/HEAD/logo.gif
--------------------------------------------------------------------------------
/caltechdata_api/pictures-documentation/README.md:
--------------------------------------------------------------------------------
1 | This subfolder is created to store the pictures for documentation
2 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=61.0", "wheel"]
3 | build-backend = "setuptools.build_meta"
4 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | build/
2 | dist/
3 | data/
4 | caltechdata_api.egg-info/
5 | caltechdata_api/__pycache__/
6 | tests/__pycache__/
7 |
--------------------------------------------------------------------------------
/caltechdata_api/vocabularies/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caltechlibrary/caltechdata_api/HEAD/caltechdata_api/vocabularies/.DS_Store
--------------------------------------------------------------------------------
/caltechdata_api/pictures-documentation/Step 1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caltechlibrary/caltechdata_api/HEAD/caltechdata_api/pictures-documentation/Step 1.png
--------------------------------------------------------------------------------
/caltechdata_api/pictures-documentation/Step 2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caltechlibrary/caltechdata_api/HEAD/caltechdata_api/pictures-documentation/Step 2.png
--------------------------------------------------------------------------------
/caltechdata_api/pictures-documentation/Step 5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caltechlibrary/caltechdata_api/HEAD/caltechdata_api/pictures-documentation/Step 5.png
--------------------------------------------------------------------------------
/caltechdata_api/pictures-documentation/Step 3(a).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caltechlibrary/caltechdata_api/HEAD/caltechdata_api/pictures-documentation/Step 3(a).png
--------------------------------------------------------------------------------
/caltechdata_api/pictures-documentation/Step 3(b).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caltechlibrary/caltechdata_api/HEAD/caltechdata_api/pictures-documentation/Step 3(b).png
--------------------------------------------------------------------------------
/caltechdata_api/pictures-documentation/Step 3(c).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caltechlibrary/caltechdata_api/HEAD/caltechdata_api/pictures-documentation/Step 3(c).png
--------------------------------------------------------------------------------
/caltechdata_api/pictures-documentation/Step 4(a).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caltechlibrary/caltechdata_api/HEAD/caltechdata_api/pictures-documentation/Step 4(a).png
--------------------------------------------------------------------------------
/caltechdata_api/pictures-documentation/Step 4(b).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caltechlibrary/caltechdata_api/HEAD/caltechdata_api/pictures-documentation/Step 4(b).png
--------------------------------------------------------------------------------
/caltechdata_api/pictures-documentation/Step 6(a).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caltechlibrary/caltechdata_api/HEAD/caltechdata_api/pictures-documentation/Step 6(a).png
--------------------------------------------------------------------------------
/caltechdata_api/pictures-documentation/Step 6(b).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caltechlibrary/caltechdata_api/HEAD/caltechdata_api/pictures-documentation/Step 6(b).png
--------------------------------------------------------------------------------
/caltechdata_api/pictures-documentation/Interact CLI Step 1(a).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caltechlibrary/caltechdata_api/HEAD/caltechdata_api/pictures-documentation/Interact CLI Step 1(a).png
--------------------------------------------------------------------------------
/caltechdata_api/pictures-documentation/Interact CLI Step 1(b).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caltechlibrary/caltechdata_api/HEAD/caltechdata_api/pictures-documentation/Interact CLI Step 1(b).png
--------------------------------------------------------------------------------
/caltechdata_api/pictures-documentation/Interact CLI Step 2(a).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caltechlibrary/caltechdata_api/HEAD/caltechdata_api/pictures-documentation/Interact CLI Step 2(a).png
--------------------------------------------------------------------------------
/caltechdata_api/pictures-documentation/Interact CLI Step 2(b).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caltechlibrary/caltechdata_api/HEAD/caltechdata_api/pictures-documentation/Interact CLI Step 2(b).png
--------------------------------------------------------------------------------
/caltechdata_api/pictures-documentation/Interact CLI Step 2(c).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caltechlibrary/caltechdata_api/HEAD/caltechdata_api/pictures-documentation/Interact CLI Step 2(c).png
--------------------------------------------------------------------------------
/caltechdata_api/pictures-documentation/Interact CLI Step 1(c) Test Instance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caltechlibrary/caltechdata_api/HEAD/caltechdata_api/pictures-documentation/Interact CLI Step 1(c) Test Instance.png
--------------------------------------------------------------------------------
/caltechdata_api/pictures-documentation/Interact CLI Step 1(d) Test Instance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caltechlibrary/caltechdata_api/HEAD/caltechdata_api/pictures-documentation/Interact CLI Step 1(d) Test Instance.png
--------------------------------------------------------------------------------
/.github/workflows/black.yaml:
--------------------------------------------------------------------------------
1 | name: Lint
2 |
3 | on: [push, pull_request]
4 |
5 | jobs:
6 | lint:
7 | runs-on: ubuntu-latest
8 | steps:
9 | - uses: actions/checkout@v4
10 | - uses: psf/black@stable
11 |
--------------------------------------------------------------------------------
/tests/data/invalid_datacite43/invalid_metadata_1.json:
--------------------------------------------------------------------------------
1 | {
2 | "creators": [
3 | {
4 | "name": "John Doe"
5 | }
6 | ],
7 | "publisher": "Caltech",
8 | "publicationYear": "2023",
9 | "types": {
10 | "resourceTypeGeneral": "Dataset"
11 | }
12 | }
--------------------------------------------------------------------------------
/tests/data/invalid_datacite43/invalid_metadata_3.json:
--------------------------------------------------------------------------------
1 | {
2 | "titles": [
3 | {
4 | "title": "Sample Title"
5 | }
6 | ],
7 | "publisher": "Caltech",
8 | "publicationYear": "2023",
9 | "types": {
10 | "resourceTypeGeneral": "Dataset"
11 | }
12 | }
--------------------------------------------------------------------------------
/tests/data/invalid_datacite43/invalid_metadata_2.json:
--------------------------------------------------------------------------------
1 | {
2 | "titles": [],
3 | "creators": [
4 | {
5 | "name": "John Doe"
6 | }
7 | ],
8 | "publisher": "Caltech",
9 | "publicationYear": "2023",
10 | "types": {
11 | "resourceTypeGeneral": "Dataset"
12 | }
13 | }
--------------------------------------------------------------------------------
/tests/data/invalid_datacite43/invalid_metadata_9.json:
--------------------------------------------------------------------------------
1 | {
2 | "titles": [
3 | {
4 | "title": "Sample Title"
5 | }
6 | ],
7 | "creators": [
8 | {
9 | "name": "John Doe"
10 | }
11 | ],
12 | "publicationYear": "2023",
13 | "types": {
14 | "resourceTypeGeneral": "Dataset"
15 | }
16 | }
--------------------------------------------------------------------------------
/tests/data/invalid_datacite43/invalid_metadata_10.json:
--------------------------------------------------------------------------------
1 | {
2 | "titles": [
3 | {
4 | "title": "Sample Title"
5 | }
6 | ],
7 | "creators": [
8 | {
9 | "name": "John Doe"
10 | }
11 | ],
12 | "version": 1,
13 | "publisher": "Caltech",
14 | "publicationYear": "2023",
15 | "types": {
16 | "resourceTypeGeneral": "Dataset"
17 | }
18 | }
--------------------------------------------------------------------------------
/tests/data/invalid_datacite43/invalid_metadata_8.json:
--------------------------------------------------------------------------------
1 | {
2 | "titles": [
3 | {
4 | "title": "Sample Title"
5 | }
6 | ],
7 | "creators": [
8 | {
9 | "name": "John Doe"
10 | }
11 | ],
12 | "dates": [
13 | {}
14 | ],
15 | "publisher": "Caltech",
16 | "publicationYear": "2023",
17 | "types": {
18 | "resourceTypeGeneral": "Dataset"
19 | }
20 | }
--------------------------------------------------------------------------------
/tests/data/invalid_datacite43/invalid_metadata_4.json:
--------------------------------------------------------------------------------
1 | {
2 | "titles": [
3 | {
4 | "title": "Sample Title"
5 | }
6 | ],
7 | "creators": [
8 | {
9 | "name": "John Doe"
10 | }
11 | ],
12 | "contributors": [
13 | {}
14 | ],
15 | "publisher": "Caltech",
16 | "publicationYear": "2023",
17 | "types": {
18 | "resourceTypeGeneral": "Dataset"
19 | }
20 | }
--------------------------------------------------------------------------------
/tests/data/invalid_datacite43/invalid_metadata_7.json:
--------------------------------------------------------------------------------
1 | {
2 | "titles": [
3 | {
4 | "title": "Sample Title"
5 | }
6 | ],
7 | "creators": [
8 | {
9 | "name": "John Doe"
10 | }
11 | ],
12 | "identifiers": [
13 | {}
14 | ],
15 | "publisher": "Caltech",
16 | "publicationYear": "2023",
17 | "types": {
18 | "resourceTypeGeneral": "Dataset"
19 | }
20 | }
--------------------------------------------------------------------------------
/caltechdata_api/vocabularies/title_types.yaml:
--------------------------------------------------------------------------------
1 | - id: alternative-title
2 | props:
3 | datacite: AlternativeTitle
4 | title:
5 | en: Alternative title
6 | - id: subtitle
7 | props:
8 | datacite: Subtitle
9 | title:
10 | en: Subtitle
11 | - id: translated-title
12 | props:
13 | datacite: TranslatedTitle
14 | title:
15 | en: Translated title
16 | - id: other
17 | props:
18 | datacite: Other
19 | title:
20 | en: Other
21 |
--------------------------------------------------------------------------------
/tests/data/invalid_datacite43/invalid_metadata_6.json:
--------------------------------------------------------------------------------
1 | {
2 | "titles": [
3 | {
4 | "title": "Sample Title"
5 | }
6 | ],
7 | "creators": [
8 | {
9 | "name": "John Doe"
10 | }
11 | ],
12 | "fundingReferences": [
13 | {
14 | "funderIdentifier": "1234"
15 | }
16 | ],
17 | "publisher": "Caltech",
18 | "publicationYear": "2023",
19 | "types": {
20 | "resourceTypeGeneral": "Dataset"
21 | }
22 | }
--------------------------------------------------------------------------------
/tests/data/invalid_datacite43/invalid_metadata_5.json:
--------------------------------------------------------------------------------
1 | {
2 | "titles": [
3 | {
4 | "title": "Sample Title"
5 | }
6 | ],
7 | "creators": [
8 | {
9 | "name": "John Doe"
10 | }
11 | ],
12 | "descriptions": [
13 | {
14 | "description": "Sample Description"
15 | }
16 | ],
17 | "publisher": "Caltech",
18 | "publicationYear": "2023",
19 | "types": {
20 | "resourceTypeGeneral": "Dataset"
21 | }
22 | }
--------------------------------------------------------------------------------
/accept.py:
--------------------------------------------------------------------------------
# Command-line helper: accept records to a community in CaltechDATA.
import argparse
import os

from caltechdata_api import caltechdata_accept

cli = argparse.ArgumentParser(
    description="Accept records to a community in the CaltechDATA repository"
)
cli.add_argument("ids", nargs="*", help="CaltechDATA IDs")
options = cli.parse_args()

# The access token is read from the RDMTOK environment variable
# (set it beforehand, e.g. with `source token.bash`).
rdm_token = os.environ["RDMTOK"]

# Forwarded as the third positional argument of caltechdata_accept.
use_production = True

caltechdata_accept(options.ids, rdm_token, use_production)
print("Completed")
21 |
--------------------------------------------------------------------------------
/caltechdata_api/__init__.py:
--------------------------------------------------------------------------------
1 | from .caltechdata_write import (
2 | caltechdata_write,
3 | write_files_rdm,
4 | add_file_links,
5 | send_to_community,
6 | )
7 | from .caltechdata_edit import (
8 | caltechdata_edit,
9 | caltechdata_unembargo,
10 | caltechdata_accept,
11 | caltechdata_reject,
12 | )
13 | from .customize_schema import customize_schema, validate_metadata
14 | from .get_metadata import get_metadata
15 | from .download_file import download_file, download_url
16 | from .utils import humanbytes
17 | from .md_to_json import parse_readme_to_json
18 |
--------------------------------------------------------------------------------
/outdated/unembargo.py:
--------------------------------------------------------------------------------
# Command-line helper that calls caltechdata_unembargo for the given IDs.
#
# NOTE(review): this script lives in outdated/ and reads the TINDTOK token;
# the (token, ids, production) argument order differs from how accept.py
# calls its caltechdata_api function -- confirm the current signature
# before relying on it.
import argparse
import json
import os

from caltechdata_api import caltechdata_unembargo

parser = argparse.ArgumentParser(
    # Adjacent literals are concatenated by the parser; unlike a backslash
    # continuation inside the string, this keeps source indentation out of
    # the help text and preserves the space between the two halves.
    description="Write files and a DataCite 4 standard json record "
    "to CaltechDATA repository"
)
parser.add_argument("-ids", nargs="*", help="CaltechDATA IDs")
args = parser.parse_args()

# Get access token from TIND set as environment variable with source token.bash
token = os.environ["TINDTOK"]

production = False

response = caltechdata_unembargo(token, args.ids, production)
print(response)
18 |
--------------------------------------------------------------------------------
/outdated/test_community.py:
--------------------------------------------------------------------------------
# Manual check: submit a draft record for community review and print the
# HTTP status code and response body.
import os

import requests

token = os.environ["RDMTOK"]

# No trailing slash here: the API path appended below already starts with
# "/", and keeping both produced a "//api/..." request URL.
url = "https://data.caltechlibrary.dev"

headers = {
    "Authorization": "Bearer %s" % token,
    "Content-type": "application/json",
}

data = {"payload": {"content": "I want this record to be in!", "format": "html"}}

result = requests.post(
    url + "/api/records/cxc6m-bef55/draft/actions/submit-review",
    headers=headers,
    json=data,
)

print(result.status_code)
print(result.text)
22 | # if result.status_code != 201:
23 | # print(result.text)
24 | # exit()
25 |
--------------------------------------------------------------------------------
/outdated/test.py:
--------------------------------------------------------------------------------
1 | from datacite import schema43
2 | import io, json
3 | from os.path import dirname, join
4 |
5 |
def load_json_path(path):
    """Read the JSON file at *path* (resolved against this module's folder).

    Returns the decoded JSON value. The file is read as UTF-8.
    """
    full_path = join(dirname(__file__), path)
    with open(full_path, encoding="utf-8") as handle:
        return json.load(handle)
12 |
13 |
metadata = load_json_path("example43.json")

# The result of schema43.validate() is used only as a pass/fail flag here.
# On failure, ask the module's validator object for the individual errors
# so each message can be printed.  The previous code referenced an
# undefined name (`instance`) and called iter_errors on the return value
# of validator.validate() instead of on the validator itself.
valid = schema43.validate(metadata)
if not valid:
    errors = sorted(schema43.validator.iter_errors(metadata), key=lambda e: e.path)
    for error in errors:
        print(error.message)
22 |
--------------------------------------------------------------------------------
/caltechdata_api/vocabularies.yaml:
--------------------------------------------------------------------------------
1 | creatorsroles:
2 | pid-type: crr
3 | data-file: vocabularies/roles.yaml
4 | contributorsroles:
5 | pid-type: cor
6 | data-file: vocabularies/roles.yaml
7 | resourcetypes:
8 | pid-type: rsrct
9 | data-file: vocabularies/resource_types.yaml
10 | descriptiontypes:
11 | pid-type: dty
12 | data-file: vocabularies/description_types.yaml
13 | datetypes:
14 | pid-type: dat
15 | data-file: vocabularies/date_types.yaml
16 | relationtypes:
17 | pid-type: rlt
18 | data-file: vocabularies/relation_types.yaml
19 | titletypes:
20 | pid-type: ttyp
21 | data-file: vocabularies/title_types.yaml
22 | identifiertypes:
23 | pid-type: idt
24 | data-file: vocabularies/identifier_types.yaml
25 |
--------------------------------------------------------------------------------
/outdated/README.md:
--------------------------------------------------------------------------------
1 | # caltechdata_api outdated functions
2 |
3 | These functions have yet to be updated to the InvenioRDM version of
4 | CaltechDATA. Many will be updated in the future, but for now they are available
5 | here for reference.
6 |
7 |
8 | Get geographic metadata from CaltechDATA with WKT representations in a csv file.
9 | You can import this to a GIS program like QGIS
10 | using a delimited text import and projection epsg:4326. You'll have to do one
11 | import for Geometry type Point and another for Geometry type Polygon.
12 |
13 | ```
14 | python get_geo.py caltechdata_geo.csv
15 | ```
16 |
17 | You can filter by keyword
18 |
19 | ```
20 | python get_geo.py caltechdata_geo.csv -keywords TCCON
21 | ```
22 |
23 |
24 |
--------------------------------------------------------------------------------
/outdated/edit_files.py:
--------------------------------------------------------------------------------
# Command-line helper that calls caltechdata_edit to add and/or delete
# files on existing records (no metadata changes: an empty dict is passed).
import argparse
import json
import os

from caltechdata_api import caltechdata_edit

parser = argparse.ArgumentParser(
    # Adjacent literals instead of a backslash continuation inside the
    # string, so the help text keeps the space between "record" and "to"
    # and never picks up source indentation.
    description="Write files and a DataCite 4 standard json record "
    "to CaltechDATA repository"
)
parser.add_argument("-ids", nargs="*", help="CaltechDATA IDs")
parser.add_argument("-fnames", nargs="*", help="New Files")
parser.add_argument("-delete", nargs="*", help="Files To Delete")
args = parser.parse_args()

# Get access token from TIND set as environment variable with source token.bash
token = os.environ["TINDTOK"]

production = True

print(args.delete)

response = caltechdata_edit(token, args.ids, {}, args.fnames, args.delete, production)
print(response)
22 |
--------------------------------------------------------------------------------
/.github/workflows/pypi-publish.yaml:
--------------------------------------------------------------------------------
1 | name: Publish
2 |
3 | on:
4 | push:
5 | tags:
6 | - v*
7 |
8 | jobs:
9 | build-n-publish:
10 | runs-on: ubuntu-latest
11 | steps:
12 | - uses: actions/checkout@v2
13 | - name: Set up Python 3.9
14 | uses: actions/setup-python@v2
15 | with:
16 | python-version: 3.9
17 | - name: Install dependencies
18 | run: |
19 | python -m pip install --upgrade pip
20 | pip install setuptools wheel
21 | - name: Build package
22 | run: |
23 | python setup.py sdist bdist_wheel
24 | - name: Publish
25 | uses: pypa/gh-action-pypi-publish@v1.13.0
26 | with:
27 | user: __token__
28 | password: ${{ secrets.pypi_token }}
29 |
--------------------------------------------------------------------------------
/caltechdata_api/vocabularies/description_types.yaml:
--------------------------------------------------------------------------------
1 | - id: abstract
2 | props:
3 | datacite: Abstract
4 | title:
5 | en: Abstract
6 | - id: methods
7 | props:
8 | datacite: Methods
9 | title:
10 | en: Methods
11 | - id: series-information
12 | props:
13 | datacite: SeriesInformation
14 | title:
15 | en: Series information
16 | - id: table-of-contents
17 | props:
18 | datacite: TableOfContents
19 | title:
20 | en: Table of contents
21 | - id: technical-info
22 | props:
23 | datacite: TechnicalInfo
24 | title:
25 | en: Technical info
26 | - id: other
27 | props:
28 | datacite: Other
29 | title:
30 | en: Other
31 | # Not really a datacite mapping, but needed to support passing custom types
32 | - id: files
33 | props:
34 | datacite: files
35 |
--------------------------------------------------------------------------------
/outdated/edit_all.py:
--------------------------------------------------------------------------------
# Command-line helper that applies one metadata file (and optional new
# files) to record IDs 1 through 716 via caltechdata_edit.
import argparse
import json
import os

from caltechdata_api import caltechdata_edit

parser = argparse.ArgumentParser(
    # Adjacent literals instead of a backslash continuation inside the
    # string, so the help text keeps the space between "record" and "to"
    # and never picks up source indentation.
    description="Write files and a DataCite 4 standard json record "
    "to CaltechDATA repository"
)
parser.add_argument(
    "json_file", nargs=1, help="file name for json DataCite metadata file"
)
parser.add_argument("-fnames", nargs="*", help="New Files")
args = parser.parse_args()

# Get access token from TIND set as environment variable with source token.bash
token = os.environ["TINDTOK"]

# Close the metadata file as soon as it has been parsed.
with open(args.json_file[0], "r") as metaf:
    metadata = json.load(metaf)

production = False

ids = range(1, 717)
response = caltechdata_edit(token, ids, metadata, args.fnames, {"pdf"}, production)
print(response)
25 |
--------------------------------------------------------------------------------
/write.py:
--------------------------------------------------------------------------------
# Command-line helper that loads a DataCite JSON metadata file and passes
# it, with optional files, to caltechdata_write.
import argparse
import json
import os

from caltechdata_api import caltechdata_write

parser = argparse.ArgumentParser(
    # Adjacent literals instead of a backslash continuation inside the
    # string, so the help text keeps the space between "record" and "to"
    # and never picks up source indentation.
    description="Write files and a DataCite 4 standard json record "
    "to CaltechDATA repository"
)
parser.add_argument(
    "json_file", nargs=1, help="file name for json DataCite metadata file"
)
parser.add_argument("-fnames", nargs="*", help="New Files")
parser.add_argument("-schema", default="43", help="Metadata Schema")

args = parser.parse_args()

# Get access token as environment variable
token = os.environ["RDMTOK"]

# Close the metadata file as soon as it has been parsed.
with open(args.json_file[0], "r") as metaf:
    metadata = json.load(metaf)

production = False
publish = True

response = caltechdata_write(
    metadata, token, args.fnames, production, args.schema, publish
)
print(response)
29 |
--------------------------------------------------------------------------------
/outdated/test_file.py:
--------------------------------------------------------------------------------
# Manual check: run test_file.json through customize_schema and POST the
# result to the sandbox create endpoint, printing the response body.
import os, json
from requests import session
from caltechdata_api import customize_schema

# fileinfo = [ {"url": , "filename": filename, "md5": md5, "size": size}]

token = os.environ["TINDTOK"]

# Close the metadata file as soon as it has been parsed.
with open("test_file.json", "r") as metaf:
    metadata = json.load(metaf)

url = "https://cd-sandbox.tind.io/submit/api/create/"

headers = {"Authorization": "Bearer %s" % token, "Content-type": "application/json"}

newdata = customize_schema(metadata)
# if "doi" not in newdata:
#     # We want tind to generate the identifier
#     newdata["final_actions"] = [
#         {"type": "create_doi", "parameters": {"type": "records", "field": "doi"}}
#     ]

dat = json.dumps({"record": newdata})

# Use the session as a context manager so its connections are released.
with session() as c:
    response = c.post(url, headers=headers, data=dat)
print(response.text)
28 |
--------------------------------------------------------------------------------
/tests/test_download.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # This file is part of caltechdata_api.
4 | #
5 | # Copyright (C) 2020 Caltech.
6 | #
7 | # caltechdata_api is free software; you can redistribute it and/or modify it
8 | # under the terms of the Revised BSD License; see LICENSE file for
9 | # more details.
10 |
11 | """Tests for file downloads."""
12 |
13 | import pytest
14 |
15 | from caltechdata_api import download_url, download_file
16 |
17 |
@pytest.mark.skip(reason="works, don't want to do unnecessary downloads")
def test_download():
    """Check download_url and download_file against a known DOI.

    Skipped by default (see the skip reason on the decorator).
    """
    doi = "10.22002/D1.1098"
    resolved_url = download_url(doi)
    assert resolved_url == (
        "https://data.caltech.edu/tindfiles/serve/293d37c5-73f2-4016-bcd5-76cf353ff9d8/"
    )
    saved_name = download_file(doi)
    assert saved_name == "10.22002-D1.1098"
28 |
--------------------------------------------------------------------------------
/outdated/update_thesis_file.py:
--------------------------------------------------------------------------------
# Harvest the "thesis" feed into a local dataset collection and write a CSV
# mapping each record key to its "official_url".
#
# NOTE(review): the nesting below was reconstructed from a listing whose
# indentation was lost; confirm that the final print(metadata) belongs to
# the "Bad Record" branch.
import os, subprocess, json, csv
import sys
import dataset
from ames.harvesters import get_caltechfeed

# Work inside a local "data" directory, creating it on first run.
os.makedirs("data", exist_ok=True)
os.chdir("data")

get_caltechfeed("thesis")

record_list = {}
collection = "CaltechTHESIS.ds"
keys = dataset.keys(collection)
for count, k in enumerate(keys, start=1):
    # Progress indicator every 100 records.
    if count % 100 == 0:
        print(count)
    metadata, err = dataset.read(collection, k)
    if err != "":
        print("Error on read ", err)
        # Non-zero status so callers can tell the run failed; sys.exit is
        # used because the bare exit() helper is only provided by `site`.
        sys.exit(1)
    if metadata != {}:
        if "official_url" in metadata:
            record_list[k] = metadata["official_url"]
        else:
            print("Missing URL", metadata)
    else:
        print("Bad Record: " + k)
        print(metadata)

# newline="" is what the csv module expects for files it writes; without
# it, extra blank rows can appear on platforms that translate "\n".
with open("record_list.csv", "w", newline="") as f:
    w = csv.writer(f)
    w.writerows(record_list.items())
34 |
--------------------------------------------------------------------------------
/.github/workflows/bot.yaml:
--------------------------------------------------------------------------------
1 | name: Bot validation
2 |
3 | on: [push, pull_request]
4 |
5 | jobs:
6 | validate-metadata:
7 | runs-on: ubuntu-latest
8 |
9 | steps:
10 | - uses: actions/checkout@v4
11 | with:
12 | fetch-depth: 0
13 |
14 | - name: Set up Python
15 | uses: actions/setup-python@v4
16 | with:
17 | python-version: '3.x'
18 |
19 | - name: Install dependencies
20 | run: |
21 | python -m pip install --upgrade pip
22 | pip install pytest requests s3fs cryptography
23 | pip install .
24 |
25 | - name: Run against CaltechData Test system
26 | env:
27 | RDMTOK: ${{ secrets.CALTECHDATA_TOKEN }}
28 | run: |
29 | cd tests
30 | pytest test_unit.py
31 | pytest test_rdm.py
32 | - name: Run Metadata Validation Test and RDM
33 | env:
34 | RDMTOK: ${{ secrets.CALTECHDATA_TOKEN }}
35 | run: |
36 | cd tests
37 | python bot_yaml.py
38 |
39 |
--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
1 | cff-version: 1.2.0
2 | message: "If you use this software, please cite it as below."
3 | title: caltechdata_api
4 | authors:
5 | - family-names: Morrell
6 | given-names: Thomas E
7 | orcid: https://orcid.org/0000-0001-9266-5146
8 | - family-names: Bhattarai
9 | given-names: Rohan
10 | orcid: https://orcid.org/0009-0007-0323-4733
11 | - family-names: Won
12 | given-names: Elizabeth
13 | orcid: https://orcid.org/0009-0002-2450-6471
14 | - family-names: Abakah
15 | given-names: Alexander A
16 | orcid: https://orcid.org/0009-0003-5640-6691
17 | - family-names: Nagi
18 | given-names: Kshemaahna
19 | orcid: https://orcid.org/0009-0002-8113-3763
20 | abstract: Python wrapper for CaltechDATA API.
21 | repository-code: "https://github.com/caltechlibrary/caltechdata_api"
22 | type: software
23 | doi: 10.22002/2g4c7-zva46
24 | version: 1.10.6
25 | license-url: "https://data.caltech.edu/license"
26 | keywords:
27 | - GitHub
28 | - metadata
29 | - software
30 | - InvenioRDM
31 | date-released: 2025-10-18
32 |
--------------------------------------------------------------------------------
/write_authors.py:
--------------------------------------------------------------------------------
1 | import argparse, os, json
2 | from caltechdata_api import caltechdata_write
3 |
# Command line: one required metadata file, optional files to upload and an
# optional schema version.
parser = argparse.ArgumentParser(
    description="Write files and a DataCite 4 standard json record\
    to CaltechDATA repository"
)
parser.add_argument(
    "json_file", nargs=1, help="file name for json DataCite metadata file"
)
parser.add_argument("-fnames", nargs="*", help="New Files")
parser.add_argument("-schema", default="43", help="Metadata Schema")

args = parser.parse_args()

# Get access token as environment variable
token = os.environ["RDMTOK"]

# Use a context manager so the file handle is closed even when the JSON
# cannot be parsed.
with open(args.json_file[0], "r") as metaf:
    metadata = json.load(metaf)

# Fixed settings for this script: production instance, record left
# unpublished, authors mode enabled, and one hard-coded community id.
production = True
publish = False
authors = True
community = "669e5e57-7d9e-4d19-8ab5-9c6158562fb3"

response = caltechdata_write(
    metadata,
    token,
    args.fnames,
    production,
    args.schema,
    publish,
    community=community,
    authors=authors,
)
print(response)
38 |
--------------------------------------------------------------------------------
/caltechdata_api/vocabularies/date_types.yaml:
--------------------------------------------------------------------------------
1 | - id: accepted
2 | props:
3 | datacite: Accepted
4 | title:
5 | en: Accepted
6 | - id: available
7 | props:
8 | datacite: Available
9 | title:
10 | en: Available
11 | - id: collected
12 | props:
13 | datacite: Collected
14 | title:
15 | en: Collected
16 | - id: copyrighted
17 | props:
18 | datacite: Copyrighted
19 | title:
20 | en: Copyrighted
21 | - id: created
22 | props:
23 | datacite: Created
24 | title:
25 | en: Created
26 | - id: issued
27 | props:
28 | datacite: Issued
29 | title:
30 | en: Issued
31 | - id: other
32 | props:
33 | datacite: Other
34 | title:
35 | en: Other
36 | - id: submitted
37 | props:
38 | datacite: Submitted
39 | title:
40 | en: Submitted
41 | - id: updated
42 | props:
43 | datacite: Updated
44 | title:
45 | en: Updated
46 | - id: valid
47 | props:
48 | datacite: Valid
49 | title:
50 | en: Valid
51 | - id: withdrawn
52 | props:
53 | datacite: Withdrawn
54 | title:
55 | en: Withdrawn
56 |
--------------------------------------------------------------------------------
/tests/helpers.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # This file is part of DataCite.
4 | #
5 | # Copyright (C) 2015, 2016 CERN.
6 | #
7 | # DataCite is free software; you can redistribute it and/or modify it
8 | # under the terms of the Revised BSD License; see LICENSE file for
9 | # more details.
10 |
11 | """Test helpers."""
12 |
13 | from __future__ import absolute_import, print_function
14 |
15 | import io
16 | import json
17 | import os
18 | from os.path import dirname, join
19 |
20 |
def load_json_path(path):
    """Load a JSON example file and return the parsed content.

    ``path`` is joined onto the directory that contains this module and the
    file is read as UTF-8.
    """
    full_path = join(dirname(__file__), path)
    with io.open(full_path, encoding="utf-8") as handle:
        raw = handle.read()
    return json.loads(raw)
27 |
28 |
def write_json_path(path, metadata):
    """Serialize ``metadata`` as JSON into an example file.

    ``path`` is joined onto the directory that contains this module. The
    resolved path and the metadata are printed before the file is written
    as UTF-8.
    """
    destination = join(dirname(__file__), path)
    print(destination)
    print(metadata)
    with io.open(destination, "w", encoding="utf-8") as handle:
        json.dump(metadata, handle)
37 |
--------------------------------------------------------------------------------
/update_osn_links.py:
--------------------------------------------------------------------------------
1 | import os, requests, json, math
2 | from caltechdata_api import get_metadata, caltechdata_edit
3 |
# API token is read from the RDMTOK environment variable.
token = os.environ["RDMTOK"]

headers = {
    "Authorization": "Bearer %s" % token,
    "Content-type": "application/json",
}

# Search all versions of records whose additional description mentions the
# renc.osn.xsede.org host.
url = "https://data.caltech.edu/api/records"
query = '?q=metadata.additional_descriptions.description:"renc.osn.xsede.org"&allversions=true'
url = url + query

# The first request is only used to learn the total number of hits.
response = requests.get(url, headers=headers)
total = response.json()["hits"]["total"]
page_size = 10
pages = math.ceil(int(total) / page_size)

for page in range(1, pages + 1):
    chunkurl = f"{url}&size={page_size}&page={page}"
    response = requests.get(chunkurl, headers=headers).json()
    for hit in response["hits"]["hits"]:
        idv = hit["id"]
        print(idv)
        metadata = get_metadata(idv, token=token, validate=False)
        # Point every description at the sdsc host instead of the renc host.
        for desc in metadata["descriptions"]:
            text = desc["description"]
            desc["description"] = text.replace(
                "renc.osn.xsede.org",
                "sdsc.osn.xsede.org",
            )
        caltechdata_edit(idv, metadata, token=token, production=True, publish=True)
31 |
--------------------------------------------------------------------------------
/tests/data/datacite43/ep884-g0v97.json:
--------------------------------------------------------------------------------
1 | {
2 | "creators": [
3 | {
4 | "familyName": "Chen",
5 | "givenName": "Zibo",
6 | "name": "Chen, Zibo",
7 | "nameIdentifiers": [],
8 | "nameType": "Personal"
9 | }
10 | ],
11 | "dates": [
12 | {
13 | "date": "2024-06-19",
14 | "dateType": "Issued"
15 | }
16 | ],
17 | "identifiers": [
18 | {
19 | "identifier": "10.22002/ep884-g0v97",
20 | "identifierType": "DOI"
21 | },
22 | {
23 | "identifier": "oai:data.caltech.edu:ep884-g0v97",
24 | "identifierType": "oai"
25 | }
26 | ],
27 | "publicationYear": "2024",
28 | "publisher": "CaltechDATA",
29 | "rightsList": [
30 | {
31 | "rights": "Creative Commons Zero v1.0 Universal",
32 | "rightsIdentifier": "cc0-1.0",
33 | "rightsIdentifierScheme": "spdx",
34 | "rightsUri": "https://creativecommons.org/publicdomain/zero/1.0/legalcode"
35 | }
36 | ],
37 | "schemaVersion": "http://datacite.org/schema/kernel-4",
38 | "titles": [
39 | {
40 | "title": "A synthetic protein-level neural network in mammalian cells"
41 | }
42 | ],
43 | "types": {
44 | "resourceType": "",
45 | "resourceTypeGeneral": "Dataset"
46 | },
47 | "version": "2.0"
48 | }
--------------------------------------------------------------------------------
/edit.py:
--------------------------------------------------------------------------------
1 | import argparse, os, json
2 | from caltechdata_api import caltechdata_edit
3 |
# Command line: optional metadata file plus the record id and optional
# files, file links, schema version and CaltechAUTHORS switch.
parser = argparse.ArgumentParser(
    description="Write files and a DataCite 4 standard json record\
    to CaltechDATA repository"
)
parser.add_argument(
    "json_file",
    nargs="?",
    default=None,
    help="file name for json DataCite metadata file",
)
parser.add_argument("-id", help="CaltechDATA IDs")
parser.add_argument("-fnames", nargs="*", help="New Files")
parser.add_argument("-flinks", nargs="*", help="New File Links")
parser.add_argument("-schema", default="43", help="Metadata Schema")
parser.add_argument("-authors", action="store_true", help="Edit CaltechAUTHORS")
args = parser.parse_args()

# Get access token set as environment variable with source token.bash
token = os.environ["RDMTOK"]

if args.json_file:
    # Use a context manager so the handle is closed even if parsing fails.
    with open(args.json_file, "r") as metaf:
        metadata = json.load(metaf)
else:
    # No metadata file given: send an empty metadata dict.
    metadata = {}

# This script always edits the production instance and publishes.
production = True
publish = True

response = caltechdata_edit(
    args.id,
    metadata,
    token,
    args.fnames,
    production,
    args.schema,
    publish,
    args.flinks,
    authors=args.authors,
)
print(response)
45 |
--------------------------------------------------------------------------------
/add_files_authors.py:
--------------------------------------------------------------------------------
1 | import requests, os, argparse
2 | from caltechdata_api import write_files_rdm
3 |
# Command line: the record id followed by one or more file paths.
parser = argparse.ArgumentParser(
    description="Add files to an existing CaltechAUTHORS record."
)
parser.add_argument("idv", type=str, help="The CaltechAUTHORS record idv to edit.")
parser.add_argument(
    "files", type=str, nargs="+", help="The files to upload to the record."
)
args = parser.parse_args()
idv, files = args.idv, args.files
token = os.environ["RDMTOK"]
url = "https://authors.library.caltech.edu"

# Same bearer token for both header sets; only the content type differs.
bearer = "Bearer %s" % token
headers = {"Authorization": bearer, "Content-type": "application/json"}
f_headers = {"Authorization": bearer, "Content-type": "application/octet-stream"}

draft_url = url + "/api/records/" + idv + "/draft"

# The draft must already exist; anything other than HTTP 200 aborts.
existing = requests.get(draft_url, headers=headers)
if existing.status_code != 200:
    raise Exception(f"Record {idv} does not exist, cannot edit")

# Update metadata: enable files on the draft before uploading.
data = existing.json()
data["files"] = {"enabled": True}
result = requests.put(draft_url, headers=headers, json=data)
if result.status_code != 200:
    raise Exception(result.text)

file_link = result.json()["links"]["files"]
write_files_rdm(files, file_link, headers, f_headers)
51 |
--------------------------------------------------------------------------------
/caltechdata_api/get_files.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import requests
3 |
4 |
def get_files(idv, production=True):
    """Return the file block (the ``entries`` value) for a record.

    Args:
        idv: Record identifier; converted with ``str`` and appended to the
            records API URL.
        production: When true, query data.caltech.edu; otherwise query
            data.caltechlibrary.dev.

    Returns:
        The value stored under ``entries`` in the JSON response.

    Raises:
        AssertionError: If the response contains a ``message`` key or has
            no ``entries`` key.
    """
    if production:
        api_url = "https://data.caltech.edu/api/records/"
    else:
        api_url = "https://data.caltechlibrary.dev/api/records/"

    r = requests.get(api_url + str(idv) + "/files")
    r_data = r.json()
    if "message" in r_data:
        raise AssertionError(
            f"id {idv} expected http status 200, got {r.status_code} "
            f"{r_data['message']}"
        )
    if "entries" not in r_data:
        # r_data is the parsed JSON body, not a string. Without str() the
        # concatenation raises TypeError and hides the intended error.
        raise AssertionError(
            "expected as entries property in response, got " + str(r_data)
        )
    return r_data["entries"]
27 |
28 |
if __name__ == "__main__":
    # Command-line entry point: print the file block of each requested record.
    cli = argparse.ArgumentParser(
        description="get_files queries the caltechDATA (Invenio 3) API\
    and returns file information"
    )
    cli.add_argument(
        "ids",
        metavar="ID",
        type=str,
        nargs="+",
        help="The CaltechDATA ID for each record of interest",
    )
    # -test switches `production` off; it is on by default.
    cli.add_argument("-test", dest="production", action="store_false")

    args = cli.parse_args()
    production = args.production

    for idv in args.ids:
        print(get_files(idv, production))
50 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=64.0","wheel"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [metadata]
6 | name = caltechdata_api
7 | version = 1.10.6
8 | author = Thomas E Morrell, Rohan Bhattarai, Elizabeth Won, Alexander A Abakah, Kshemaahna Nagi
9 | author_email = tmorrell@caltech.edu, aabakah@caltech.edu, knagi@caltech.edu
10 | description = Python wrapper for CaltechDATA API.
11 | long_description = file: README.md
12 | long_description_content_type = text/markdown
13 | url = https://github.com/caltechlibrary/caltechdata_api
14 | license = MIT
15 | classifiers =
16 | License :: OSI Approved :: MIT License
17 | Programming Language :: Python :: 3
18 | Programming Language :: Python :: 3.7
19 | Programming Language :: Python :: 3.8
20 | Programming Language :: Python :: 3.9
21 | Programming Language :: Python :: 3.10
22 | Programming Language :: Python :: Implementation :: CPython
23 | Operating System :: OS Independent
24 |
25 | [options]
26 | packages = find:
27 | python_requires = >=3.6.0
28 | install_requires =
29 | requests
30 | datacite>1.1.0
31 | tqdm>=4.62.3
32 | pyyaml
33 | s3fs
34 | cryptography
35 | s3cmd
36 | include_package_data = True
37 |
38 | [options.packages.find]
39 | exclude = tests
40 |
41 | [options.package_data]
42 | caltechdata_api = vocabularies.yaml, vocabularies/*
43 |
44 | [options.entry_points]
45 | console_scripts =
46 | caltechdata_api=caltechdata_api.cli:main
47 |
48 | [tool:pytest]
49 | addopts = --verbose
50 |
--------------------------------------------------------------------------------
/outdated/edit_all_github.py:
--------------------------------------------------------------------------------
1 | import argparse, os, json, requests
2 | from caltechdata_api import caltechdata_edit, decustomize_schema
3 |
# Access token from TIND, set as an environment variable with `source token.bash`
token = os.environ["TINDTOK"]

production = True

# Production instance or the TIND sandbox.
url = (
    "https://data.caltech.edu/api/records"
    if production
    else "https://cd-sandbox.tind.io/api/records"
)

response = requests.get(url + "/?size=2000&q=cal_resource_type=software")
hits = response.json()

# Subjects that mean a record is already tagged with its code host.
host_subjects = ("Github", "GitHub", "Bitbucket")

for h in hits["hits"]["hits"]:
    rid = h["id"]
    print(rid)
    record = decustomize_schema(h["metadata"], True)
    replace = False
    # Find just GitHub records by title
    if "/" in record["titles"][0]["title"]:
        subjects = [s["subject"] for s in record["subjects"]]
        add = not any(subject in host_subjects for subject in subjects)
        if add:
            record["subjects"].append({"subject": "GitHub"})
            print(record["titles"][0]["title"])
            response = caltechdata_edit(token, rid, record, {}, {}, production)
            print(response)
41 |
--------------------------------------------------------------------------------
/outdated/edit_tccon.py:
--------------------------------------------------------------------------------
1 | import sys, os, json, requests
2 | from caltechdata_api import caltechdata_edit, decustomize_schema
3 |
4 | # USAGE: python edit_tccon.py tccon.ggg2014.darwin01.R0.json 269 0 griffith@uow.edu.au
5 |
# Access token from TIND, set as an environment variable with `source token.bash`
token = os.environ["TINDTOK"]

production = True

if production:
    url = "https://data.caltech.edu/api/records"
else:
    url = "https://cd-sandbox.tind.io/api/records"

response = requests.get(url + "/?size=1000&q=subjects:TCCON")
hits = response.json()

# Use a context manager so the input file is closed once it is parsed.
with open(sys.argv[1], "r") as infile:
    record = json.load(infile)

rid = sys.argv[2]

group = {"contributorName": "TCCON", "contributorType": "ResearchGroup"}
new = ""
for c in record["contributors"]:
    print(c["contributorType"])
    if c["contributorType"] == "HostingInstitution":
        print("YES")
        # Spelling corrected ("Technology"): this value is written into the
        # record metadata.
        c["contributorName"] = "California Institute of Technology, Pasadena, CA (US)"
        c["nameIdentifiers"] = [
            {"nameIdentifier": "grid.20861.3d", "nameIdentifierScheme": "GRID"}
        ]

# Append only after the loop above: `v` is the same list that was iterated.
v = record["contributors"]
v.append(group)

# Turn the creator selected by argv[3] into a ContactPerson contributor with
# the e-mail address given as argv[4].
contact = record["creators"][int(sys.argv[3])]
contact["contributorName"] = contact.pop("creatorName")
contact["contributorEmail"] = sys.argv[4]
contact["contributorType"] = "ContactPerson"
v.append(contact)

new = {"contributors": v}
print(new)
response = caltechdata_edit(token, rid, new, {}, {}, production)
print(response)
45 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2016, Caltech
2 | All rights not granted herein are expressly reserved by Caltech.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are met:
6 |
7 | 1. Redistributions of source code must retain the above copyright notice, this
8 | list of conditions and the following disclaimer.
9 |
10 | 2. Redistributions in binary form must reproduce the above copyright notice,
11 | this list of conditions and the following disclaimer in the documentation
12 | and/or other materials provided with the distribution.
13 |
14 | 3. Neither the name of the copyright holder nor the names of its contributors
15 | may be used to endorse or promote products derived from this software without
16 | specific prior written permission.
17 |
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 |
29 |
--------------------------------------------------------------------------------
/inspect_dois.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import math
3 | from progressbar import progressbar
4 | from caltechdata_api import caltechdata_edit
5 |
6 |
def fix_name(metadata, fixed):
    """Fill in a missing ``givenName`` for personal names written "Family, Given".

    Args:
        metadata: Iterable of name dictionaries; matching entries are
            modified in place.
        fixed: Flag passed in by the caller; set to True as soon as one
            entry is updated, otherwise returned as received.

    Returns:
        Tuple ``(metadata, fixed)``.
    """
    for name in metadata:
        if name.get("nameType") != "Personal" or "givenName" in name:
            continue
        # The segment after the first comma is the given name. A name with
        # no comma cannot be split, so it is left untouched instead of
        # raising IndexError.
        parts = name.get("name", "").split(",")
        if len(parts) < 2:
            continue
        name["givenName"] = parts[1].strip()
        fixed = True
    return metadata, fixed
15 |
16 |
# Search URL; the query filters on the related identifier 10.25989/es8t-kswe.
url = 'https://data.caltech.edu/api/records?q=-metadata.related_identifiers.identifier%3A"10.25989%2Fes8t-kswe"'

# Ask for DataCite JSON when fetching individual records.
headers = {
    "accept": "application/vnd.datacite.datacite+json",
}

response = requests.get(f"{url}&search_type=scan&scroll=5m")

# Page through the results 1000 at a time and collect every hit.
total = response.json()["hits"]["total"]
pages = math.ceil(int(total) / 1000)
hits = []
print(total)
for c in progressbar(range(1, pages + 1)):
    chunkurl = f"{url}&sort=newest&size=1000&page={c}"
    response = requests.get(chunkurl)
    response = response.json()
    hits.extend(response["hits"]["hits"])


url = "https://data.caltech.edu/api/records"

for h in progressbar(hits):
    idv = str(h["id"])
    doi = h["pids"]["doi"]

    # Only DOIs without a "client" entry and carrying the 10.22002/ prefix
    # are re-submitted.
    if "client" in doi or "10.22002/" not in doi["identifier"]:
        continue

    response = requests.get(f"{url}/{idv}", headers=headers)
    if response.status_code != 200:
        # Stop the whole run on the first failed fetch.
        print(response.text)
        exit()
    metadata = response.json()
    print(idv)
    caltechdata_edit(idv, metadata, production=True, publish=True)
53 |
--------------------------------------------------------------------------------
/outdated/edit_all_tccon.py:
--------------------------------------------------------------------------------
1 | import argparse, os, json, requests
2 | from caltechdata_api import caltechdata_edit, decustomize_schema
3 |
# Access token from TIND, set as an environment variable with `source token.bash`
token = os.environ["TINDTOK"]

production = True

# Production instance or the TIND sandbox.
url = (
    "https://data.caltech.edu/api/records"
    if production
    else "https://cd-sandbox.tind.io/api/records"
)

response = requests.get(url + "/?size=1000&q=subjects:TCCON")
hits = response.json()

# Old link values and the values that replace them.
wiki1 = "https://tccon-wiki.caltech.edu/Network_Policy/Data_Use_Policy/Data_Description"
new1 = "https://tccon-wiki.caltech.edu/Main/DataDescription"
wiki2 = "https://tccon-wiki.caltech.edu/Sites"
new2 = "https://tccon-wiki.caltech.edu/Main/TCCONSites"
site = "http://tccondata.org/"
new3 = "https://tccondata.org"
exsite = "http://tccondata.org"

# No replacement value is itself an old value, so one lookup per identifier
# gives the same result as checking the four cases in sequence.
replacements = {wiki1: new1, wiki2: new2, site: new3, exsite: new3}

for h in hits["hits"]["hits"]:
    rid = h["id"]
    print(rid)
    record = decustomize_schema(h["metadata"], True)
    updated = {}
    if "relatedIdentifiers" in record:
        for related in record["relatedIdentifiers"]:
            current = related["relatedIdentifier"]
            if current in replacements:
                related["relatedIdentifier"] = replacements[current]
        updated["relatedIdentifiers"] = record["relatedIdentifiers"]
    response = caltechdata_edit(rid, updated, token, {}, {}, production)
    print(response)
43 |
--------------------------------------------------------------------------------
/outdated/add_doi_minting_date.py:
--------------------------------------------------------------------------------
1 | import os, requests
2 | from progressbar import progressbar
3 | from caltechdata_api import get_metadata, caltechdata_edit
4 |
5 |
def get_datacite_dates(prefix):
    """Get registration dates and URLs for DataCite DOIs with a specific prefix.

    Args:
        prefix: DOI prefix inserted into the DataCite ``dois`` query.

    Returns:
        Tuple ``(doi_dates, doi_urls)`` of dicts keyed by DOI id. The first
        holds the date part of ``attributes.registered`` (text before "T"),
        the second holds ``attributes.url``.
    """
    doi_dates = {}
    doi_urls = {}
    url = (
        "https://api.datacite.org/dois?query=prefix:"
        + prefix
        + "&page[cursor]=1&page[size]=500"
    )
    next_link = url
    # This first request only supplies the page count for the progress bar.
    meta = requests.get(next_link).json()["meta"]
    for _ in progressbar(range(meta["totalPages"])):
        r = requests.get(next_link)
        data = r.json()
        for doi in data["data"]:
            date = doi["attributes"]["registered"].split("T")[0]
            doi_dates[doi["id"]] = date
            doi_urls[doi["id"]] = doi["attributes"]["url"]
        if "next" not in data["links"]:
            # No further page is advertised. Stop here instead of issuing a
            # request with no URL on the next iteration.
            break
        next_link = data["links"]["next"]
    return doi_dates, doi_urls
29 |
30 |
token = os.environ["TINDTOK"]

doi_dates, doi_urls = get_datacite_dates("10.14291")

# Record ids that are never touched by this script.
skipped_ids = ["252", "253", "254", "255"]

for doi, landing_url in doi_urls.items():
    if "data.caltech.edu" not in landing_url:
        continue
    # The record id is the last path segment of the DOI's URL.
    caltech_id = landing_url.split("/")[-1]
    if caltech_id in skipped_ids:
        continue
    metadata = get_metadata(caltech_id, emails=True)
    print(caltech_id)
    # Overwrite the Issued date with the DOI's registration date.
    for date in metadata["dates"]:
        if date["dateType"] == "Issued":
            print(date["date"], doi_dates[doi])
            date["date"] = doi_dates[doi]
    response = caltechdata_edit(token, caltech_id, metadata, production=True)
    print(response)
47 |
--------------------------------------------------------------------------------
/fix_names.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import math
3 | from progressbar import progressbar
4 | from caltechdata_api import caltechdata_edit
5 |
6 |
def fix_name(metadata, fixed):
    """Fill in a missing ``givenName`` for personal names written "Family, Given".

    Args:
        metadata: Iterable of name dictionaries (the script passes a record's
            creators and contributors); matching entries are modified in
            place.
        fixed: Flag passed in by the caller; set to True as soon as one
            entry is updated, otherwise returned as received.

    Returns:
        Tuple ``(metadata, fixed)``.
    """
    for name in metadata:
        if name.get("nameType") != "Personal" or "givenName" in name:
            continue
        # The segment after the first comma is the given name. A name with
        # no comma cannot be split, so it is left untouched instead of
        # raising IndexError.
        parts = name.get("name", "").split(",")
        if len(parts) < 2:
            continue
        name["givenName"] = parts[1].strip()
        fixed = True
    return metadata, fixed
15 |
16 |
# Search URL; the query filters on the related identifier 10.25989/es8t-kswe.
url = 'https://data.caltech.edu/api/records?q=-metadata.related_identifiers.identifier%3A"10.25989%2Fes8t-kswe"'

# Ask for DataCite JSON when fetching individual records.
headers = {
    "accept": "application/vnd.datacite.datacite+json",
}

response = requests.get(f"{url}&search_type=scan&scroll=5m")

# Page through the results 1000 at a time and collect every hit.
total = response.json()["hits"]["total"]
pages = math.ceil(int(total) / 1000)
hits = []  # for a single-record run: [{'id':'a7f64-a8k10'}]
print(total)
for c in progressbar(range(1, pages + 1)):
    chunkurl = f"{url}&sort=newest&size=1000&page={c}"
    response = requests.get(chunkurl)
    response = response.json()
    hits.extend(response["hits"]["hits"])


url = "https://data.caltech.edu/api/records"

for h in progressbar(hits):
    idv = str(h["id"])

    response = requests.get(f"{url}/{idv}", headers=headers)
    if response.status_code != 200:
        # Stop the whole run on the first failed fetch.
        print(response.text)
        exit()

    fixed = False
    metadata = response.json()
    metadata["creators"], fixed = fix_name(metadata["creators"], fixed)
    if "contributors" in metadata:
        metadata["contributors"], fixed = fix_name(metadata["contributors"], fixed)
    # Only records that actually changed are written back.
    if fixed:
        print(idv)
        caltechdata_edit(idv, metadata, production=True, publish=True)
--------------------------------------------------------------------------------
/.github/workflows/codemeta2cff.yml:
--------------------------------------------------------------------------------
1 | name: CodeMeta2CFF
2 | run-name: Run CodeMeta2CFF after ${{github.event_name}} by ${{github.actor}}
3 |
4 | on:
5 | push:
6 | paths: ['codemeta.json']
7 | workflow_dispatch:
8 | inputs:
9 | reason:
10 | description: 'Reason'
11 | required: false
12 | default: 'Manual trigger'
13 |
14 | jobs:
15 | CodeMeta2CFF:
16 | runs-on: ubuntu-latest
17 | steps:
18 | - name: Checkout
19 | uses: actions/checkout@v4
20 | - name: Convert CFF
21 | uses: caltechlibrary/codemeta2cff@main
22 | - name: Install jq for JSON parsing
23 | run: sudo apt-get install -y jq
24 | - name: Parse and update setup.cfg
25 | run: |
26 | # Extract values from codemeta.json
27 | NAME=$(jq -r '.name' codemeta.json)
28 | VERSION=$(jq -r '.version' codemeta.json)
29 | AUTHORS=$(jq -r '[.author[] | .givenName + " " + .familyName] | join(", ")' codemeta.json)
30 | AUTHOR_EMAILS=$(jq -r '[.author[] | .email // empty] | join(", ")' codemeta.json)
31 | DESCRIPTION=$(jq -r '.description' codemeta.json)
32 | URL=$(jq -r '.codeRepository // .url' codemeta.json)
33 |
34 | # Update setup.cfg fields
35 | sed -i "s/^name = .*/name = $NAME/" setup.cfg
36 | sed -i "s/^version = .*/version = $VERSION/" setup.cfg
37 | sed -i "s/^author = .*/author = $AUTHORS/" setup.cfg
38 | sed -i "s/^author_email = .*/author_email = $AUTHOR_EMAILS/" setup.cfg
39 | sed -i "s/^description = .*/description = $DESCRIPTION/" setup.cfg
40 | sed -i "s|^url = .*|url = $URL|" setup.cfg
41 | - name: Commit CFF
42 | uses: EndBug/add-and-commit@v9
43 | with:
44 | message: 'Add updated CITATION.cff and setup.cfg from codemeta.json file'
45 | add: '["setup.cfg", "CITATION.cff"]'
46 |
--------------------------------------------------------------------------------
/outdated/write_pilot_phase1.py:
--------------------------------------------------------------------------------
1 | import argparse, os, json
2 | import s3fs
3 | from datacite import schema43
4 | from caltechdata_api import caltechdata_write
5 |
# Command line: the bucket folder and the metadata file name inside it.
parser = argparse.ArgumentParser(
    description="Adds S3-stored pilot files and a DataCite 4.3 standard json record\
    to CaltechDATA repository"
)
parser.add_argument("folder", nargs=1, help="Folder")
parser.add_argument(
    "json_file", nargs=1, help="file name for json DataCite metadata file"
)

args = parser.parse_args()

# Get access token as environment variable
token = os.environ["TINDTOK"]

endpoint = "https://renc.osn.xsede.org/"

# Get metadata and files from bucket (anonymous access)
s3 = s3fs.S3FileSystem(anon=True, client_kwargs={"endpoint_url": endpoint})

path = "ini210004tommorrell/" + args.folder[0] + "/"
meta_path = path + args.json_file[0]
# Use a context manager so the remote file handle is released after parsing.
with s3.open(meta_path, "rb") as metaf:
    metadata = json.load(metaf)

# Find the files
files = s3.glob(path + "/*.nc")

# NOTE(review): the markup inside these string literals was lost from the
# source and left the literals unterminated. The <br> and <a href> tags below
# are a minimal reconstruction; confirm against the intended description
# markup before use.
description_string = f"Files available via S3 at {endpoint}{path}<br>"
for link in files:
    fname = link.split("/")[-1]
    link = endpoint + link
    description_string += f"""{fname} <a href="{link}">Download</a><br>"""

metadata["descriptions"].append(
    {"description": description_string, "descriptionType": "Other"}
)

# The metadata is not validated against schema43 before submission.

print(metadata)

production = True

response = caltechdata_write(metadata, token, [], production, "43")
print(response)
60 |
--------------------------------------------------------------------------------
/tests/data/datacite43/asjw8-cd908.json:
--------------------------------------------------------------------------------
1 | {
2 | "creators": [
3 | {
4 | "familyName": "Sloan",
5 | "givenName": "Julia",
6 | "name": "Sloan, Julia",
7 | "nameIdentifiers": [
8 | {
9 | "nameIdentifier": "0000-0003-0200-063X",
10 | "nameIdentifierScheme": "ORCID"
11 | }
12 | ],
13 | "nameType": "Personal"
14 | }
15 | ],
16 | "dates": [
17 | {
18 | "date": "2024-10-25",
19 | "dateType": "Issued"
20 | },
21 | {
22 | "date": "2024-10-31",
23 | "dateInformation": "Correct file added",
24 | "dateType": "Updated"
25 | }
26 | ],
27 | "descriptions": [
28 | {
29 | "description": "This artifact contains two datasets: one corresponding to a simulation solving Richards Equation in clay, and another solving it in sand. These experiments were conducted in Gordon Bonan's \"Climate Change and Terrestrial Ecosystem Modeling\" textbook, Chapter 8 supplementary program 1.\nFull citation: Bonan, Gordon. Climate Change and Terrestrial Ecosystem Modeling. Cambridge University Press, 2019.",
30 | "descriptionType": "Abstract"
31 | }
32 | ],
33 | "identifiers": [
34 | {
35 | "identifier": "10.22002/asjw8-cd908",
36 | "identifierType": "DOI"
37 | },
38 | {
39 | "identifier": "oai:data.caltech.edu:asjw8-cd908",
40 | "identifierType": "oai"
41 | }
42 | ],
43 | "publicationYear": "2024",
44 | "publisher": "CaltechDATA",
45 | "rightsList": [
46 | {
47 | "rights": "Creative Commons Zero v1.0 Universal",
48 | "rightsIdentifier": "cc0-1.0",
49 | "rightsIdentifierScheme": "spdx",
50 | "rightsUri": "https://creativecommons.org/publicdomain/zero/1.0/legalcode"
51 | }
52 | ],
53 | "schemaVersion": "http://datacite.org/schema/kernel-4",
54 | "titles": [
55 | {
56 | "title": "Bonan 2019 Richards Eqn Data"
57 | }
58 | ],
59 | "types": {
60 | "resourceType": "",
61 | "resourceTypeGeneral": "Dataset"
62 | }
63 | }
--------------------------------------------------------------------------------
/caltechdata_api/vocabularies/identifier_types.yaml:
--------------------------------------------------------------------------------
1 | - id: ark
2 | props:
3 | datacite: ARK
4 | title:
5 | en: ARK
6 | - id: arxiv
7 | props:
8 | datacite: arXiv
9 | title:
10 | en: arXiv
11 | - id: bibcode
12 | props:
13 | datacite: bibcode
14 | title:
15 | en: Bibcode
16 | - id: doi
17 | props:
18 | datacite: DOI
19 | title:
20 | en: DOI
21 | - id: ean13
22 | props:
23 | datacite: EAN13
24 | title:
25 | en: EAN13
26 | - id: eissn
27 | props:
28 | datacite: EISSN
29 | title:
30 | en: EISSN
31 | - id: handle
32 | props:
33 | datacite: Handle
34 | title:
35 | en: Handle
36 | - id: igsn
37 | props:
38 | datacite: IGSN
39 | title:
40 | en: IGSN
41 | - id: isbn
42 | props:
43 | datacite: ISBN
44 | title:
45 | en: ISBN
46 | - id: issn
47 | props:
48 | datacite: ISSN
49 | title:
50 | en: ISSN
51 | - id: istc
52 | props:
53 | datacite: ISTC
54 | title:
55 | en: ISTC
56 | - id: lissn
57 | props:
58 | datacite: LISSN
59 | title:
60 | en: LISSN
61 | - id: lsid
62 | props:
63 | datacite: LSID
64 | title:
65 | en: LSID
66 | - id: pmid
67 | props:
68 | datacite: PMID
69 | title:
70 | en: PMID
71 | - id: purl
72 | props:
73 | datacite: PURL
74 | title:
75 | en: PURL
76 | - id: upc
77 | props:
78 | datacite: UPC
79 | title:
80 | en: UPC
81 | - id: url
82 | props:
83 | datacite: URL
84 | title:
85 | en: URL
86 | - id: urn
87 | props:
88 | datacite: URN
89 | title:
90 | en: URN
91 | - id: w3id
92 | props:
93 | datacite: w3id
94 | title:
95 | en: W3ID
96 | - id: cdid
97 | props:
98 | datacite: cdid
99 | title:
100 | en: CALTECHDATA_ID
101 | - id: tiltid
102 | props:
103 | datacite: tiltid
104 | title:
105 | en: TILT_SERIES_ID
106 | - id: dsa-110-id
107 | props:
108 | datacite: dsa-110-id
109 | title:
110 | en: DSA_110_ID
111 |
--------------------------------------------------------------------------------
/outdated/edit_all_geo.py:
--------------------------------------------------------------------------------
1 | import argparse, os, json, requests, csv, dataset
2 | from caltechdata_api import caltechdata_edit, decustomize_schema
3 |
# Get access token from TIND, set as an environment variable with
# `source token.bash`.
token = os.environ["TINDTOK"]

collection = "data/CaltechTHESIS.ds"

production = True

# Select the records API: production CaltechDATA or the TIND sandbox.
if production:
    url = "https://data.caltech.edu/api/records"
else:
    url = "https://cd-sandbox.tind.io/api/records"

response = requests.get(url + "/?size=1000&q=subjects:gps")
hits = response.json()

# Set up dictionary of links between resolver and thesis IDs.
record_list_path = "data/record_list.csv"
if not os.path.isfile(record_list_path):
    print("You need to run update_thesis_file.py")
    # Exit with a failure status; SystemExit also works when the `site`
    # module (which provides the interactive `exit()` helper) is not loaded.
    raise SystemExit(1)

record_list = {}
# The context manager guarantees the CSV handle is closed after loading.
with open(record_list_path, newline="") as record_file:
    for row in csv.reader(record_file):
        # Second column is the lookup key, first column the stored ID.
        record_list[row[1]] = row[0]

for h in hits["hits"]["hits"]:
    rid = str(h["id"])
    print(rid)
    record = decustomize_schema(h["metadata"], True)
    for r in record.get("relatedIdentifiers", []):
        is_thesis_link = (
            r["relationType"] == "IsSupplementTo"
            and r["relatedIdentifierType"] == "URL"
        )
        if not is_thesis_link:
            continue
        idv = record_list[r["relatedIdentifier"]]
        # NOTE(review): `err` is not inspected here; confirm how
        # dataset.read reports failures before relying on thesis_metadata.
        thesis_metadata, err = dataset.read(collection, idv)
        # The thesis date becomes the record's "Issued" date.
        dates = [{"date": thesis_metadata["date"], "dateType": "Issued"}]
        for date in record["dates"]:
            date_type = date["dateType"]
            if date_type == "Issued":
                # The previous "Issued" date is kept as "Updated".
                dates.append({"date": date["date"], "dateType": "Updated"})
            elif date_type not in ("Updated", "Submitted"):
                # Existing "Updated" and "Submitted" dates are dropped;
                # every other date type is carried over unchanged.
                dates.append(date)
        print(dates)
        metadata = {"dates": dates}
        response = caltechdata_edit(token, rid, metadata, {}, {}, production)
        print(response)
55 |
--------------------------------------------------------------------------------
/caltechdata_api/download_file.py:
--------------------------------------------------------------------------------
1 | import requests, argparse
2 | from tqdm.auto import tqdm
3 |
4 |
def download_url(doi, media_type=None):
    """Get a download link for a file listed in the media API for a DataCite DOI.

    Args:
        doi: DOI appended to the DataCite ``/dois/<doi>/media`` endpoint.
        media_type: Optional media type. When given, only an entry whose
            ``mediaType`` attribute equals this value is used; when omitted,
            the first listed entry is used.

    Returns:
        The ``url`` attribute of the selected media entry.

    Raises:
        IndexError: If ``media_type`` is omitted and the DOI has no media.
        ValueError: If no media entry has the requested ``media_type``.
    """
    api_url = "https://api.datacite.org/dois/" + doi + "/media"
    data = requests.get(api_url).json()["data"]
    if media_type is None:
        return data[0]["attributes"]["url"]

    url = None
    for media in data:
        attributes = media["attributes"]
        if attributes["mediaType"] == media_type:
            # Return the link itself, not the whole attributes mapping.
            # As before, the last matching entry wins.
            url = attributes["url"]
    if url is None:
        raise ValueError(f"No media of type {media_type!r} listed for DOI {doi}")
    return url
17 |
18 |
def download_file(doi, fname=None, media_type=None):
    """Download a file listed in the media API for a DataCite DOI.

    Args:
        doi: DOI whose media file should be downloaded.
        fname: Destination file name. Defaults to the DOI with ``/``
            replaced by ``-``.
        media_type: Optional media type passed on to ``download_url``.

    Returns:
        The destination file name. It is returned even when nothing was
        written (403 response or missing ``content-length`` header); in
        those cases a message is printed instead.
    """
    url = download_url(doi, media_type)
    r = requests.get(url, stream=True)
    # Set file name
    if fname is None:
        fname = doi.replace("/", "-")

    if r.status_code == 403:
        # Stop here so the body of the error response is never saved.
        print("File Unavailable")
    elif "content-length" not in r.headers:
        print("Did not get file")
    else:
        # Download file with a progress bar that counts bytes written.
        total_length = int(r.headers["content-length"])
        with open(fname, "wb") as f:
            with tqdm(total=total_length, unit="B", unit_scale=True) as pbar:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
                        pbar.update(len(chunk))
    return fname
40 |
41 |
if __name__ == "__main__":
    # Command-line entry point: download the media file for each DOI given.
    parser = argparse.ArgumentParser(
        description=(
            "download_file queries the DataCite Media API "
            "and downloads the file associated with a DOI"
        )
    )
    parser.add_argument(
        "dois",
        nargs="+",
        help="The DOI for files to be downloaded",
    )
    parser.add_argument(
        "-fname", default=None, help="File name to be used for downloaded file"
    )
    parser.add_argument(
        "-media_type", default=None, help="File (media) type to be downloaded"
    )

    args = parser.parse_args()

    # NOTE(review): the same -fname is passed for every DOI, so with several
    # DOIs each download is written to the same file name.
    for doi in args.dois:
        download_file(doi, args.fname, args.media_type)
63 |
--------------------------------------------------------------------------------
/outdated/get_geo.py:
--------------------------------------------------------------------------------
1 | import os, json, csv, argparse
2 | import requests
3 |
if __name__ == "__main__":
    # The script calls decustomize_schema but never imported it, so it failed
    # with NameError on the first record.
    # NOTE(review): assumes caltechdata_api still exports decustomize_schema
    # (another script in this repository imports it that way) - confirm.
    from caltechdata_api import decustomize_schema

    parser = argparse.ArgumentParser(
        description=(
            "get_metadata queries the caltechDATA (Invenio 3) API "
            "and returns DataCite-compatible metadata"
        )
    )
    parser.add_argument("output", help="Output file name")
    parser.add_argument("-keywords", nargs="*")

    args = parser.parse_args()

    url = "https://data.caltech.edu/api/records/?size=5000"

    # Build a subjects query: the first keyword opens the `q=` parameter and
    # every further keyword is appended with `+`.
    search = ""
    if args.keywords:
        for key in args.keywords:
            if search == "":
                search = f'&q=subjects:"{key}"'
            else:
                search = search + f'+"{key}"'
        url = url + search

    response = requests.get(url)
    hits = response.json()

    # newline="" is what the csv module expects for files it writes; the
    # context manager closes (and flushes) the output file.
    with open(args.output, "w", newline="") as outfile:
        writer = csv.writer(outfile)
        writer.writerow(["wkt", "name", "year", "doi"])

        for h in hits["hits"]["hits"]:
            metadata = decustomize_schema(h["metadata"])
            if "geoLocations" not in metadata:
                continue
            doi = "https://doi.org/" + metadata["identifier"]["identifier"]
            # Only the part of the title before the first colon is kept.
            title = metadata["titles"][0]["title"].split(":")[0]
            year = metadata["publicationYear"]
            for g in metadata["geoLocations"]:
                if "geoLocationBox" in g:
                    box = g["geoLocationBox"]
                    # Corners as "longitude latitude" pairs, starting at the
                    # north-east corner; the ring is closed by repeating it.
                    p1 = f"{box['eastBoundLongitude']} {box['northBoundLatitude']}"
                    p2 = f"{box['westBoundLongitude']} {box['northBoundLatitude']}"
                    p3 = f"{box['westBoundLongitude']} {box['southBoundLatitude']}"
                    p4 = f"{box['eastBoundLongitude']} {box['southBoundLatitude']}"
                    wkt = f"POLYGON (({p1}, {p2}, {p3}, {p4}, {p1}))"
                    writer.writerow([wkt, title, year, doi])

                if "geoLocationPoint" in g:
                    point = g["geoLocationPoint"]
                    wkt = f"POINT ({point['pointLongitude']} {point['pointLatitude']})"
                    writer.writerow([wkt, title, year, doi])
53 |
--------------------------------------------------------------------------------
/tests/data/datacite43/kxjgj-tfk18.json:
--------------------------------------------------------------------------------
1 | {
2 | "creators": [
3 | {
4 | "affiliation": [
5 | {
6 | "affiliationIdentifier": "05dxps055",
7 | "affiliationIdentifierScheme": "ROR",
8 | "name": "California Institute of Technology"
9 | }
10 | ],
11 | "familyName": "Ding",
12 | "givenName": "Xiaozhe",
13 | "name": "Ding, Xiaozhe",
14 | "nameIdentifiers": [
15 | {
16 | "nameIdentifier": "0000-0002-0267-0791",
17 | "nameIdentifierScheme": "ORCID"
18 | }
19 | ],
20 | "nameType": "Personal"
21 | }
22 | ],
23 | "dates": [
24 | {
25 | "date": "2024-04-16",
26 | "dateType": "Issued"
27 | }
28 | ],
29 | "descriptions": [
30 | {
31 | "description": "Raw data for Ding, X., Chen, X., Sullivan, E.E., Shay, T.F., and Gradinaru, V. (2024). Fast, accurate ranking of engineered proteins by target binding propensity using structure modeling. Molecular Therapy. https://doi.org/10.1016/j.ymthe.2024.04.003",
32 | "descriptionType": "Abstract"
33 | }
34 | ],
35 | "identifiers": [
36 | {
37 | "identifier": "10.22002/kxjgj-tfk18",
38 | "identifierType": "DOI"
39 | },
40 | {
41 | "identifier": "oai:data.caltech.edu:kxjgj-tfk18",
42 | "identifierType": "oai"
43 | }
44 | ],
45 | "publicationYear": "2024",
46 | "publisher": "CaltechDATA",
47 | "relatedIdentifiers": [
48 | {
49 | "relatedIdentifier": "10.1016/j.ymthe.2024.04.003",
50 | "relatedIdentifierType": "DOI",
51 | "relationType": "IsSupplementTo",
52 | "resourceTypeGeneral": "Text"
53 | }
54 | ],
55 | "rightsList": [
56 | {
57 | "rights": "Creative Commons Zero v1.0 Universal",
58 | "rightsIdentifier": "cc0-1.0",
59 | "rightsIdentifierScheme": "spdx",
60 | "rightsUri": "https://creativecommons.org/publicdomain/zero/1.0/legalcode"
61 | }
62 | ],
63 | "schemaVersion": "http://datacite.org/schema/kernel-4",
64 | "titles": [
65 | {
66 | "title": "Data for Fast, accurate ranking of engineered proteins by target-binding propensity using structure modeling"
67 | }
68 | ],
69 | "types": {
70 | "resourceType": "",
71 | "resourceTypeGeneral": "Dataset"
72 | }
73 | }
--------------------------------------------------------------------------------
/templates/README.md:
--------------------------------------------------------------------------------
1 | # This is the title of your submission to CaltechDATA
2 |
3 | ## Creators
4 | - Name Type: Personal
5 | - Affiliation: [https://ror.org/04wxnsj81](https://ror.org/04wxnsj81)
6 | - Name: Name
7 | - Family Name: Family Name
8 | - Given Name: Given Name
9 | - Name Identifiers: [https://orcid.org/0000-0002-1825-0097](https://orcid.org/0000-0002-1825-0097)
10 |
11 | - Name Type: Personal
12 | - Affiliation: [https://ror.org/04wxnsj81](https://ror.org/04wxnsj81)
13 | - Name: Name2
14 | - Family Name: Family Name 2
15 | - Given Name: Given Name 2
16 | - Name Identifiers: [https://orcid.org/0000-0002-1825-0097](https://orcid.org/0000-0002-1825-0097)
17 |
18 | ## Descriptions
19 | - Description: Description
20 | - Description Type: Abstract
21 |
22 | ## Types
23 | - Resource Type General: Dataset
24 | - Resource Type: Dataset
25 |
26 | ## Rights List
27 | - Rights: Creative Commons Zero v1.0 Universal
28 | - Rights URI: https://creativecommons.org/publicdomain/zero/1.0/legalcode
29 |
30 | ## Publication Year
31 | - Publication Year: 2024
32 |
33 | ## Publisher
34 | - Publisher: CaltechDATA
35 |
36 | ## Dates
37 | - Date: 2014-10-01
38 | - Date Type: Created
39 | - Date: 2012-05-22/2016-12-21
40 | - Date Type: Collected
41 |
42 | ## Subjects
43 | - Subject: subject1
44 | - Subject: subject2
45 |
46 | ## Funding References
47 | - Award Title: Measurement of Column-Averaged CO2
48 | - Funder Name: National Aeronautics and Space Administration
49 | - Funder Identifier Type: ROR
50 | - Funder Identifier: https://ror.org/027ka1x80
51 | - Award Number: NAG5-12247
52 |
53 | ## Related Identifiers
54 | - Related Identifier: [http://www.url.org/](http://www.url.org/)
55 | - Related Identifier Type: URL
56 | - Relation Type: IsPartOf
57 | - Related Identifier: 10.5072/FK2
58 | - Related Identifier Type: DOI
59 | - Relation Type: IsDocumentedBy
60 |
61 | ## Version
62 | - Version: 1
63 |
64 | ## Identifiers
65 | - Identifier: 1924MNRAS..84..308E
66 | - Identifier Type: bibcode
67 |
68 | ## Contributors
69 | - Name Type: Personal
70 | - Affiliation: [https://ror.org/04wxnsj81](https://ror.org/04wxnsj81)
71 | - Name: Contributor Name
72 | - Family Name: Family Name
73 | - Given Name: Given Name
74 | - Contributor Type: ContactPerson
75 | - Name Identifiers: [https://orcid.org/0000-0002-1825-0097](https://orcid.org/0000-0002-1825-0097)
76 |
--------------------------------------------------------------------------------
/tests/data/datacite43/kxtar-bm759.json:
--------------------------------------------------------------------------------
1 | {
2 | "creators": [
3 | {
4 | "affiliation": [
5 | {
6 | "affiliationIdentifier": "05dxps055",
7 | "affiliationIdentifierScheme": "ROR",
8 | "name": "California Institute of Technology"
9 | }
10 | ],
11 | "familyName": "Bird",
12 | "givenName": "Eli",
13 | "name": "Bird, Eli",
14 | "nameIdentifiers": [
15 | {
16 | "nameIdentifier": "0000-0002-9428-0650",
17 | "nameIdentifierScheme": "ORCID"
18 | }
19 | ],
20 | "nameType": "Personal"
21 | },
22 | {
23 | "affiliation": [
24 | {
25 | "affiliationIdentifier": "05dxps055",
26 | "affiliationIdentifierScheme": "ROR",
27 | "name": "California Institute of Technology"
28 | }
29 | ],
30 | "familyName": "Zhan",
31 | "givenName": "Zhongwen",
32 | "name": "Zhan, Zhongwen",
33 | "nameIdentifiers": [
34 | {
35 | "nameIdentifier": "0000-0002-5586-2607",
36 | "nameIdentifierScheme": "ORCID"
37 | }
38 | ],
39 | "nameType": "Personal"
40 | }
41 | ],
42 | "dates": [
43 | {
44 | "date": "2024-09-23",
45 | "dateType": "Issued"
46 | }
47 | ],
48 | "identifiers": [
49 | {
50 | "identifier": "10.22002/kxtar-bm759",
51 | "identifierType": "DOI"
52 | },
53 | {
54 | "identifier": "oai:data.caltech.edu:kxtar-bm759",
55 | "identifierType": "oai"
56 | }
57 | ],
58 | "publicationYear": "2024",
59 | "publisher": "CaltechDATA",
60 | "rightsList": [
61 | {
62 | "rights": "Creative Commons Zero v1.0 Universal",
63 | "rightsIdentifier": "cc0-1.0",
64 | "rightsIdentifierScheme": "spdx",
65 | "rightsUri": "https://creativecommons.org/publicdomain/zero/1.0/legalcode"
66 | }
67 | ],
68 | "schemaVersion": "http://datacite.org/schema/kernel-4",
69 | "titles": [
70 | {
71 | "title": "Ambient Noise Cross-Correlation Data associated with Constraining Dike Opening Models With Seismic Velocity Changes Associated with the 2023-2024 Eruption Sequence on the Reykjanes Peninsula\" by Bird et al."
72 | }
73 | ],
74 | "types": {
75 | "resourceType": "",
76 | "resourceTypeGeneral": "Dataset"
77 | }
78 | }
--------------------------------------------------------------------------------
/caltechdata_api/vocabularies/roles.yaml:
--------------------------------------------------------------------------------
1 | - id: contactperson
2 | props:
3 | datacite: ContactPerson
4 | title:
5 | en: Contact person
6 | - id: datacollector
7 | props:
8 | datacite: DataCollector
9 | title:
10 | en: Data collector
11 | - id: datacurator
12 | props:
13 | datacite: DataCurator
14 | title:
15 | en: Data curator
16 | - id: datamanager
17 | props:
18 | datacite: DataManager
19 | title:
20 | en: Data manager
21 | - id: distributor
22 | props:
23 | datacite: Distributor
24 | title:
25 | en: Distributor
26 | - id: editor
27 | props:
28 | datacite: Editor
29 | title:
30 | en: Editor
31 | - id: hostinginstitution
32 | props:
33 | datacite: HostingInstitution
34 | title:
35 | en: Hosting institution
36 | - id: producer
37 | props:
38 | datacite: Producer
39 | title:
40 | en: Producer
41 | - id: projectleader
42 | props:
43 | datacite: ProjectLeader
44 | title:
45 | en: Project leader
46 | - id: projectmanager
47 | props:
48 | datacite: ProjectManager
49 | title:
50 | en: Project manager
51 | - id: projectmember
52 | props:
53 | datacite: ProjectMember
54 | title:
55 | en: Project member
56 | - id: registrationagency
57 | props:
58 | datacite: RegistrationAgency
59 | title:
60 | en: Registration agency
61 | - id: registrationauthority
62 | props:
63 | datacite: RegistrationAuthority
64 | title:
65 | en: Registration authority
66 | - id: relatedperson
67 | props:
68 | datacite: RelatedPerson
69 | title:
70 | en: Related person
71 | - id: researcher
72 | props:
73 | datacite: Researcher
74 | title:
75 | en: Researcher
76 | - id: researchgroup
77 | props:
78 | datacite: ResearchGroup
79 | title:
80 | en: Research group
81 | - id: rightsholder
82 | props:
83 | datacite: RightsHolder
84 | title:
85 | en: Rights holder
86 | - id: sponsor
87 | props:
88 | datacite: Sponsor
89 | title:
90 | en: Sponsor
91 | - id: supervisor
92 | props:
93 | datacite: Supervisor
94 | title:
95 | en: Supervisor
96 | - id: workpackageleader
97 | props:
98 | datacite: WorkPackageLeader
99 | title:
100 | en: Work package leader
101 | - id: other
102 | props:
103 | datacite: Other
104 | title:
105 | en: Other
106 |
--------------------------------------------------------------------------------
/tests/data/datacite43/n13wc-zwc92.json:
--------------------------------------------------------------------------------
1 | {
2 | "creators": [
3 | {
4 | "affiliation": [
5 | {
6 | "affiliationIdentifier": "05dxps055",
7 | "affiliationIdentifierScheme": "ROR",
8 | "name": "California Institute of Technology"
9 | }
10 | ],
11 | "familyName": "Silevitch",
12 | "givenName": "Daniel",
13 | "name": "Silevitch, Daniel",
14 | "nameIdentifiers": [
15 | {
16 | "nameIdentifier": "0000-0002-6347-3513",
17 | "nameIdentifierScheme": "ORCID"
18 | }
19 | ],
20 | "nameType": "Personal"
21 | },
22 | {
23 | "affiliation": [
24 | {
25 | "affiliationIdentifier": "05dxps055",
26 | "affiliationIdentifierScheme": "ROR",
27 | "name": "California Institute of Technology"
28 | }
29 | ],
30 | "familyName": "Armstrong",
31 | "givenName": "Stephen",
32 | "name": "Armstrong, Stephen",
33 | "nameIdentifiers": [],
34 | "nameType": "Personal"
35 | }
36 | ],
37 | "dates": [
38 | {
39 | "date": "2025-01-30",
40 | "dateType": "Issued"
41 | },
42 | {
43 | "date": "2025-01-30",
44 | "dateType": "Submitted"
45 | }
46 | ],
47 | "descriptions": [
48 | {
49 | "description": "Magnetic susceptibility and specific heat data for LiErF4.\u00a0",
50 | "descriptionType": "Abstract"
51 | }
52 | ],
53 | "identifiers": [
54 | {
55 | "identifier": "10.22002/n13wc-zwc92",
56 | "identifierType": "DOI"
57 | },
58 | {
59 | "identifier": "oai:data.caltech.edu:n13wc-zwc92",
60 | "identifierType": "oai"
61 | }
62 | ],
63 | "publicationYear": "2025",
64 | "publisher": "CaltechDATA",
65 | "rightsList": [
66 | {
67 | "rights": "Creative Commons Attribution 4.0 International",
68 | "rightsIdentifier": "cc-by-4.0",
69 | "rightsIdentifierScheme": "spdx",
70 | "rightsUri": "https://creativecommons.org/licenses/by/4.0/legalcode"
71 | }
72 | ],
73 | "schemaVersion": "http://datacite.org/schema/kernel-4",
74 | "subjects": [
75 | {
76 | "subject": "Physical sciences"
77 | }
78 | ],
79 | "titles": [
80 | {
81 | "title": "LiErF4 susceptibility and specific heat"
82 | }
83 | ],
84 | "types": {
85 | "resourceType": "",
86 | "resourceTypeGeneral": "Dataset"
87 | }
88 | }
--------------------------------------------------------------------------------
/edit_osn.py:
--------------------------------------------------------------------------------
1 | import argparse, os, json
2 | import s3fs, requests
3 | from datacite import schema43
4 | from caltechdata_api import caltechdata_edit, get_metadata
5 |
6 |
parser = argparse.ArgumentParser(
    description="Edits a CaltechDATA record by adding OSN-stored pilot files"
)
parser.add_argument("folder", nargs=1, help="Folder")
# The id is indexed unconditionally below, so argparse must insist on it
# rather than letting the script fail later with a TypeError on None.
parser.add_argument("-id", nargs=1, required=True, help="")

args = parser.parse_args()

# Get access token as environment variable
token = os.environ["RDMTOK"]

endpoint = "https://sdsc.osn.xsede.org/"

# Get metadata and files from bucket
s3 = s3fs.S3FileSystem(anon=True, client_kwargs={"endpoint_url": endpoint})

folder = args.folder[0]

path = "ini210004tommorrell/" + folder

idv = args.id[0]
try:
    metadata = get_metadata(idv, schema="43")
except Exception:
    # Fall back to reading the draft of the record with the access token.
    # `except Exception` (instead of a bare except) lets KeyboardInterrupt
    # and SystemExit propagate.
    url = "https://data.caltech.edu/api/records/"

    headers = {
        "accept": "application/vnd.datacite.datacite+json",
        "Authorization": "Bearer %s" % token,
    }

    response = requests.get(url + idv + "/draft", headers=headers)
    if response.status_code != 200:
        raise Exception(response.text)
    metadata = response.json()


def _collect_file_links(entries, depth=0, max_depth=3):
    """Return endpoint URLs for ``entries``, descending into directories.

    An entry whose last path component contains no "." is treated as a
    directory and its contents are listed with ``s3.glob``. Descent stops
    ``max_depth`` levels below the starting listing; entries at that level
    are added as links whether or not their name contains a ".".
    """
    links = []
    for entry in entries:
        name = entry.split("/")[-1]
        if "." not in name and depth < max_depth:
            links.extend(
                _collect_file_links(s3.glob(entry + "/*"), depth + 1, max_depth)
            )
        else:
            links.append(endpoint + entry)
    return links


# Find the files
file_links = _collect_file_links(s3.glob(path + "/*"))

production = True

response = caltechdata_edit(
    idv, metadata, token, [], production, "43", publish=True, file_links=file_links
)
print(response)
75 |
--------------------------------------------------------------------------------
/update_descriptions.py:
--------------------------------------------------------------------------------
1 | import os, requests, json, math
2 | from caltechdata_api import get_metadata, caltechdata_edit
3 |
token = os.environ["RDMTOK"]

records_api = "https://data.caltech.edu/api/records/"

url = "https://data.caltech.edu/api/communities/0497183f-f3b1-483d-b8bb-133c731c939a/records"
# Records in the community that have no metadata.description, all versions.
query = "?q=NOT%20_exists_%3Ametadata.description&f=allversions:true"

headers = {
    "Authorization": "Bearer %s" % token,
    "Content-type": "application/json",
}


def _strip_expanded_fields(record_md):
    """Remove display-only keys from the ``metadata`` section, in place.

    NOTE(review): these look like vocabulary fields expanded by the API in
    its responses that must not be sent back on update - confirm.
    Optional sections that are absent from the record are skipped.
    """
    for desc in record_md.get("additional_descriptions", []):
        desc["type"].pop("title", None)
    for date in record_md.get("dates", []):
        date["type"].pop("title", None)
    for right in record_md.get("rights", []):
        # Only entries carrying an "icon" are cleaned; entries without one
        # keep their own title and description.
        if "icon" in right:
            for key in ("icon", "title", "description", "props"):
                right.pop(key, None)
    for language in record_md.get("languages", []):
        language.pop("title", None)
    record_md.get("resource_type", {}).pop("title", None)


url = url + query
response = requests.get(url, headers=headers)
total = response.json()["hits"]["total"]
pages = math.ceil(int(total) / 10)
# NOTE(review): each published edit gives a record a description, which
# presumably removes it from this query; paging by number over a shrinking
# result set could skip records - confirm and re-run until nothing is left.
for c in range(1, pages + 1):
    chunkurl = f"{url}&size=10&page={c}"
    response = requests.get(chunkurl, headers=headers).json()
    for hit in response["hits"]["hits"]:
        idv = hit["id"]
        print(idv)
        draft_url = records_api + idv + "/draft"
        # Open a draft of the record for editing.
        result = requests.post(draft_url, headers=headers)
        if result.status_code != 201:
            raise Exception(result.text)
        metadata = result.json()
        # The record title is reused as its description.
        metadata["metadata"]["description"] = hit["metadata"]["title"]
        _strip_expanded_fields(metadata["metadata"])
        result = requests.put(draft_url, headers=headers, json=metadata)
        if result.status_code != 200:
            raise Exception(result.text)
        publish_link = (
            f"https://data.caltech.edu/api/records/{idv}/draft/actions/publish"
        )
        result = requests.post(publish_link, headers=headers)
        if result.status_code != 202:
            raise Exception(result.text)
60 |
--------------------------------------------------------------------------------
/tests/data/datacite43/rmzp9-9yx96.json:
--------------------------------------------------------------------------------
1 | {
2 | "creators": [
3 | {
4 | "familyName": "Devey",
5 | "givenName": "Sean",
6 | "name": "Devey, Sean",
7 | "nameIdentifiers": [
8 | {
9 | "nameIdentifier": "0000-0002-8937-939X",
10 | "nameIdentifierScheme": "ORCID"
11 | }
12 | ],
13 | "nameType": "Personal"
14 | }
15 | ],
16 | "dates": [
17 | {
18 | "date": "2024-06-07",
19 | "dateType": "Issued"
20 | },
21 | {
22 | "date": "2023-11-09",
23 | "dateInformation": "DPIV, CTA data collection",
24 | "dateType": "Collected"
25 | },
26 | {
27 | "date": "2023-10-22",
28 | "dateInformation": "No FMS DPIV data collected",
29 | "dateType": "Collected"
30 | },
31 | {
32 | "date": "2023-11-16",
33 | "dateInformation": "Cylinder wake dye visualizations collected",
34 | "dateType": "Collected"
35 | },
36 | {
37 | "date": "2024-05-13",
38 | "dateInformation": "Shear layer dye visualization",
39 | "dateType": "Collected"
40 | },
41 | {
42 | "date": "2024-06-07",
43 | "dateInformation": "date of upload",
44 | "dateType": "Submitted"
45 | }
46 | ],
47 | "descriptions": [
48 | {
49 | "description": "DPIV, CTA measurements and dye visualizations demonstrating flow quality of the Free-surface, Low turbulence, Optically accessible, Water TUnnel in a Box (FLOWTUB) developed at GALCIT 2022-2024.",
50 | "descriptionType": "Abstract"
51 | }
52 | ],
53 | "fundingReferences": [
54 | {
55 | "awardNumber": "DGE-1745301",
56 | "awardTitle": "Graduate Research Fellowships Program (GRFP)",
57 | "funderIdentifier": "grid.431093.c",
58 | "funderIdentifierType": "GRID",
59 | "funderName": "National Science Foundation"
60 | }
61 | ],
62 | "identifiers": [
63 | {
64 | "identifier": "10.22002/rmzp9-9yx96",
65 | "identifierType": "DOI"
66 | },
67 | {
68 | "identifier": "oai:data.caltech.edu:rmzp9-9yx96",
69 | "identifierType": "oai"
70 | }
71 | ],
72 | "language": "eng",
73 | "publicationYear": "2024",
74 | "publisher": "CaltechDATA",
75 | "rightsList": [
76 | {
77 | "rights": "Creative Commons Attribution 4.0 International",
78 | "rightsIdentifier": "cc-by-4.0",
79 | "rightsIdentifierScheme": "spdx",
80 | "rightsUri": "https://creativecommons.org/licenses/by/4.0/legalcode"
81 | }
82 | ],
83 | "schemaVersion": "http://datacite.org/schema/kernel-4",
84 | "titles": [
85 | {
86 | "title": "FLOWTUB Water Tunnel Validation Data"
87 | }
88 | ],
89 | "types": {
90 | "resourceType": "",
91 | "resourceTypeGeneral": "Dataset"
92 | },
93 | "version": "1"
94 | }
--------------------------------------------------------------------------------
/tests/data/datacite43/nbtw5-37m55.json:
--------------------------------------------------------------------------------
1 | {
2 | "creators": [
3 | {
4 | "affiliation": [
5 | {
6 | "affiliationIdentifier": "05dxps055",
7 | "affiliationIdentifierScheme": "ROR",
8 | "name": "California Institute of Technology"
9 | }
10 | ],
11 | "familyName": "Law",
12 | "givenName": "Casey",
13 | "name": "Casey Law",
14 | "nameIdentifiers": [
15 | {
16 | "nameIdentifier": "0000-0002-4119-9963",
17 | "nameIdentifierScheme": "ORCID"
18 | }
19 | ],
20 | "nameType": "Personal"
21 | }
22 | ],
23 | "dates": [
24 | {
25 | "date": "2024",
26 | "dateType": "Issued"
27 | },
28 | {
29 | "date": "2024-04-12",
30 | "dateType": "Created"
31 | }
32 | ],
33 | "descriptions": [
34 | {
35 | "description": "Data associated with DSA-110 candidate transient.",
36 | "descriptionType": "Abstract"
37 | }
38 | ],
39 | "formats": [
40 | "png"
41 | ],
42 | "fundingReferences": [
43 | {
44 | "funderIdentifier": "grid.431093.c",
45 | "funderIdentifierType": "GRID",
46 | "funderName": "National Science Foundation"
47 | }
48 | ],
49 | "geoLocations": [
50 | {
51 | "geoLocationPlace": "OVRO",
52 | "geoLocationPoint": {
53 | "pointLatitude": 37.2339,
54 | "pointLongitude": -118.282
55 | }
56 | }
57 | ],
58 | "identifiers": [
59 | {
60 | "identifier": "10.25800/3ghe-8e93",
61 | "identifierType": "DOI"
62 | },
63 | {
64 | "identifier": "oai:data.caltech.edu:nbtw5-37m55",
65 | "identifierType": "oai"
66 | },
67 | {
68 | "identifier": "231120aabi",
69 | "identifierType": "dsa-110-id"
70 | },
71 | {
72 | "identifier": "nbtw5-37m55",
73 | "identifierType": "cdid"
74 | }
75 | ],
76 | "publicationYear": "2024",
77 | "publisher": "Caltech Data",
78 | "relatedIdentifiers": [
79 | {
80 | "relatedIdentifier": "http://deepsynoptic.org",
81 | "relatedIdentifierType": "URL",
82 | "relationType": "IsDocumentedBy"
83 | }
84 | ],
85 | "rightsList": [
86 | {
87 | "rights": "cc-by-4.0"
88 | }
89 | ],
90 | "schemaVersion": "http://datacite.org/schema/kernel-4",
91 | "subjects": [
92 | {
93 | "subject": "OVRO"
94 | },
95 | {
96 | "subject": "Astrophysics"
97 | },
98 | {
99 | "subject": "Fast Radio Bursts"
100 | }
101 | ],
102 | "titles": [
103 | {
104 | "title": "DSA-110 Data for Candidate Fast Radio Burst 231120aabi"
105 | }
106 | ],
107 | "types": {
108 | "resourceType": "",
109 | "resourceTypeGeneral": "Dataset"
110 | }
111 | }
--------------------------------------------------------------------------------
/tests/data/datacite43/hevaf-20f84.json:
--------------------------------------------------------------------------------
1 | {
2 | "creators": [
3 | {
4 | "affiliation": [
5 | {
6 | "affiliationIdentifier": "05dxps055",
7 | "affiliationIdentifierScheme": "ROR",
8 | "name": "California Institute of Technology"
9 | }
10 | ],
11 | "familyName": "Law",
12 | "givenName": "Casey",
13 | "name": "Casey Law",
14 | "nameIdentifiers": [
15 | {
16 | "nameIdentifier": "0000-0002-4119-9963",
17 | "nameIdentifierScheme": "ORCID"
18 | }
19 | ],
20 | "nameType": "Personal"
21 | }
22 | ],
23 | "dates": [
24 | {
25 | "date": "2024",
26 | "dateType": "Issued"
27 | },
28 | {
29 | "date": "2024-04-25",
30 | "dateType": "Created"
31 | }
32 | ],
33 | "descriptions": [
34 | {
35 | "description": "Data associated with DSA-110 candidate transient.",
36 | "descriptionType": "Abstract"
37 | }
38 | ],
39 | "formats": [
40 | "png"
41 | ],
42 | "fundingReferences": [
43 | {
44 | "funderIdentifier": "grid.431093.c",
45 | "funderIdentifierType": "GRID",
46 | "funderName": "National Science Foundation"
47 | }
48 | ],
49 | "geoLocations": [
50 | {
51 | "geoLocationPlace": "OVRO",
52 | "geoLocationPoint": {
53 | "pointLatitude": 37.2339,
54 | "pointLongitude": -118.282
55 | }
56 | }
57 | ],
58 | "identifiers": [
59 | {
60 | "identifier": "10.25800/t9jd-fh86",
61 | "identifierType": "DOI"
62 | },
63 | {
64 | "identifier": "oai:data.caltech.edu:hevaf-20f84",
65 | "identifierType": "oai"
66 | },
67 | {
68 | "identifier": "221116aaab",
69 | "identifierType": "dsa-110-id"
70 | },
71 | {
72 | "identifier": "hevaf-20f84",
73 | "identifierType": "cdid"
74 | }
75 | ],
76 | "publicationYear": "2024",
77 | "publisher": "Caltech Data",
78 | "relatedIdentifiers": [
79 | {
80 | "relatedIdentifier": "http://deepsynoptic.org",
81 | "relatedIdentifierType": "URL",
82 | "relationType": "IsDocumentedBy"
83 | }
84 | ],
85 | "rightsList": [
86 | {
87 | "rights": "cc-by-4.0"
88 | }
89 | ],
90 | "schemaVersion": "http://datacite.org/schema/kernel-4",
91 | "subjects": [
92 | {
93 | "subject": "OVRO"
94 | },
95 | {
96 | "subject": "Astrophysics"
97 | },
98 | {
99 | "subject": "Fast Radio Bursts"
100 | }
101 | ],
102 | "titles": [
103 | {
104 | "title": "DSA-110 Data for Candidate Fast Radio Burst 221116aaab"
105 | }
106 | ],
107 | "types": {
108 | "resourceType": "",
109 | "resourceTypeGeneral": "Dataset"
110 | },
111 | "version": "0.1"
112 | }
--------------------------------------------------------------------------------
/tests/test_rdm.py:
--------------------------------------------------------------------------------
1 | from caltechdata_api import (
2 | customize_schema,
3 | caltechdata_write,
4 | caltechdata_edit,
5 | get_metadata,
6 | )
7 | import json
8 | import os
9 |
10 |
def test_datacite_rdm_conversion(full_datacite43_record, full_rdm_record):
    """Check that customize_schema turns the DataCite 4.3 record into the RDM one."""
    # Drop DOI identifiers before converting, since they are processed by
    # caltechdata_write or caltechdata_edit rather than by the conversion.
    full_datacite43_record["identifiers"] = [
        entry
        for entry in full_datacite43_record["identifiers"]
        if entry["identifierType"] != "DOI"
    ]

    rdm_record = customize_schema(full_datacite43_record, schema="43")

    assert rdm_record == full_rdm_record
24 |
25 |
def test_datacite_rdm_create_edit(full_datacite43_record):
    """Create records, then edit one with and without files, checking each result.

    Every call targets the non-production instance (``production=False``)
    using the token from the ``RDMTOK`` environment variable.
    """
    record = full_datacite43_record
    # Options shared by every write and edit call below.
    common = {
        "schema": "43",
        "production": False,
        "publish": True,
        "token": os.environ.get("RDMTOK"),
    }

    # Strip the DOI so that the test system creates one itself.
    record["identifiers"] = [
        entry for entry in record["identifiers"] if entry["identifierType"] != "DOI"
    ]

    # Metadata-only record.
    recid = caltechdata_write(record, **common)
    assert len(recid) == 11

    # Record with an attached file.
    recid = caltechdata_write(record, files=["helpers.py"], **common)
    assert len(recid) == 11

    # Metadata-only edit of the second record.
    record["publisher"] = "Edited"
    doi = caltechdata_edit(recid, record, **common)
    assert doi.startswith("10.33569")

    # Validate is false until geolocation points are fixed/we move to 4.6
    fetched = get_metadata(recid, production=False, validate=False)
    assert fetched["publisher"] == "Edited"

    # Edit that also supplies a file; the returned DOI must differ from the
    # one returned by the metadata-only edit.
    record["publisher"] = "Again!"
    new_doi = caltechdata_edit(recid, record, files=["helpers.py"], **common)
    assert new_doi != doi

    # The record id is the part of the new DOI after the slash.
    recid = new_doi.split("/")[1]

    # Validate is false until geolocation points are fixed/we move to 4.6
    fetched = get_metadata(recid, production=False, validate=False)
    assert fetched["publisher"] == "Again!"
96 |
--------------------------------------------------------------------------------
/caltechdata_api/utils.py:
--------------------------------------------------------------------------------
1 | # Public domain by Mitch McMabers
2 |
3 | from typing import List, Union
4 |
# Unit suffixes ordered from smallest to largest; index i is step**i bytes.
METRIC_LABELS: List[str] = "B kB MB GB TB PB EB ZB YB".split()
BINARY_LABELS: List[str] = "B KiB MiB GiB TiB PiB EiB ZiB YiB".split()

# Indexed by precision (0-3): half of the last displayed digit, i.e. the
# margin below a unit step at which rounding would already show the step.
PRECISION_OFFSETS: List[float] = [0.5, 0.05, 0.005, 0.0005]

# Indexed by precision (0-3): "<sign><number> <unit>" with that many decimals.
PRECISION_FORMATS: List[str] = ["{}{:.%df} {}" % digits for digits in range(4)]
14 |
15 |
def humanbytes(num: Union[int, float], metric: bool = True, precision: int = 1) -> str:
    """
    Format a byte count as a short human-readable string such as "4.3 GB".

    num: the number of bytes; negative values keep their sign.
    metric: True (the default) scales by 1000 and uses kB/MB/...;
        False scales by 1024 and uses KiB/MiB/...
    precision: number of decimals to show, 0 to 3.
    """

    assert isinstance(num, (int, float)), "num must be an int or float"
    assert isinstance(metric, bool), "metric must be a bool"
    assert (
        isinstance(precision, int) and 0 <= precision <= 3
    ), "precision must be an int (range 0-3)"

    if metric:
        labels, step = METRIC_LABELS, 1000
    else:
        labels, step = BINARY_LABELS, 1024

    # A value this close to a full step would be *displayed* as the step
    # itself once rounded (e.g. 1023.95 -> "1024.0" at one decimal), so it
    # has to move up to the next unit instead.
    rollover = step - PRECISION_OFFSETS[precision]

    sign = ""
    value = num
    if num < 0:
        sign = "-"
        value = abs(num)

    # Scale down one unit at a time, but never past the largest label.
    # The repeated divisions accumulate float error only far below the
    # displayed decimals.
    position = 0
    largest = len(labels) - 1
    while position < largest and not (value < rollover):
        value /= step
        position += 1

    return PRECISION_FORMATS[precision].format(sign, value, labels[position])
54 |
55 |
if __name__ == "__main__":
    # Small demo of both unit systems. humanbytes() defaults to metric
    # units, so the binary examples must ask for metric=False explicitly;
    # otherwise they print metric values that contradict their comments.
    print(humanbytes(2251799813685247, metric=False))  # 2 pebibytes
    print(humanbytes(2000000000000000, True))  # 2 petabytes
    print(humanbytes(1099511627776, metric=False))  # 1 tebibyte
    print(humanbytes(1000000000000, True))  # 1 terabyte
    print(humanbytes(1000000000, True))  # 1 gigabyte
    print(humanbytes(4318498233, metric=False, precision=3))  # 4.022 gibibytes
    print(humanbytes(4318498233, True, 3))  # 4.318 gigabytes
    print(humanbytes(-4318498233, metric=False, precision=2))  # -4.02 gibibytes
65 |
--------------------------------------------------------------------------------
/tests/data/datacite43/hhg7x-hgm42.json:
--------------------------------------------------------------------------------
1 | {
2 | "creators": [
3 | {
4 | "affiliation": [
5 | {
6 | "affiliationIdentifier": "05dxps055",
7 | "affiliationIdentifierScheme": "ROR",
8 | "name": "California Institute of Technology"
9 | }
10 | ],
11 | "familyName": "Atterholt",
12 | "givenName": "James",
13 | "name": "Atterholt, James",
14 | "nameIdentifiers": [
15 | {
16 | "nameIdentifier": "0000-0003-1603-5518",
17 | "nameIdentifierScheme": "ORCID"
18 | }
19 | ],
20 | "nameType": "Personal"
21 | },
22 | {
23 | "affiliation": [
24 | {
25 | "affiliationIdentifier": "05dxps055",
26 | "affiliationIdentifierScheme": "ROR",
27 | "name": "California Institute of Technology"
28 | }
29 | ],
30 | "familyName": "Zhan",
31 | "givenName": "Zhongwen",
32 | "name": "Zhan, Zhongwen",
33 | "nameIdentifiers": [
34 | {
35 | "nameIdentifier": "0000-0002-5586-2607",
36 | "nameIdentifierScheme": "ORCID"
37 | }
38 | ],
39 | "nameType": "Personal"
40 | }
41 | ],
42 | "dates": [
43 | {
44 | "date": "2024-06-26",
45 | "dateType": "Issued"
46 | }
47 | ],
48 | "descriptions": [
49 | {
50 | "description": "These are h5 files that contain events with PmP observations used in the publication \"Fine Scale Southern California Moho Structure Uncovered with Distributed Acoustic Sensing.\" Events are descriptively named.",
51 | "descriptionType": "Abstract"
52 | }
53 | ],
54 | "fundingReferences": [
55 | {
56 | "awardNumber": "1848166",
57 | "awardTitle": "CAREER:Potential of fiber acoustic sensing in the next-generation seismic networks",
58 | "funderIdentifier": "grid.431093.c",
59 | "funderIdentifierType": "GRID",
60 | "funderName": "National Science Foundation"
61 | },
62 | {
63 | "funderIdentifier": "grid.452959.6",
64 | "funderIdentifierType": "GRID",
65 | "funderName": "Gordon and Betty Moore Foundation"
66 | }
67 | ],
68 | "identifiers": [
69 | {
70 | "identifier": "10.22002/hhg7x-hgm42",
71 | "identifierType": "DOI"
72 | },
73 | {
74 | "identifier": "oai:data.caltech.edu:hhg7x-hgm42",
75 | "identifierType": "oai"
76 | }
77 | ],
78 | "publicationYear": "2024",
79 | "publisher": "CaltechDATA",
80 | "rightsList": [
81 | {
82 | "rights": "Creative Commons Zero v1.0 Universal",
83 | "rightsIdentifier": "cc0-1.0",
84 | "rightsIdentifierScheme": "spdx",
85 | "rightsUri": "https://creativecommons.org/publicdomain/zero/1.0/legalcode"
86 | }
87 | ],
88 | "schemaVersion": "http://datacite.org/schema/kernel-4",
89 | "titles": [
90 | {
91 | "title": "Catalog of Events with PmP Phase"
92 | }
93 | ],
94 | "types": {
95 | "resourceType": "",
96 | "resourceTypeGeneral": "Dataset"
97 | }
98 | }
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CaltechDATA API Python Library
2 |
3 | [Latest version of this record on CaltechDATA](https://data.caltech.edu/records/wfjr5-kw507/latest)
4 |
5 | The `caltechdata_api` Python library provides a convenient interface for interacting with the CaltechDATA API. It allows users to write files, create DataCite 4 standard JSON records, edit existing records, and retrieve metadata from the CaltechDATA repository.
6 |
7 | ## Features
8 |
9 | ### Writing and Editing Records
10 | - `caltechdata_write`: Writes files and a DataCite 4 standard JSON record to the CaltechDATA repository.
11 | - `caltechdata_edit`: Edits existing records in CaltechDATA.
12 |
13 | ### Metadata Operations
14 | - `get_metadata`: Retrieves metadata from CaltechDATA records.
15 |
16 | ## Requirements
17 |
18 | - Python 3.6+
19 |
20 | ## Installation
21 |
22 | Install the library via pip:
23 |
24 | ```shell
25 | pip install caltechdata_api
26 | ```
27 |
28 | ## Examples
29 |
30 | There are some example python scripts in the GitHub repository.
31 |
32 | ### Create a record:
33 |
34 | ```shell
35 | python write.py example.json -fnames logo.gif
36 | # Output: pbkn6-m9y63 (unique identifier)
37 | ```
38 | > The response will be the unique identifier for the record. You can put this at
39 | the end of a url to visit the record (e.g.
40 | https://data.caltechlibrary.dev/records/pbkn6-m9y63)
41 |
42 | ### Edit a record
43 | (Make changes to the example.json file to see a change.)
44 | ```
45 | python edit.py example.json -id pbkn6-m9y63
46 | 10.33569/pbkn6-m9y63
47 | ```
48 | > The response is the DOI for the record, which includes the unique identifier
49 | for the record in the default configuration.
50 |
51 | ## Using Custom DOIs
52 | Some groups have worked with the library to create custom DOIs. These can be
53 | passed in the metadata like:
54 |
55 | ```shell
56 | python write.py example_custom.json -fnames logo.gif
57 | m6zxz-p4j22
58 | ```
59 |
60 | And then you can edit with
61 | ```
62 | python edit.py example_custom.json -id m6zxz-p4j22
63 | 10.5281/inveniordm.1234
64 | ```
65 |
66 | This returns the custom DOI of the record if it is successful.
67 |
68 |
69 | ## Setup and Authentication
70 |
71 | 1. Acquire a personal access token from your CaltechDATA account (found under "Applications" at the top right of your screen).
72 | 2. Copy the token to a file named `token.bash`.
73 | 3. Load the token in the command line using `source token.bash`.
74 |
75 | ## Note on Testing
76 |
77 | Only test your application on the test repository (`data.caltechlibrary.dev`). Testing the API on the public
78 | repository will generate junk records that are annoying to delete.
79 |
80 | ## Using the Command Line Interface
81 |
82 | If you would like to interact with the CaltechDATA API using the command-line interface (CLI), please [see the detailed documentation](https://caltechlibrary.github.io/caltechdata_api/caltechdata_api/cli-documentation-for-users).
83 |
--------------------------------------------------------------------------------
/tests/data/datacite43/dks9f-mj878.json:
--------------------------------------------------------------------------------
1 | {
2 | "creators": [
3 | {
4 | "affiliation": [
5 | {
6 | "affiliationIdentifier": "05dxps055",
7 | "affiliationIdentifierScheme": "ROR",
8 | "name": "California Institute of Technology"
9 | }
10 | ],
11 | "familyName": "Gray",
12 | "givenName": "Robert M.",
13 | "name": "Gray, Robert M.",
14 | "nameIdentifiers": [
15 | {
16 | "nameIdentifier": "0000-0001-5980-8774",
17 | "nameIdentifierScheme": "ORCID"
18 | }
19 | ],
20 | "nameType": "Personal"
21 | },
22 | {
23 | "affiliation": [
24 | {
25 | "affiliationIdentifier": "05dxps055",
26 | "affiliationIdentifierScheme": "ROR",
27 | "name": "California Institute of Technology"
28 | }
29 | ],
30 | "familyName": "Liu",
31 | "givenName": "Mingchen",
32 | "name": "Liu, Mingchen",
33 | "nameIdentifiers": [],
34 | "nameType": "Personal"
35 | },
36 | {
37 | "affiliation": [
38 | {
39 | "affiliationIdentifier": "05dxps055",
40 | "affiliationIdentifierScheme": "ROR",
41 | "name": "California Institute of Technology"
42 | }
43 | ],
44 | "familyName": "Zhou",
45 | "givenName": "Selina",
46 | "name": "Zhou, Selina",
47 | "nameIdentifiers": [],
48 | "nameType": "Personal"
49 | }
50 | ],
51 | "dates": [
52 | {
53 | "date": "2024-07-20",
54 | "dateType": "Issued"
55 | }
56 | ],
57 | "descriptions": [
58 | {
59 | "description": "Data and processing code corresponding to the manuscript, \"Quadratic-soliton-enhanced mid-IR molecular sensing.\"",
60 | "descriptionType": "Abstract"
61 | }
62 | ],
63 | "identifiers": [
64 | {
65 | "identifier": "10.22002/dks9f-mj878",
66 | "identifierType": "DOI"
67 | },
68 | {
69 | "identifier": "oai:data.caltech.edu:dks9f-mj878",
70 | "identifierType": "oai"
71 | }
72 | ],
73 | "publicationYear": "2024",
74 | "publisher": "CaltechDATA",
75 | "relatedIdentifiers": [
76 | {
77 | "relatedIdentifier": "arXiv:2301.07826",
78 | "relatedIdentifierType": "arXiv",
79 | "relationType": "IsDescribedBy",
80 | "resourceTypeGeneral": "Text"
81 | }
82 | ],
83 | "rightsList": [
84 | {
85 | "rights": "Creative Commons Attribution 4.0 International",
86 | "rightsIdentifier": "cc-by-4.0",
87 | "rightsIdentifierScheme": "spdx",
88 | "rightsUri": "https://creativecommons.org/licenses/by/4.0/legalcode"
89 | }
90 | ],
91 | "schemaVersion": "http://datacite.org/schema/kernel-4",
92 | "titles": [
93 | {
94 | "title": "Data for \"Quadratic-soliton-enhanced mid-IR molecular sensing\""
95 | }
96 | ],
97 | "types": {
98 | "resourceType": "",
99 | "resourceTypeGeneral": "Dataset"
100 | }
101 | }
--------------------------------------------------------------------------------
/tests/data/datacite43/n0y4x-xx706.json:
--------------------------------------------------------------------------------
1 | {
2 | "contributors": [
3 | {
4 | "affiliation": [
5 | {
6 | "name": "Department of Biological Sciences, California State University, Sacramento"
7 | }
8 | ],
9 | "contributorType": "ContactPerson",
10 | "familyName": "Gleason",
11 | "givenName": "Lani U.",
12 | "name": "Gleason, Lani U.",
13 | "nameIdentifiers": [],
14 | "nameType": "Personal"
15 | }
16 | ],
17 | "creators": [
18 | {
19 | "affiliation": [
20 | {
21 | "name": "Department of Biological Sciences, California State University, Sacramento"
22 | }
23 | ],
24 | "familyName": "Gleason",
25 | "givenName": "Lani U.",
26 | "name": "Gleason, Lani U.",
27 | "nameIdentifiers": [],
28 | "nameType": "Personal"
29 | }
30 | ],
31 | "dates": [
32 | {
33 | "date": "2025-01-24",
34 | "dateType": "Issued"
35 | },
36 | {
37 | "date": "2025-01-22",
38 | "dateType": "Accepted"
39 | }
40 | ],
41 | "descriptions": [
42 | {
43 | "description": "Genes identified to be significantly differentially expressed for each component of the Venn diagram in Figure 1B. The annotation information, RPKM expression value for each individual in each of the four treatments, and the average RPKM expression value per treatment are provided for each gene.",
44 | "descriptionType": "Abstract"
45 | }
46 | ],
47 | "fundingReferences": [
48 | {
49 | "funderName": "This work was supported by a California State University, Sacramento Research & Creative Activity (RCA) Award and a Biological Sciences Genes to Ecosystems (G2E) Award to Lani Gleason. The California State University (CSU) Council on Ocean Affairs, Science, and Technology (COAST) also provided an Undergraduate Research Award to support Hanna Franklin."
50 | }
51 | ],
52 | "identifiers": [
53 | {
54 | "identifier": "10.22002/n0y4x-xx706",
55 | "identifierType": "DOI"
56 | },
57 | {
58 | "identifier": "oai:data.caltech.edu:n0y4x-xx706",
59 | "identifierType": "oai"
60 | }
61 | ],
62 | "language": "eng",
63 | "publicationYear": "2025",
64 | "publisher": "CaltechDATA",
65 | "relatedIdentifiers": [
66 | {
67 | "relatedIdentifier": "10.17912/micropub.biology.001473",
68 | "relatedIdentifierType": "DOI",
69 | "relationType": "IsPartOf",
70 | "resourceTypeGeneral": "Text"
71 | }
72 | ],
73 | "rightsList": [
74 | {
75 | "rights": "Creative Commons Attribution 4.0 International",
76 | "rightsIdentifier": "cc-by-4.0",
77 | "rightsIdentifierScheme": "spdx",
78 | "rightsUri": "https://creativecommons.org/licenses/by/4.0/legalcode"
79 | }
80 | ],
81 | "schemaVersion": "http://datacite.org/schema/kernel-4",
82 | "subjects": [
83 | {
84 | "subject": "haliotis rufescens"
85 | }
86 | ],
87 | "titles": [
88 | {
89 | "title": "Dataset: Heat Stress, Starvation, and Heat Stress Plus Starvation Cause Unique Transcriptomic Responses in the Economically Important Red Abalone Haliotis rufescens"
90 | }
91 | ],
92 | "types": {
93 | "resourceType": "",
94 | "resourceTypeGeneral": "Dataset"
95 | },
96 | "version": "1.0"
97 | }
--------------------------------------------------------------------------------
/tests/data/datacite43/wbty9-bqy29.json:
--------------------------------------------------------------------------------
1 | {
2 | "contributors": [
3 | {
4 | "affiliation": [
5 | {
6 | "name": "Molecular Biophysics & Biochemistry, Yale University, New Haven, Connecticut, United States"
7 | },
8 | {
9 | "name": "Cell Biology, Yale University School of Medicine"
10 | }
11 | ],
12 | "contributorType": "ContactPerson",
13 | "familyName": "Berro",
14 | "givenName": "Julien",
15 | "name": "Berro, Julien",
16 | "nameIdentifiers": [
17 | {
18 | "nameIdentifier": "0000-0002-9560-8646",
19 | "nameIdentifierScheme": "ORCID"
20 | }
21 | ],
22 | "nameType": "Personal"
23 | }
24 | ],
25 | "creators": [
26 | {
27 | "affiliation": [
28 | {
29 | "name": "Molecular Biophysics & Biochemistry, Yale University, New Haven, Connecticut, United States"
30 | },
31 | {
32 | "name": "Cell Biology, Yale University School of Medicine"
33 | }
34 | ],
35 | "familyName": "Berro",
36 | "givenName": "Julien",
37 | "name": "Berro, Julien",
38 | "nameIdentifiers": [
39 | {
40 | "nameIdentifier": "0000-0002-9560-8646",
41 | "nameIdentifierScheme": "ORCID"
42 | }
43 | ],
44 | "nameType": "Personal"
45 | }
46 | ],
47 | "dates": [
48 | {
49 | "date": "2024-05-08",
50 | "dateType": "Issued"
51 | },
52 | {
53 | "date": "2024-05-03",
54 | "dateType": "Accepted"
55 | }
56 | ],
57 | "descriptions": [
58 | {
59 | "description": "Primers used in this study",
60 | "descriptionType": "Abstract"
61 | }
62 | ],
63 | "fundingReferences": [
64 | {
65 | "funderName": "This work was partly supported by the National Institutes of Health (R01 GM11563601)."
66 | }
67 | ],
68 | "identifiers": [
69 | {
70 | "identifier": "10.22002/wbty9-bqy29",
71 | "identifierType": "DOI"
72 | },
73 | {
74 | "identifier": "oai:data.caltech.edu:wbty9-bqy29",
75 | "identifierType": "oai"
76 | }
77 | ],
78 | "language": "eng",
79 | "publicationYear": "2024",
80 | "publisher": "CaltechDATA",
81 | "relatedIdentifiers": [
82 | {
83 | "relatedIdentifier": "10.17912/micropub.biology.001191",
84 | "relatedIdentifierType": "DOI",
85 | "relationType": "IsPartOf",
86 | "resourceTypeGeneral": "Text"
87 | }
88 | ],
89 | "rightsList": [
90 | {
91 | "rights": "Creative Commons Attribution 4.0 International",
92 | "rightsIdentifier": "cc-by-4.0",
93 | "rightsIdentifierScheme": "spdx",
94 | "rightsUri": "https://creativecommons.org/licenses/by/4.0/legalcode"
95 | }
96 | ],
97 | "schemaVersion": "http://datacite.org/schema/kernel-4",
98 | "subjects": [
99 | {
100 | "subject": "s. pombe"
101 | }
102 | ],
103 | "titles": [
104 | {
105 | "title": "Dataset: CRISPR-Cas9 editing efficiency in fission yeast is not limited by homology search and is improved by combining gap-repair with fluoride selection"
106 | }
107 | ],
108 | "types": {
109 | "resourceType": "",
110 | "resourceTypeGeneral": "Dataset"
111 | },
112 | "version": "1.0"
113 | }
--------------------------------------------------------------------------------
/outdated/caltechdata_multipart.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 |
4 | from requests import session
5 |
6 | import boto3
7 | from caltechdata_api import customize_schema
8 |
9 |
def send_s3(filepath, token, production=False):
    """Upload one file to S3 and return the file description for the record.

    The signing endpoint is asked (with the bearer token) for the target
    bucket and object key, the file is uploaded with boto3, and the md5
    endpoint is then queried for the checksum of the uploaded object.

    filepath: path of the local file to upload.
    token: API token sent as a bearer token to both endpoints.
    production: True selects data.caltech.edu; anything else the sandbox.

    Returns a dict with the keys "url" (the object key), "filename", "md5"
    and "size" (bytes on disk).
    """
    # Kept as an equality test so that only True (or 1) selects production.
    if production == True:
        s3surl = "https://data.caltech.edu/tindfiles/sign_s3/"
        chkurl = "https://data.caltech.edu/tindfiles/md5_s3"
    else:
        s3surl = "https://cd-sandbox.tind.io/tindfiles/sign_s3/"
        chkurl = "https://cd-sandbox.tind.io/tindfiles/md5_s3"

    headers = {"Authorization": "Bearer %s" % token}

    # The session is closed on exit, including when a request raises.
    with session() as c:
        jresp = c.get(s3surl, headers=headers).json()
        bucket = jresp["bucket"]
        key = jresp["data"]["fields"]["key"]

        print(filepath)

        # The module imports boto3; the previous `boto.client` raised
        # NameError before anything was uploaded.
        s3 = boto3.client("s3")
        s3.upload_file(filepath, bucket, key)

        # The earlier version stopped the whole interpreter here with a
        # debugging exit(), so callers never received the file description.
        # The form POST that followed it uploaded the same object a second
        # way and is no longer needed once boto3 has done the upload.
        response = c.get(chkurl + "/" + bucket + "/" + key, headers=headers)
        md5 = response.json()["md5"]

    return {
        "url": key,
        "filename": os.path.basename(filepath),
        "md5": md5,
        # Read from the filesystem, so no file handle is left open.
        "size": os.path.getsize(filepath),
    }
74 |
75 |
def caltechdata_write(metadata, token, files=None, production=False):
    """Upload files and submit a new record to the create endpoint.

    metadata: record metadata, converted with customize_schema before sending.
    token: API token sent as a bearer token.
    files: a single path, a list of paths, or None/empty for no files.
    production: True selects data.caltech.edu; anything else the sandbox.

    Returns the text of the create endpoint's response.
    """
    # None replaces the former mutable `[]` default; behaviour is unchanged
    # for callers that omit the argument.
    if files is None:
        files = []
    # If files is a string - change to single value array
    elif isinstance(files, str):
        files = [files]

    fileinfo = [send_s3(f, token, production) for f in files]

    # Kept as an equality test so that only True (or 1) selects production.
    if production == True:
        url = "https://data.caltech.edu/submit/api/create/"
    else:
        url = "https://cd-sandbox.tind.io/submit/api/create/"

    headers = {"Authorization": "Bearer %s" % token, "Content-type": "application/json"}

    # `from caltechdata_api import customize_schema` binds the function
    # itself (the test suite calls it directly), so it has no
    # `.customize_schema` attribute to go through.
    newdata = customize_schema(metadata)
    newdata["files"] = fileinfo
    if "doi" not in newdata:
        # We want tind to generate the identifier
        newdata["final_actions"] = [
            {"type": "create_doi", "parameters": {"type": "records", "field": "doi"}}
        ]

    dat = json.dumps({"record": newdata})

    # Close the session once the request has completed.
    with session() as c:
        response = c.post(url, headers=headers, data=dat)
    return response.text
107 |
--------------------------------------------------------------------------------
/tests/data/datacite43/4yxbs-4mj38.json:
--------------------------------------------------------------------------------
1 | {
2 | "creators": [
3 | {
4 | "affiliation": [
5 | {
6 | "affiliationIdentifier": "05dxps055",
7 | "affiliationIdentifierScheme": "ROR",
8 | "name": "California Institute of Technology"
9 | }
10 | ],
11 | "familyName": "Law",
12 | "givenName": "Casey",
13 | "name": "Casey Law",
14 | "nameIdentifiers": [
15 | {
16 | "nameIdentifier": "0000-0002-4119-9963",
17 | "nameIdentifierScheme": "ORCID"
18 | }
19 | ],
20 | "nameType": "Personal"
21 | }
22 | ],
23 | "dates": [
24 | {
25 | "date": "2023",
26 | "dateType": "Issued"
27 | },
28 | {
29 | "date": "2023-04-21",
30 | "dateType": "Created"
31 | }
32 | ],
33 | "descriptions": [
34 | {
35 | "description": "Data associated with DSA-110 candidate transient. Each filterbank is saved at maximum native resolution (32.7 microseconds, 30.4 kHz) and contains ~0.669696 seconds (20480 samples) around the burst across the full DSA-110 187 MHz (6144 channels) frequency band. The Stokes parameters have been calibrated using observations of the 3C48 and 3C286 Very Large Array (VLA) calibrators as described in Sherman et al., 2024 (https://doi.org/10.3847/1538-4357/ad275e ; see Appendices D and E). Note that there may be minute differences between the data contained here and that reported in Sherman et al., 2024 due to being calibrated at maximum time resolution, rather than being downsampled first.",
36 | "descriptionType": "Abstract"
37 | }
38 | ],
39 | "formats": [
40 | "png"
41 | ],
42 | "fundingReferences": [
43 | {
44 | "funderIdentifier": "grid.431093.c",
45 | "funderIdentifierType": "GRID",
46 | "funderName": "National Science Foundation"
47 | }
48 | ],
49 | "geoLocations": [
50 | {
51 | "geoLocationPlace": "OVRO",
52 | "geoLocationPoint": {
53 | "pointLatitude": 37.2339,
54 | "pointLongitude": -118.282
55 | }
56 | }
57 | ],
58 | "identifiers": [
59 | {
60 | "identifier": "10.22002/4yxbs-4mj38",
61 | "identifierType": "DOI"
62 | },
63 | {
64 | "identifier": "oai:data.caltech.edu:4yxbs-4mj38",
65 | "identifierType": "oai"
66 | },
67 | {
68 | "identifier": "220506aabd",
69 | "identifierType": "dsa-110-id"
70 | },
71 | {
72 | "identifier": "byyt8-y6a26",
73 | "identifierType": "cdid"
74 | }
75 | ],
76 | "publicationYear": "2023",
77 | "publisher": "Caltech Data",
78 | "relatedIdentifiers": [
79 | {
80 | "relatedIdentifier": "http://deepsynoptic.org",
81 | "relatedIdentifierType": "URL",
82 | "relationType": "IsDocumentedBy"
83 | }
84 | ],
85 | "rightsList": [
86 | {
87 | "rights": "cc-by-4.0"
88 | }
89 | ],
90 | "schemaVersion": "http://datacite.org/schema/kernel-4",
91 | "subjects": [
92 | {
93 | "subject": "OVRO"
94 | },
95 | {
96 | "subject": "Astrophysics"
97 | },
98 | {
99 | "subject": "Fast Radio Bursts"
100 | }
101 | ],
102 | "titles": [
103 | {
104 | "title": "DSA-110 Data for Candidate Fast Radio Burst 220506aabd"
105 | }
106 | ],
107 | "types": {
108 | "resourceType": "",
109 | "resourceTypeGeneral": "Dataset"
110 | },
111 | "version": "2.0"
112 | }
--------------------------------------------------------------------------------
/caltechdata_api/vocabularies/relation_types.yaml:
--------------------------------------------------------------------------------
1 | - id: iscitedby
2 | props:
3 | datacite: IsCitedBy
4 | title:
5 | en: Is cited by
6 | - id: cites
7 | props:
8 | datacite: Cites
9 | title:
10 | en: Cites
11 | - id: issupplementto
12 | props:
13 | datacite: IsSupplementTo
14 | title:
15 | en: Is supplement to
16 | - id: issupplementedby
17 | props:
18 | datacite: IsSupplementedBy
19 | title:
20 | en: Is supplemented by
21 | - id: iscontinuedby
22 | props:
23 | datacite: IsContinuedBy
24 | title:
25 | en: Is continued by
26 | - id: continues
27 | props:
28 | datacite: Continues
29 | title:
30 | en: Continues
31 | - id: isdescribedby
32 | props:
33 | datacite: IsDescribedBy
34 | title:
35 | en: Is described by
36 | - id: describes
37 | props:
38 | datacite: Describes
39 | title:
40 | en: Describes
41 | - id: hasversion
42 | props:
43 | datacite: HasVersion
44 | title:
45 | en: Has version
46 | - id: isversionof
47 | props:
48 | datacite: IsVersionOf
49 | title:
50 | en: Is version of
51 | - id: isnewversionof
52 | props:
53 | datacite: IsNewVersionOf
54 | title:
55 | en: Is new version of
56 | - id: ispreviousversionof
57 | props:
58 | datacite: IsPreviousVersionOf
59 | title:
60 | en: Is previous version of
61 | - id: ispartof
62 | props:
63 | datacite: IsPartOf
64 | title:
65 | en: Is part of
66 | - id: haspart
67 | props:
68 | datacite: HasPart
69 | title:
70 | en: Has part
71 | - id: isreferencedby
72 | props:
73 | datacite: IsReferencedBy
74 | title:
75 | en: Is referenced by
76 | - id: references
77 | props:
78 | datacite: References
79 | title:
80 | en: References
81 | - id: isdocumentedby
82 | props:
83 | datacite: IsDocumentedBy
84 | title:
85 | en: Is documented by
86 | - id: documents
87 | props:
88 | datacite: Documents
89 | title:
90 | en: Documents
91 | - id: iscompiledby
92 | props:
93 | datacite: IsCompiledBy
94 | title:
95 | en: Is compiled by
96 | - id: compiles
97 | props:
98 | datacite: Compiles
99 | title:
100 | en: Compiles
101 | - id: isvariantformof
102 | props:
103 | datacite: IsVariantFormOf
104 | title:
105 | en: Is variant form of
106 | - id: isoriginalformof
107 | props:
108 | datacite: IsOriginalFormOf
109 | title:
110 | en: Is original form of
111 | - id: isidenticalto
112 | props:
113 | datacite: IsIdenticalTo
114 | title:
115 | en: Is identical to
116 | - id: isreviewedby
117 | props:
118 | datacite: IsReviewedBy
119 | title:
120 | en: Is reviewed by
121 | - id: reviews
122 | props:
123 | datacite: Reviews
124 | title:
125 | en: Reviews
126 | - id: isderivedfrom
127 | props:
128 | datacite: IsDerivedFrom
129 | title:
130 | en: Is derived from
131 | - id: issourceof
132 | props:
133 | datacite: IsSourceOf
134 | title:
135 | en: Is source of
136 | - id: isrequiredby
137 | props:
138 | datacite: IsRequiredBy
139 | title:
140 | en: Is required by
141 | - id: requires
142 | props:
143 | datacite: Requires
144 | title:
145 | en: Requires
146 | - id: isobsoletedby
147 | props:
148 | datacite: IsObsoletedBy
149 | title:
150 | en: Is obsoleted by
151 | - id: obsoletes
152 | props:
153 | datacite: Obsoletes
154 | title:
155 | en: Obsoletes
156 |
--------------------------------------------------------------------------------
/rdm.json:
--------------------------------------------------------------------------------
1 | {
2 | "pids": {
3 | },
4 | "metadata": {
5 | "resource_type": {"id": "dataset"},
6 | "creators": [
7 | {
8 | "person_or_org": {
9 | "name": "Nielsen, Lars Holm",
10 | "type": "personal",
11 | "given_name": "Lars Holm",
12 | "family_name": "Nielsen",
13 | "identifiers": [
14 | {"scheme": "orcid", "identifier": "0000-0001-8135-3489"}
15 | ]
16 | },
17 | "affiliations": [{"name": "free-text"}]
18 | }
19 | ],
20 | "title": "InvenioRDM",
21 | "additional_titles": [
22 | {
23 | "title": "a research data management platform",
24 | "type": {"id": "subtitle"},
25 | "lang": {"id": "eng"}
26 | }
27 | ],
28 | "publisher": "InvenioRDM",
29 | "publication_date": "2018/2020-09",
30 | "subjects": [
31 | {"subject": "custom"}
32 | ],
33 | "contributors": [
34 | {
35 | "person_or_org": {
36 | "name": "Nielsen, Lars Holm",
37 | "type": "personal",
38 | "given_name": "Lars Holm",
39 | "family_name": "Nielsen",
40 | "identifiers": [
41 | {"scheme": "orcid", "identifier": "0000-0001-8135-3489"}
42 | ]
43 | },
44 | "role": {"id": "other"}
45 | }
46 | ],
47 | "dates": [
48 | {"date": "1939/1945", "type": {"id": "other"}, "description": "A date"}
49 | ],
50 | "languages": [{"id": "dan"}, {"id": "eng"}],
51 | "identifiers": [{"identifier": "1924MNRAS..84..308E", "scheme": "bibcode"}],
52 | "related_identifiers": [
53 | {
54 | "identifier": "10.1234/foo.bar",
55 | "scheme": "doi",
56 | "relation_type": {"id": "iscitedby"},
57 | "resource_type": {"id": "dataset"}
58 | }
59 | ],
60 | "sizes": ["11 pages"],
61 | "formats": ["application/pdf"],
62 | "version": "v1.0",
63 | "rights": [
64 | {
65 | "title": {"en": "A custom license"},
66 | "description": {"en": "A description"},
67 | "link": "https://customlicense.org/licenses/by/4.0/"
68 | },
69 | {"id": "cc-by-4.0"}
70 | ],
71 | "description": "
with HTML tags
", 72 | "additional_descriptions": [ 73 | { 74 | "description": "Bla bla bla", 75 | "type": {"id": "methods"}, 76 | "lang": {"id": "eng"} 77 | } 78 | ], 79 | "locations": { 80 | "features": [ 81 | { 82 | "geometry": { 83 | "type": "Point", 84 | "coordinates": [-32.94682, -60.63932] 85 | }, 86 | "place": "test location place", 87 | "description": "test location description", 88 | "identifiers": [ 89 | {"identifier": "12345abcde", "scheme": "wikidata"}, 90 | {"identifier": "12345abcde", "scheme": "geonames"} 91 | ] 92 | } 93 | ] 94 | } 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /tests/data/datacite43/b2jqz-qdw65.json: -------------------------------------------------------------------------------- 1 | { 2 | "contributors": [ 3 | { 4 | "affiliation": [ 5 | { 6 | "name": "Department of Biological Sciences, Tata Institute of Fundamental Research, Mumbai, Maharashtra, India" 7 | } 8 | ], 9 | "contributorType": "ContactPerson", 10 | "familyName": "Koushika", 11 | "givenName": "Sandhya P.", 12 | "name": "Koushika, Sandhya P.", 13 | "nameIdentifiers": [ 14 | { 15 | "nameIdentifier": "0000-0002-1742-7356", 16 | "nameIdentifierScheme": "ORCID" 17 | } 18 | ], 19 | "nameType": "Personal" 20 | } 21 | ], 22 | "creators": [ 23 | { 24 | "affiliation": [ 25 | { 26 | "name": "Department of Biological Sciences, Tata Institute of Fundamental Research, Mumbai, Maharashtra, India" 27 | } 28 | ], 29 | "familyName": "Vasudevan", 30 | "givenName": "Amruta", 31 | "name": "Vasudevan, Amruta", 32 | "nameIdentifiers": [ 33 | { 34 | "nameIdentifier": "0000-0002-5777-9508", 35 | "nameIdentifierScheme": "ORCID" 36 | } 37 | ], 38 | "nameType": "Personal" 39 | } 40 | ], 41 | "dates": [ 42 | { 43 | "date": "2024-06-25", 44 | "dateType": "Issued" 45 | }, 46 | { 47 | "date": "2024-06-24", 48 | "dateType": "Accepted" 49 | } 50 | ], 51 | "descriptions": [ 52 | { 53 | "description": "Raw data for figure 1", 54 | "descriptionType": "Abstract" 55 | } 
56 | ], 57 | "fundingReferences": [ 58 | { 59 | "funderName": "The authors gratefully acknowledge support from the Department of Atomic Energy, Government of India (DAE) grants 12-R\\&D-IMS-5.02-0202 and 1303/2/2019/R\\&DII/DAE/2079 (dated 11.02.2020 to S.P.K.), the Howard Hughes Medical Institute (HHMI) International Early Career Scientist (IECS) grant 55007425 (to S.P.K.), CSIR (to S.P.K.), and funding from the PRISM project at the Institute of Mathematical Sciences (to S.P.K.) for research costs. The authors gratefully acknowledge salary support from TIFR-DAE (for A.V.)." 60 | } 61 | ], 62 | "identifiers": [ 63 | { 64 | "identifier": "10.22002/b2jqz-qdw65", 65 | "identifierType": "DOI" 66 | }, 67 | { 68 | "identifier": "oai:data.caltech.edu:b2jqz-qdw65", 69 | "identifierType": "oai" 70 | } 71 | ], 72 | "language": "eng", 73 | "publicationYear": "2024", 74 | "publisher": "CaltechDATA", 75 | "relatedIdentifiers": [ 76 | { 77 | "relatedIdentifier": "10.17912/micropub.biology.001204", 78 | "relatedIdentifierType": "DOI", 79 | "relationType": "IsPartOf", 80 | "resourceTypeGeneral": "Text" 81 | } 82 | ], 83 | "rightsList": [ 84 | { 85 | "rights": "Creative Commons Attribution 4.0 International", 86 | "rightsIdentifier": "cc-by-4.0", 87 | "rightsIdentifierScheme": "spdx", 88 | "rightsUri": "https://creativecommons.org/licenses/by/4.0/legalcode" 89 | } 90 | ], 91 | "schemaVersion": "http://datacite.org/schema/kernel-4", 92 | "subjects": [ 93 | { 94 | "subject": "c. elegans" 95 | } 96 | ], 97 | "titles": [ 98 | { 99 | "title": "Dataset: Physical presence of chemical synapses is necessary for turning behavior of anterograde synaptic vesicles at the branch point of PLM neurons in C. 
elegans" 100 | } 101 | ], 102 | "types": { 103 | "resourceType": "", 104 | "resourceTypeGeneral": "Dataset" 105 | }, 106 | "version": "1.0" 107 | } -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | Contributor Covenant Code of Conduct 2 | ==================================== 3 | 4 | ## Our Pledge 5 | 6 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 7 | 8 | ## Our Standards 9 | 10 | Examples of behavior that contributes to creating a positive environment include: 11 | 12 | * Using welcoming and inclusive language 13 | * Being respectful of differing viewpoints and experiences 14 | * Gracefully accepting constructive criticism 15 | * Focusing on what is best for the community 16 | * Showing empathy towards other community members 17 | 18 | Examples of unacceptable behavior by participants include: 19 | 20 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 21 | * Trolling, insulting/derogatory comments, and personal or political attacks 22 | * Public or private harassment 23 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 24 | * Other conduct which could reasonably be considered inappropriate in a professional setting 25 | 26 | ## Our Responsibilities 27 | 28 | Project contributors are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 
29 | 30 | Project contributors have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 31 | 32 | ## Scope 33 | 34 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project contributors. 35 | 36 | ## Enforcement 37 | 38 | If a contributor engages in harassing behavior, the project organizers may take any action they deem appropriate, including warning the offender or expelling them from online forums, online project resources, face-to-face meetings, or any other project-related activity or resource. 39 | 40 | If you are being harassed, notice that someone else is being harassed, or have any other concerns, please contact a member of the project team immediately. Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 
41 | 42 | ## Attribution 43 | 44 | Portions of this Code of Conduct were adapted from Electron's [Contributor Covenant Code of Conduct](https://github.com/electron/electron/blob/master/CODE_OF_CONDUCT.md), which itself was adapted from the [Contributor Covenant](http://contributor-covenant.org/version/1/4), version 1.4. 45 | -------------------------------------------------------------------------------- /tests/data/datacite43/t15w6-x9q23.json: -------------------------------------------------------------------------------- 1 | { 2 | "contributors": [ 3 | { 4 | "affiliation": [ 5 | { 6 | "name": "National Eye Institute, National Institutes of Health, Bethesda, Maryland, United States" 7 | }, 8 | { 9 | "name": "Intramural Research Program, National Library of Medicine, National Institutes of Health, Bethesda, Maryland, United States" 10 | } 11 | ], 12 | "contributorType": "ContactPerson", 13 | "familyName": "Pal", 14 | "givenName": "Soumitra", 15 | "name": "Pal, Soumitra", 16 | "nameIdentifiers": [ 17 | { 18 | "nameIdentifier": "0000-0003-4840-3944", 19 | "nameIdentifierScheme": "ORCID" 20 | } 21 | ], 22 | "nameType": "Personal" 23 | } 24 | ], 25 | "creators": [ 26 | { 27 | "affiliation": [ 28 | { 29 | "name": "National Eye Institute, National Institutes of Health, Bethesda, Maryland, United States" 30 | }, 31 | { 32 | "name": "Intramural Research Program, National Library of Medicine, National Institutes of Health, Bethesda, Maryland, United States" 33 | } 34 | ], 35 | "familyName": "Pal", 36 | "givenName": "Soumitra", 37 | "name": "Pal, Soumitra", 38 | "nameIdentifiers": [ 39 | { 40 | "nameIdentifier": "0000-0003-4840-3944", 41 | "nameIdentifierScheme": "ORCID" 42 | } 43 | ], 44 | "nameType": "Personal" 45 | } 46 | ], 47 | "dates": [ 48 | { 49 | "date": "2025-02-21", 50 | "dateType": "Issued" 51 | } 52 | ], 53 | "descriptions": [ 54 | { 55 | "description": "This MS-Excel workbook contains spreadsheets detailing the FCA datasets, clustering resolutions, and the 
results of our analysis at both the cell and cluster levels.", 56 | "descriptionType": "Abstract" 57 | } 58 | ], 59 | "fundingReferences": [ 60 | { 61 | "funderName": "This research was supported in part by the Intramural Research Program of the National Institutes of Health, USA: The National Institute of Diabetes and Digestive and Kidney Diseases (NIDDK) Grant No. ZIADK015600 to B.O. and National Library of Medicine (NLM) Grant No. LM200887 to T.M.P." 62 | } 63 | ], 64 | "identifiers": [ 65 | { 66 | "identifier": "10.22002/t15w6-x9q23", 67 | "identifierType": "DOI" 68 | }, 69 | { 70 | "identifier": "oai:data.caltech.edu:t15w6-x9q23", 71 | "identifierType": "oai" 72 | } 73 | ], 74 | "language": "eng", 75 | "publicationYear": "2025", 76 | "publisher": "CaltechDATA", 77 | "relatedIdentifiers": [ 78 | { 79 | "relatedIdentifier": "10.17912/micropub.biology.001501", 80 | "relatedIdentifierType": "DOI", 81 | "relationType": "IsPartOf", 82 | "resourceTypeGeneral": "Text" 83 | } 84 | ], 85 | "rightsList": [ 86 | { 87 | "rights": "Creative Commons Attribution 4.0 International", 88 | "rightsIdentifier": "cc-by-4.0", 89 | "rightsIdentifierScheme": "spdx", 90 | "rightsUri": "https://creativecommons.org/licenses/by/4.0/legalcode" 91 | } 92 | ], 93 | "schemaVersion": "http://datacite.org/schema/kernel-4", 94 | "subjects": [ 95 | { 96 | "subject": "drosophila" 97 | } 98 | ], 99 | "titles": [ 100 | { 101 | "title": "Dataset: Cell-Type Specific Variation in X-Chromosome Dosage Compensation in Drosophila" 102 | } 103 | ], 104 | "types": { 105 | "resourceType": "", 106 | "resourceTypeGeneral": "Dataset" 107 | }, 108 | "version": "1.0" 109 | } -------------------------------------------------------------------------------- /tests/data/datacite43/d7mk4-f8t44.json: -------------------------------------------------------------------------------- 1 | { 2 | "contributors": [ 3 | { 4 | "contributorType": "DataCurator", 5 | "familyName": "Civilini", 6 | "givenName": "Francesco", 7 | 
"name": "Civilini, Francesco", 8 | "nameIdentifiers": [ 9 | { 10 | "nameIdentifier": "0000-0003-0669-0404", 11 | "nameIdentifierScheme": "ORCID" 12 | } 13 | ], 14 | "nameType": "Personal" 15 | }, 16 | { 17 | "affiliation": [ 18 | { 19 | "affiliationIdentifier": "05dxps055", 20 | "affiliationIdentifierScheme": "ROR", 21 | "name": "California Institute of Technology" 22 | } 23 | ], 24 | "contributorType": "Other", 25 | "familyName": "Husker", 26 | "givenName": "Allen", 27 | "name": "Husker, Allen", 28 | "nameIdentifiers": [ 29 | { 30 | "nameIdentifier": "0000-0003-1139-0502", 31 | "nameIdentifierScheme": "ORCID" 32 | } 33 | ], 34 | "nameType": "Personal" 35 | }, 36 | { 37 | "contributorType": "Other", 38 | "familyName": "Weber", 39 | "givenName": "Renee", 40 | "name": "Weber, Renee", 41 | "nameIdentifiers": [ 42 | { 43 | "nameIdentifier": "0000-0002-1649-483X", 44 | "nameIdentifierScheme": "ORCID" 45 | } 46 | ], 47 | "nameType": "Personal" 48 | } 49 | ], 50 | "creators": [ 51 | { 52 | "familyName": "Civilini", 53 | "givenName": "Francesco", 54 | "name": "Civilini, Francesco", 55 | "nameIdentifiers": [], 56 | "nameType": "Personal" 57 | } 58 | ], 59 | "dates": [ 60 | { 61 | "date": "2024-10-24", 62 | "dateType": "Issued" 63 | } 64 | ], 65 | "descriptions": [ 66 | { 67 | "description": "This dataset contains data from the Lunar Seismic Profiling Experiment as well as results from the JGR: Planets publication \"Thermal moonquake characterization and cataloging using frequency-based algorithms and stochastic gradient descent\".\u00a0\nThe code to compute the results can be found in the GitHub here:\nhttps://github.com/civilinifr/thermal_mq_analysis\nalso published through Zenodo here:\nhttp://doi.org/10.5281/zenodo.8025056\n\u00a0\nv2: Fixed files in LSPE_sac_hourly.zip to contain correct start and end times.\u00a0\n\u00a0\nIncludes:\n\nThermal moonquake catalog of Grade-A LSPE events\nDaily ASCII files in units of decompressed volts (filenames 
daily_ascii_YYYYMMDD_YYYYMMDD.zip)\nHourly SAC files in units of decompressed volts (filename LSPE_sac_hourly.zip)\nHourly SAC files in units of nm/s (filenames LSPE_sac_hourly_phys_p1.zip and LSPE_sac_hourly_phys_p2.zip)\nAnalysis results from the manuscript (filename lunar_output.zip)", 68 | "descriptionType": "Abstract" 69 | } 70 | ], 71 | "identifiers": [ 72 | { 73 | "identifier": "10.22002/d7mk4-f8t44", 74 | "identifierType": "DOI" 75 | }, 76 | { 77 | "identifier": "oai:data.caltech.edu:d7mk4-f8t44", 78 | "identifierType": "oai" 79 | } 80 | ], 81 | "publicationYear": "2024", 82 | "publisher": "CaltechDATA", 83 | "rightsList": [ 84 | { 85 | "rights": "Creative Commons Zero v1.0 Universal", 86 | "rightsIdentifier": "cc0-1.0", 87 | "rightsIdentifierScheme": "spdx", 88 | "rightsUri": "https://creativecommons.org/publicdomain/zero/1.0/legalcode" 89 | } 90 | ], 91 | "schemaVersion": "http://datacite.org/schema/kernel-4", 92 | "titles": [ 93 | { 94 | "title": "Apollo 17 Lunar Seismic Profiling Experiment Seismic Data and Thermal Moonquake Catalog" 95 | } 96 | ], 97 | "types": { 98 | "resourceType": "", 99 | "resourceTypeGeneral": "Dataset" 100 | }, 101 | "version": "v2.0" 102 | } -------------------------------------------------------------------------------- /codemeta.json: -------------------------------------------------------------------------------- 1 | { 2 | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", 3 | "@type": "SoftwareSourceCode", 4 | "description": "Python wrapper for CaltechDATA API.", 5 | "name": "caltechdata_api", 6 | "codeRepository": "https://github.com/caltechlibrary/caltechdata_api", 7 | "issueTracker": "https://github.com/caltechlibrary/caltechdata_api/issues", 8 | "license": "https://data.caltech.edu/license", 9 | "version": "1.10.6", 10 | "author": [ 11 | { 12 | "@type": "Person", 13 | "givenName": "Thomas E", 14 | "familyName": "Morrell", 15 | "affiliation": { 16 | "@id": "https://ror.org/05dxps055", 17 | "@type": "Organization", 
18 | "name": "Caltech Library" 19 | }, 20 | "email": "tmorrell@caltech.edu", 21 | "@id": "https://orcid.org/0000-0001-9266-5146" 22 | }, 23 | { 24 | "@type": "Person", 25 | "givenName": "Rohan", 26 | "familyName": "Bhattarai", 27 | "affiliation": { 28 | "@id": "https://ror.org/05dxps055", 29 | "@type": "Organization", 30 | "name": "Caltech" 31 | }, 32 | "@id": "https://orcid.org/0009-0007-0323-4733" 33 | }, 34 | { 35 | "@type": "Person", 36 | "givenName": "Elizabeth", 37 | "familyName": "Won", 38 | "affiliation": { 39 | "@id": "https://ror.org/05dxps055", 40 | "@type": "Organization", 41 | "name": "Caltech" 42 | }, 43 | "@id": "https://orcid.org/0009-0002-2450-6471" 44 | }, 45 | { 46 | "@type": "Person", 47 | "givenName": "Alexander A", 48 | "familyName": "Abakah", 49 | "affiliation": { 50 | "@id": "https://ror.org/05dxps055", 51 | "@type": "Organization", 52 | "name": "Caltech Library" 53 | }, 54 | "email": "aabakah@caltech.edu", 55 | "@id": "https://orcid.org/0009-0003-5640-6691" 56 | }, 57 | { 58 | "@type": "Person", 59 | "givenName": "Kshemaahna", 60 | "familyName": "Nagi", 61 | "affiliation": { 62 | "@id": "https://ror.org/05dxps055", 63 | "@type": "Organization", 64 | "name": "Caltech" 65 | }, 66 | "email": "knagi@caltech.edu", 67 | "@id": "https://orcid.org/0009-0002-8113-3763" 68 | } 69 | ], 70 | "developmentStatus": "active", 71 | "downloadUrl": "https://github.com/caltechlibrary/caltechdata_api/archive/main.zip", 72 | "keywords": [ 73 | "GitHub", 74 | "metadata", 75 | "software", 76 | "InvenioRDM" 77 | ], 78 | "maintainer": [ 79 | { 80 | "@type": "Person", 81 | "givenName": "Thomas E", 82 | "familyName": "Morrell", 83 | "affiliation": { 84 | "@id": "https://ror.org/05dxps055", 85 | "@type": "Organization", 86 | "name": "Caltech Library" 87 | }, 88 | "email": "tmorrell@caltech.edu", 89 | "@id": "https://orcid.org/0000-0001-9266-5146" 90 | } 91 | ], 92 | "funding": { 93 | "@type": "Grant", 94 | "identifier": "2322420", 95 | "name": "CC* Data Storage: 
Closing Caltech's data storage gap: from ad-hoc to well-managed stewardship of large-scale datasets", 96 | "funder": { 97 | "@id": "https://doi.org/10.13039/100000001", 98 | "@type": "Organization", 99 | "name": "National Science Foundation" 100 | } 101 | }, 102 | "programmingLanguage": "Python", 103 | "identifier": "10.22002/2g4c7-zva46" 104 | } -------------------------------------------------------------------------------- /tests/data/caltechdata/1235.json: -------------------------------------------------------------------------------- 1 | { 2 | "created": "2019-04-29T20:13:59.728273+00:00", 3 | "id": 1235, 4 | "links": { 5 | "self": "http://data.caltech.edu/api/record/1235" 6 | }, 7 | "metadata": { 8 | "_form_uuid": "beae3039-29ed-4e20-bd21-6ed6e994afa5", 9 | "alternateIdentifiers": [ 10 | { 11 | "alternateIdentifier": "1235", 12 | "alternateIdentifierType": "CaltechDATA_Identifier" 13 | } 14 | ], 15 | "authors": [ 16 | { 17 | "authorAffiliation": [ 18 | "Caltech Library" 19 | ], 20 | "authorIdentifiers": [ 21 | { 22 | "authorIdentifier": "0000-0001-9266-5146", 23 | "authorIdentifierScheme": "ORCID" 24 | } 25 | ], 26 | "authorName": "Morrell, Thomas E" 27 | } 28 | ], 29 | "control_number": "1235", 30 | "descriptions": [ 31 | { 32 | "descriptionType": "Abstract", 33 | "descriptionValue": "First included in ames, this notebook dynamically shows how many records are in CaltechDATA and where they come from (GitHub, Deposit Form, or API). This repository is set to work with MyBinder so you can easily reproduce the plot and include new records. " 34 | }, 35 | { 36 | "descriptionType": "Other", 37 | "descriptionValue": "