├── .github
└── workflows
│ ├── ci.yaml
│ ├── docs.yaml
│ ├── lint.yaml
│ └── pypi.yaml
├── .gitignore
├── .pre-commit-config.yaml
├── .readthedocs.yml
├── .secrets.baseline
├── CHANGELOG.md
├── LICENSE
├── MANIFEST.in
├── NOTICE
├── README.md
├── docs
├── .nojekyll
├── Makefile
├── _build
│ ├── doctrees
│ │ ├── auth.doctree
│ │ ├── environment.pickle
│ │ ├── file.doctree
│ │ ├── index.doctree
│ │ ├── indexing.doctree
│ │ ├── jobs.doctree
│ │ ├── metadata.doctree
│ │ ├── object.doctree
│ │ ├── query.doctree
│ │ ├── submission.doctree
│ │ ├── tools.doctree
│ │ ├── tools
│ │ │ ├── drs_pull.doctree
│ │ │ ├── indexing.doctree
│ │ │ └── metadata.doctree
│ │ └── wss.doctree
│ └── html
│ │ ├── .buildinfo
│ │ ├── _modules
│ │ ├── gen3
│ │ │ ├── auth.html
│ │ │ ├── file.html
│ │ │ ├── index.html
│ │ │ ├── jobs.html
│ │ │ ├── metadata.html
│ │ │ ├── object.html
│ │ │ ├── query.html
│ │ │ ├── submission.html
│ │ │ ├── tools
│ │ │ │ ├── download
│ │ │ │ │ └── drs_download.html
│ │ │ │ ├── indexing
│ │ │ │ │ ├── download_manifest.html
│ │ │ │ │ ├── index_manifest.html
│ │ │ │ │ └── verify_manifest.html
│ │ │ │ └── metadata
│ │ │ │ │ └── ingest_manifest.html
│ │ │ └── wss.html
│ │ └── index.html
│ │ ├── _sources
│ │ ├── auth.rst.txt
│ │ ├── file.rst.txt
│ │ ├── index.rst.txt
│ │ ├── indexing.rst.txt
│ │ ├── jobs.rst.txt
│ │ ├── metadata.rst.txt
│ │ ├── object.rst.txt
│ │ ├── query.rst.txt
│ │ ├── submission.rst.txt
│ │ ├── tools.rst.txt
│ │ ├── tools
│ │ │ ├── drs_pull.rst.txt
│ │ │ ├── indexing.rst.txt
│ │ │ └── metadata.rst.txt
│ │ └── wss.rst.txt
│ │ ├── _static
│ │ ├── alabaster.css
│ │ ├── basic.css
│ │ ├── custom.css
│ │ ├── doctools.js
│ │ ├── documentation_options.js
│ │ ├── file.png
│ │ ├── language_data.js
│ │ ├── minus.png
│ │ ├── plus.png
│ │ ├── pygments.css
│ │ ├── searchtools.js
│ │ └── sphinx_highlight.js
│ │ ├── auth.html
│ │ ├── file.html
│ │ ├── genindex.html
│ │ ├── index.html
│ │ ├── indexing.html
│ │ ├── jobs.html
│ │ ├── metadata.html
│ │ ├── object.html
│ │ ├── objects.inv
│ │ ├── py-modindex.html
│ │ ├── query.html
│ │ ├── search.html
│ │ ├── searchindex.js
│ │ ├── submission.html
│ │ ├── tools.html
│ │ ├── tools
│ │ ├── drs_pull.html
│ │ ├── indexing.html
│ │ └── metadata.html
│ │ └── wss.html
├── auth.rst
├── conf.py
├── file.rst
├── howto
│ ├── bundleTools.md
│ ├── cli.md
│ ├── crosswalk.md
│ ├── devTest.md
│ ├── diirmIndexing.md
│ ├── discoveryMetadataTools.md
│ ├── drsDownloading.md
│ ├── externalFileDownloading.md
│ └── metadataTools.md
├── index.rst
├── indexing.rst
├── jobs.rst
├── metadata.rst
├── object.rst
├── query.rst
├── reference
│ └── sdkClasses.md
├── submission.rst
├── tools.rst
├── tools
│ ├── drs_pull.rst
│ ├── indexing.rst
│ └── metadata.rst
├── tutorial
│ └── quickStart.md
└── wss.rst
├── gen3
├── __init__.py
├── auth.py
├── cli
│ ├── __init__.py
│ ├── __main__.py
│ ├── auth.py
│ ├── configure.py
│ ├── discovery.py
│ ├── drs_pull.py
│ ├── file.py
│ ├── nih.py
│ ├── objects.py
│ ├── pfb.py
│ ├── users.py
│ ├── wrap.py
│ └── wss.py
├── configure.py
├── discovery_dois.py
├── doi.py
├── external
│ ├── __init__.py
│ ├── external.py
│ └── nih
│ │ ├── __init__.py
│ │ ├── dbgap_doi.py
│ │ ├── dbgap_fhir.py
│ │ ├── dbgap_study_registration.py
│ │ └── utils.py
├── file.py
├── index.py
├── jobs.py
├── metadata.py
├── object.py
├── query.py
├── submission.py
├── tools
│ ├── __init__.py
│ ├── bundle
│ │ └── ingest_manifest.py
│ ├── diff.py
│ ├── download
│ │ ├── __init__.py
│ │ ├── drs_download.py
│ │ ├── drs_resolvers.py
│ │ └── external_file_download.py
│ ├── expansion.py
│ ├── indexing
│ │ ├── __init__.py
│ │ ├── download_manifest.py
│ │ ├── index_manifest.py
│ │ ├── merge_manifests.py
│ │ ├── post_indexing_validation.py
│ │ ├── validate_manifest_format.py
│ │ └── verify_manifest.py
│ ├── merge.py
│ ├── metadata
│ │ ├── __init__.py
│ │ ├── crosswalk.py
│ │ ├── discovery.py
│ │ ├── discovery_objects.py
│ │ ├── ingest_manifest.py
│ │ └── verify_manifest.py
│ ├── utils.py
│ └── wrap.py
├── utils.py
└── wss.py
├── poetry.lock
├── pyproject.toml
└── tests
├── __init__.py
├── bundle_tests
├── invalid_manifest.csv
├── test_bundle_ingestion.py
└── valid_manifest.csv
├── conftest.py
├── download_tests
├── expected
│ └── manifest_test_drs_compact_object_list.json
├── resources
│ ├── bad_format.json
│ ├── dataguids_commons1.json
│ ├── download_test_data.json
│ ├── drs_object_commons3.json
│ ├── drs_objects.json
│ ├── expired_drs_host_cache.json
│ ├── gen3_metadata_external_file_metadata.json
│ ├── index_dist.json
│ ├── manifest_package.json
│ ├── manifest_test_1.json
│ ├── manifest_test_2.json
│ ├── manifest_test_3.json
│ ├── manifest_test_bad_id.json
│ ├── manifest_test_drs_compact.json
│ ├── manifest_test_hostname_not_in_wts.json
│ ├── mds_package.json
│ ├── valid_external_file_metadata.json
│ └── wts_oidc.json
├── test_async_download.py
├── test_download.py
├── test_external_download.py
└── test_resolvers.py
├── merge_manifests
├── column_mismatch
│ ├── expected-merged-output-manifest.tsv
│ └── input
│ │ ├── manifest1.tsv
│ │ └── manifest2.tsv
├── discovery_combine
│ ├── combined_discovery_metadata.tsv
│ ├── combined_discovery_metadata_exact_match.tsv
│ ├── discovery.tsv
│ ├── metadata_file.tsv
│ └── metadata_file_exact_match.tsv
├── duplicate_values
│ ├── expected-merged-output-manifest.tsv
│ └── input
│ │ ├── manifest1.tsv
│ │ ├── manifest2.tsv
│ │ └── manifest3.tsv
├── multiple_guids_per_hash
│ ├── expected-merged-output-manifest.tsv
│ └── input
│ │ ├── manifest1.tsv
│ │ └── manifest2.tsv
├── multiple_urls
│ ├── expected-merged-output-manifest.tsv
│ └── input
│ │ ├── manifest1.tsv
│ │ └── manifest2.tsv
├── no_guid_same_md5_order
│ ├── expected-merged-output-manifest.tsv
│ └── input
│ │ ├── manifest_WITHOUT_guid.tsv
│ │ └── manifest_with_guid.tsv
├── regular
│ ├── expected-merged-output-manifest.tsv
│ └── input
│ │ ├── manifest1.tsv
│ │ └── manifest2.tsv
├── same_guid_for_same_hash
│ ├── expected-merged-output-manifest.tsv
│ └── input
│ │ ├── manifest1.tsv
│ │ └── manifest2.tsv
├── size_mismatch
│ └── input
│ │ ├── manifest1.tsv
│ │ └── manifest2.tsv
└── test_manifest_merge.py
├── test_auth.py
├── test_configure.py
├── test_crosswalk.py
├── test_data
├── crosswalk
│ ├── crosswalk_1.csv
│ ├── crosswalk_2.csv
│ ├── crosswalk_optional_info_1.csv
│ ├── crosswalk_optional_info_2.csv
│ ├── empty_crosswalk_1.csv
│ ├── empty_crosswalk_optional_info_1.csv
│ ├── empty_file.csv
│ ├── full_crosswalk.csv
│ ├── full_crosswalk_optional_info.csv
│ ├── invalid_a_crosswalk_1.csv
│ ├── invalid_a_crosswalk_optional_info_1.csv
│ ├── invalid_b_crosswalk_1.csv
│ ├── invalid_b_crosswalk_optional_info_1.csv
│ ├── invalid_c_crosswalk_1.csv
│ └── invalid_c_crosswalk_optional_info_1.csv
├── diff_manifests
│ ├── manifest3.tsv
│ └── manifest4.tsv
├── fhir_metadata.tsv
├── manifest1.csv
├── manifest2.csv
├── manifest_additional_metadata.tsv
├── manifest_additional_metadata_mult_guids.tsv
├── packages_manifest_bad_format.tsv
├── packages_manifest_not_a_package.tsv
├── packages_manifest_ok.tsv
├── test.tsv
├── test2.tsv
└── test_manifest.csv
├── test_dbgap_fhir.py
├── test_dbgap_study_registration.py
├── test_diff.py
├── test_discovery.py
├── test_discovery_objects.py
├── test_doi.py
├── test_doi_discovery.py
├── test_expansion.py
├── test_file.py
├── test_import.py
├── test_index.py
├── test_jobs.py
├── test_manifests.py
├── test_metadata.py
├── test_object.py
├── test_post_indexing_validation.py
├── test_query.py
├── test_submission.py
├── test_wrap.py
├── test_wss.py
├── utils_mock_dbgap_study_registration_response.py
├── utils_mock_fhir_response.py
└── validate_manifest_format
├── manifests
├── manifest_with_custom_column_names.tsv
├── manifest_with_custom_url_protocols.tsv
├── manifest_with_empty_url.tsv
├── manifest_with_invalid_authz_resources.tsv
├── manifest_with_invalid_md5_values.tsv
├── manifest_with_invalid_sizes.tsv
├── manifest_with_invalid_urls.tsv
├── manifest_with_many_types_of_errors.tsv
├── manifest_with_missing_md5_column.tsv
├── manifest_with_missing_size_column.tsv
├── manifest_with_missing_url_column.tsv
├── manifest_with_no_errors.tsv
└── manifest_with_wide_row.tsv
└── test_is_valid_manifest_format.py
/.github/workflows/ci.yaml:
--------------------------------------------------------------------------------
1 | name: CI
2 |
3 | on:
4 | # run on pushed commits to any branch (no branch filter is set) and on new commits on pull requests
5 | push:
6 | pull_request:
7 | types: [opened, synchronize]
8 |
9 | jobs:
10 | Security:
11 | name: Security Pipeline
12 | uses: uc-cdis/.github/.github/workflows/securitypipeline.yaml@master
13 | with:
14 | python-poetry: 'false'
15 | secrets: inherit # pragma: allowlist secret
16 | UnitTest:
17 | name: Python Unit Test
18 | uses: uc-cdis/.github/.github/workflows/python_unit_test.yaml@master
19 | with:
20 | python-version: '3.9'
--------------------------------------------------------------------------------
/.github/workflows/docs.yaml:
--------------------------------------------------------------------------------
1 | name: Docs
2 |
3 | on:
4 | # DON'T run on pushed commits to master, ONLY on new commits on pull requests
5 | push:
6 | branches-ignore:
7 | - 'master'
8 | paths-ignore:
9 | - 'docs/**'
10 | # Use push trigger since 'paths-ignore' are not respected as expected by 'on.pull_request.paths-ignore', see https://github.com/actions/runner/issues/2324
11 | pull_request:
12 | types: [opened]
13 |
14 | jobs:
15 | build_docs:
16 | runs-on: ubuntu-latest
17 |
18 | steps:
19 | - uses: actions/checkout@v2
20 | with:
21 | ref: ${{github.event.pull_request.head.ref}}
22 | repository: ${{github.event.pull_request.head.repo.full_name}}
23 | # Passing a PAT from the PlanXCyborg bot account here, since otherwise commits pushed by this action won't be able to trigger other actions, see https://github.com/stefanzweifel/git-auto-commit-action?tab=readme-ov-file#commits-made-by-this-action-do-not-trigger-new-workflow-runs
24 | token: ${{ secrets.PLANXCYBORG_PAT }}
25 | - name: Set up Python 3.9
26 | uses: actions/setup-python@v1
27 | with:
28 | python-version: 3.9
29 | - uses: actions/cache@preview
30 | with:
31 | path: ~/.cache/pypoetry/virtualenvs
32 | key: ${{ runner.os }}-poetry-${{ hashFiles(format('{0}{1}', github.workspace, '/poetry.lock')) }}
33 | restore-keys: |
34 | ${{ runner.os }}-poetry-
35 | - name: Install dependencies
36 | run: |
37 | pip install poetry
38 | poetry config virtualenvs.create false
39 | poetry install -vv --all-extras --no-interaction
40 | poetry show -vv
41 |
42 | # install sphinx from PyPI (as of 03/16/21 python3-sphinx is broken)
43 | # sudo apt-get install python3-sphinx
44 | pip install sphinx
45 | pip uninstall -y asyncio
46 | pip list
47 | cd
48 | - name: Build docs
49 | run: |
50 | sphinx-build --version
51 | export PYTHONPATH="${PYTHONPATH}:${{ env.pythonLocation }}/lib/python3.9/site-packages"
52 | cd docs
53 | poetry run make html
54 | cd ..
55 |
56 | - uses: stefanzweifel/git-auto-commit-action@v4.8.0
57 | with:
58 | commit_message: Apply automatic documentation changes
59 |
60 | # Optional name of the branch the commit should be pushed to
61 | # Required if Action is used in Workflow listening to the `pull_request` event
62 | branch: ${{ github.head_ref }}
63 |
--------------------------------------------------------------------------------
/.github/workflows/lint.yaml:
--------------------------------------------------------------------------------
1 | on:
2 | pull_request:
3 | types: [opened, synchronize]
4 | issue_comment:
5 | types: [created, edited]
6 |
7 | name: Wool
8 |
9 | jobs:
10 | runWool:
11 | name: Run black
12 | runs-on: ubuntu-latest
13 | steps:
14 | - uses: actions/checkout@master
15 | - uses: uc-cdis/wool@master
16 | env:
17 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
--------------------------------------------------------------------------------
/.github/workflows/pypi.yaml:
--------------------------------------------------------------------------------
1 | name: PyPI
2 | on:
3 | push:
4 | tags:
5 | - '*'
6 | jobs:
7 | PyPIPoetryPublish:
8 | name: PyPI Poetry Publish
9 | uses: uc-cdis/.github/.github/workflows/python_package_index_publish.yaml@master
10 | with:
11 | PYTHON_VERSION: '3.9'
12 | # This will attempt to push to test PyPI first and only push to prod if that succeeds
13 | DO_TEST_PUBLISH_FIRST: true
14 | secrets:
15 | PYPI_TEST_API_TOKEN: ${{ secrets.PYPI_TEST_API_TOKEN }}
16 | PYPI_PROD_API_TOKEN: ${{ secrets.PYPI_PROD_API_TOKEN }}
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.DS_Store
2 | *.vscode
3 | .idea
4 | # Byte-compiled / optimized / DLL files
5 | __pycache__/
6 | *.py[cod]
7 | *$py.class
8 |
9 | # C extensions
10 | *.so
11 |
12 | # Distribution / packaging
13 | .Python
14 | env/
15 | build/
16 | develop-eggs/
17 | dist/
18 | downloads/
19 | eggs/
20 | .eggs/
21 | lib/
22 | lib64/
23 | parts/
24 | sdist/
25 | var/
26 | wheels/
27 | *.egg-info/
28 | .installed.cfg
29 | *.egg
30 |
31 | # PyInstaller
32 | # Usually these files are written by a python script from a template
33 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
34 | *.manifest
35 | *.spec
36 |
37 | # Installer logs
38 | pip-log.txt
39 | pip-delete-this-directory.txt
40 |
41 | # Unit test / coverage reports
42 | htmlcov/
43 | .tox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | .hypothesis/
51 |
52 | # Translations
53 | *.mo
54 | *.pot
55 |
56 | # Django stuff:
57 | *.log
58 | local_settings.py
59 |
60 | # Flask stuff:
61 | instance/
62 | .webassets-cache
63 |
64 | # Scrapy stuff:
65 | .scrapy
66 |
67 | # PyBuilder
68 | target/
69 |
70 | # Jupyter Notebook
71 | .ipynb_checkpoints
72 |
73 | # pyenv
74 | .python-version
75 |
76 | # celery beat schedule file
77 | celerybeat-schedule
78 |
79 | # SageMath parsed files
80 | *.sage.py
81 |
82 | # dotenv
83 | .env
84 |
85 | # virtualenv
86 | .venv
87 | venv/
88 | ENV/
89 |
90 | # Spyder project settings
91 | .spyderproject
92 | .spyproject
93 |
94 | # Rope project settings
95 | .ropeproject
96 |
97 | # mkdocs documentation
98 | /site
99 |
100 | # mypy
101 | .mypy_cache/
102 |
103 | # jwt keys
104 | keys/*.pem
105 | tests/resources/keys/*.pem
106 |
107 | # graphql schema
108 | schema.json
109 |
110 | # pytest outputs
111 | *.sq3
112 | merged-output-test-manifest.*
113 | gen3/tools/indexing/tmp/
114 | indexing-output-manifest.csv
115 | object-manifest.csv
116 | output_manifest.csv
117 |
118 | .dccache
119 | .idea
120 | # pytest output
121 | /input.csv
122 | /test_combined_discovery_metadata.tsv
123 | /test_combined_discovery_metadata_exact_match.tsv
124 | /tmp_output_file.csv
125 | /tmp_output_file_info.csv
126 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: git@github.com:Yelp/detect-secrets
3 | rev: v1.4.0
4 | hooks:
5 | - id: detect-secrets
6 | args: ['--baseline', '.secrets.baseline']
7 | exclude: '(docs\/_build|poetry.lock)'
8 | - repo: https://github.com/pre-commit/pre-commit-hooks
9 | rev: v4.4.0
10 | hooks:
11 | - id: no-commit-to-branch
12 | args: [--branch, develop, --branch, master, --pattern, release/.*]
13 | - repo: https://github.com/psf/black
14 | rev: 23.1.0
15 | hooks:
16 | - id: black
17 | additional_dependencies: ['click==8.0.4']
18 |
--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
1 | build:
2 | image: latest
3 |
4 | python:
5 | version: 3.9
6 | setup_py_install: true
7 |
8 | formats: []
9 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | ## 0.1.0
4 | Initial release
5 | Functionality for IndexClient, and Submission client
6 |
7 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE
2 | include NOTICE
3 | include README.md
4 | include CHANGELOG.md
5 |
--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
1 | Copyright 2018 University of Chicago
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
4 |
5 | http://www.apache.org/licenses/LICENSE-2.0
6 |
7 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
8 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Gen3 SDK for Python (w/ CLI)
2 |
3 | The Gen3 Software Development Kit (SDK) for Python provides classes and functions for handling common tasks when interacting with a Gen3 commons. It also exposes a Command Line Interface (CLI).
4 |
5 | The API for a commons can be overwhelming, so this SDK/CLI aims
6 | to simplify communication with various microservices.
7 |
8 | The docs here contain general descriptions of the different pieces of the SDK and example scripts. For detailed API documentation, see the link below:
9 |
10 | * [Detailed API Documentation](https://uc-cdis.github.io/gen3sdk-python/_build/html/index.html)
11 |
12 |
13 | ## Prerequisites
14 |
15 | This project is built with Python. Ensure you have Python 3.6 or later installed.
16 |
17 | Other prerequisites include:
18 | - [pip](https://pip.pypa.io/en/stable/)
19 | - Access to a Gen3 commons.
20 |
21 | ## Installation Steps:
22 |
23 | ### Using pip
24 | To install the latest released version of the SDK, run:
25 |
26 | ```
27 | pip install gen3
28 | ```
29 |
30 | This SDK exposes a Command Line Interface (CLI). You can import functions from `gen3` into your own Python scripts or use the CLI:
31 |
32 | ```
33 | gen3 --help
34 | ```
35 |
36 | ## External Documents
37 |
38 | Additional documentation for different components is available:
39 |
40 | - [Scripting Quickstart](docs/tutorial/quickStart.md)
41 | - [Available Classes](docs/reference/sdkClasses.md)
42 | - [Indexing Tools](docs/howto/diirmIndexing.md)
43 | - [Metadata Tools](docs/howto/metadataTools.md)
44 | - [Gen3 Discovery Page Metadata Tools](docs/howto/discoveryMetadataTools.md)
45 | - [Gen3 Subject-level Crosswalk Metadata Tools](docs/howto/crosswalk.md)
46 | - [Bundle Tools](docs/howto/bundleTools.md)
47 | - [Development](docs/howto/devTest.md)
48 | - [CLI](docs/howto/cli.md)
49 |
50 | ## Help and Support
51 |
52 | For FAQs and commonly encountered errors, consult the documentation or use the CLI `--help` option:
53 |
54 | ```
55 | gen3 --help
56 | ```
57 |
58 | If you encounter issues, raise them on the [Gen3 SDK GitHub Issues page](https://github.com/uc-cdis/gen3sdk-python/issues).
59 |
--------------------------------------------------------------------------------
/docs/.nojekyll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uc-cdis/gen3sdk-python/0ca70c579d55cb0f6218fa7a534d02a8910acda1/docs/.nojekyll
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/_build/doctrees/auth.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uc-cdis/gen3sdk-python/0ca70c579d55cb0f6218fa7a534d02a8910acda1/docs/_build/doctrees/auth.doctree
--------------------------------------------------------------------------------
/docs/_build/doctrees/environment.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uc-cdis/gen3sdk-python/0ca70c579d55cb0f6218fa7a534d02a8910acda1/docs/_build/doctrees/environment.pickle
--------------------------------------------------------------------------------
/docs/_build/doctrees/file.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uc-cdis/gen3sdk-python/0ca70c579d55cb0f6218fa7a534d02a8910acda1/docs/_build/doctrees/file.doctree
--------------------------------------------------------------------------------
/docs/_build/doctrees/index.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uc-cdis/gen3sdk-python/0ca70c579d55cb0f6218fa7a534d02a8910acda1/docs/_build/doctrees/index.doctree
--------------------------------------------------------------------------------
/docs/_build/doctrees/indexing.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uc-cdis/gen3sdk-python/0ca70c579d55cb0f6218fa7a534d02a8910acda1/docs/_build/doctrees/indexing.doctree
--------------------------------------------------------------------------------
/docs/_build/doctrees/jobs.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uc-cdis/gen3sdk-python/0ca70c579d55cb0f6218fa7a534d02a8910acda1/docs/_build/doctrees/jobs.doctree
--------------------------------------------------------------------------------
/docs/_build/doctrees/metadata.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uc-cdis/gen3sdk-python/0ca70c579d55cb0f6218fa7a534d02a8910acda1/docs/_build/doctrees/metadata.doctree
--------------------------------------------------------------------------------
/docs/_build/doctrees/object.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uc-cdis/gen3sdk-python/0ca70c579d55cb0f6218fa7a534d02a8910acda1/docs/_build/doctrees/object.doctree
--------------------------------------------------------------------------------
/docs/_build/doctrees/query.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uc-cdis/gen3sdk-python/0ca70c579d55cb0f6218fa7a534d02a8910acda1/docs/_build/doctrees/query.doctree
--------------------------------------------------------------------------------
/docs/_build/doctrees/submission.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uc-cdis/gen3sdk-python/0ca70c579d55cb0f6218fa7a534d02a8910acda1/docs/_build/doctrees/submission.doctree
--------------------------------------------------------------------------------
/docs/_build/doctrees/tools.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uc-cdis/gen3sdk-python/0ca70c579d55cb0f6218fa7a534d02a8910acda1/docs/_build/doctrees/tools.doctree
--------------------------------------------------------------------------------
/docs/_build/doctrees/tools/drs_pull.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uc-cdis/gen3sdk-python/0ca70c579d55cb0f6218fa7a534d02a8910acda1/docs/_build/doctrees/tools/drs_pull.doctree
--------------------------------------------------------------------------------
/docs/_build/doctrees/tools/indexing.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uc-cdis/gen3sdk-python/0ca70c579d55cb0f6218fa7a534d02a8910acda1/docs/_build/doctrees/tools/indexing.doctree
--------------------------------------------------------------------------------
/docs/_build/doctrees/tools/metadata.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uc-cdis/gen3sdk-python/0ca70c579d55cb0f6218fa7a534d02a8910acda1/docs/_build/doctrees/tools/metadata.doctree
--------------------------------------------------------------------------------
/docs/_build/doctrees/wss.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uc-cdis/gen3sdk-python/0ca70c579d55cb0f6218fa7a534d02a8910acda1/docs/_build/doctrees/wss.doctree
--------------------------------------------------------------------------------
/docs/_build/html/.buildinfo:
--------------------------------------------------------------------------------
1 | # Sphinx build info version 1
2 | # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
3 | config: 0743ca845f6724be988ecc05e5144ced
4 | tags: 645f666f9bcd5a90fca523b33c5a78b7
5 |
--------------------------------------------------------------------------------
/docs/_build/html/_modules/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | Overview: module code — Gen3 SDK documentation
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
All modules for which code is available
33 |
48 |
49 |
50 |
51 |
52 |
53 |
112 |
113 |
114 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
--------------------------------------------------------------------------------
/docs/_build/html/_sources/auth.rst.txt:
--------------------------------------------------------------------------------
1 | Gen3 Auth Helper
2 | ----------------
3 |
4 | .. autoclass:: gen3.auth.Gen3Auth
5 | :members:
6 | :show-inheritance:
7 |
--------------------------------------------------------------------------------
/docs/_build/html/_sources/file.rst.txt:
--------------------------------------------------------------------------------
1 | Gen3 File Class
2 | ----------------
3 |
4 | .. autoclass:: gen3.file.Gen3File
5 | :members:
6 | :show-inheritance:
7 |
--------------------------------------------------------------------------------
/docs/_build/html/_sources/index.rst.txt:
--------------------------------------------------------------------------------
1 | Welcome to Gen3 SDK's documentation!
2 | ====================================
3 |
4 | The README in the Gen3 Python SDK Github Repo contains installation and setup information along with some quickstart scripts. This contains more detailed documentation about the various classes and functions available.
5 |
6 | This documentation is mostly auto-generated from the docstrings within the source code.
7 |
8 | .. toctree::
9 | :glob:
10 |
11 | *
12 |
13 | Indices and tables
14 | ==================
15 |
16 | * :ref:`genindex`
17 | * :ref:`modindex`
18 | * :ref:`search`
19 |
--------------------------------------------------------------------------------
/docs/_build/html/_sources/indexing.rst.txt:
--------------------------------------------------------------------------------
1 | Gen3 Index Class
2 | ----------------
3 |
4 | .. autoclass:: gen3.index.Gen3Index
5 | :members:
6 | :show-inheritance:
7 |
--------------------------------------------------------------------------------
/docs/_build/html/_sources/jobs.rst.txt:
--------------------------------------------------------------------------------
1 | Gen3 Jobs Class
2 | ----------------
3 |
4 | .. autoclass:: gen3.jobs.Gen3Jobs
5 | :members:
6 | :show-inheritance:
7 |
--------------------------------------------------------------------------------
/docs/_build/html/_sources/metadata.rst.txt:
--------------------------------------------------------------------------------
1 | Gen3 Metadata Class
2 | -------------------
3 |
4 | .. autoclass:: gen3.metadata.Gen3Metadata
5 | :members:
6 | :show-inheritance:
7 |
--------------------------------------------------------------------------------
/docs/_build/html/_sources/object.rst.txt:
--------------------------------------------------------------------------------
1 | Gen3 Object Class
2 | -------------------
3 |
4 | .. autoclass:: gen3.object.Gen3Object
5 | :members:
6 | :show-inheritance:
7 |
--------------------------------------------------------------------------------
/docs/_build/html/_sources/query.rst.txt:
--------------------------------------------------------------------------------
1 | Gen3 Query Class
2 | ---------------------
3 |
4 | .. autoclass:: gen3.query.Gen3Query
5 | :members:
6 | :show-inheritance:
7 |
--------------------------------------------------------------------------------
/docs/_build/html/_sources/submission.rst.txt:
--------------------------------------------------------------------------------
1 | Gen3 Submission Class
2 | ---------------------
3 |
4 | .. autoclass:: gen3.submission.Gen3Submission
5 | :members:
6 | :show-inheritance:
7 |
--------------------------------------------------------------------------------
/docs/_build/html/_sources/tools.rst.txt:
--------------------------------------------------------------------------------
1 | Gen3 Tools
2 | ----------
3 |
4 | Tools and functions for common actions in Gen3. These tools are broken up into broad categories like indexing (for tasks related to the file object persistent identifiers within the system) and metadata (for tasks relating to file object metadata within the system).
5 |
6 | Such common indexing tasks may involve indexing file object URLs into Gen3 to assign persistent identifiers, downloading a manifest of every file object that already exists, and verifying that a Gen3 instance contains the expected indexed file objects based on a file.
7 |
8 | For metadata, the task may be ingesting a large amount of metadata from a file into the system.
9 |
10 | Most of these tools utilize async capabilities of Python to make common tasks more efficient.
11 |
12 | .. toctree::
13 | :glob:
14 |
15 | tools/*
16 |
17 | .. automodule:: gen3.tools
18 | :members:
19 | :show-inheritance:
20 |
--------------------------------------------------------------------------------
/docs/_build/html/_sources/tools/drs_pull.rst.txt:
--------------------------------------------------------------------------------
1 | DRS Download Tools
2 | ------------------
3 |
4 | .. automodule:: gen3.tools.download.drs_download
5 | :members: download_files_in_drs_manifest, download_drs_object, list_files_in_drs_manifest,
6 | list_drs_object, list_access_in_drs_manifest,
7 | Manifest, DownloadManager, Downloadable, DownloadStatus,
8 |
--------------------------------------------------------------------------------
/docs/_build/html/_sources/tools/indexing.rst.txt:
--------------------------------------------------------------------------------
1 | Indexing Tools
2 | --------------
3 |
4 |
5 | Download
6 | ========
7 |
8 | .. automodule:: gen3.tools.indexing.download_manifest
9 | :members:
10 | :show-inheritance:
11 |
12 | Index
13 | ========
14 |
15 | .. automodule:: gen3.tools.indexing.index_manifest
16 | :members:
17 | :show-inheritance:
18 |
19 | Verify
20 | ========
21 |
22 | .. automodule:: gen3.tools.indexing.verify_manifest
23 | :members:
24 | :show-inheritance:
--------------------------------------------------------------------------------
/docs/_build/html/_sources/tools/metadata.rst.txt:
--------------------------------------------------------------------------------
1 | Metadata Tools
2 | --------------
3 |
4 |
5 | Ingest
6 | ========
7 |
8 | .. automodule:: gen3.tools.metadata.ingest_manifest
9 | :members:
10 | :show-inheritance:
11 |
--------------------------------------------------------------------------------
/docs/_build/html/_sources/wss.rst.txt:
--------------------------------------------------------------------------------
1 | Gen3 Workspace Storage
2 | -----------------------
3 |
4 | .. autoclass:: gen3.wss.Gen3WsStorage
5 | :members:
6 | :show-inheritance:
7 |
--------------------------------------------------------------------------------
/docs/_build/html/_static/custom.css:
--------------------------------------------------------------------------------
1 | /* This file intentionally left blank. */
2 |
--------------------------------------------------------------------------------
/docs/_build/html/_static/doctools.js:
--------------------------------------------------------------------------------
1 | /*
2 | * doctools.js
3 | * ~~~~~~~~~~~
4 | *
5 | * Base JavaScript utilities for all Sphinx HTML documentation.
6 | *
7 | * :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS.
8 | * :license: BSD, see LICENSE for details.
9 | *
10 | */
11 | "use strict";
12 |
13 | const BLACKLISTED_KEY_CONTROL_ELEMENTS = new Set([
14 | "TEXTAREA",
15 | "INPUT",
16 | "SELECT",
17 | "BUTTON",
18 | ]);
19 |
20 | const _ready = (callback) => { // run `callback` once the document has left the "loading" state
21 | if (document.readyState !== "loading") {
22 | callback(); // no longer loading: invoke immediately
23 | } else {
24 | document.addEventListener("DOMContentLoaded", callback); // still loading: defer until DOMContentLoaded fires
25 | }
26 | };
27 |
28 | /**
29 | * Small JavaScript module for the documentation: translation helpers, search-bar focus, domain index toggling and keyboard shortcuts.
30 | */
31 | const Documentation = {
32 | init: () => { // entry point; wires up the index toggles and the key listeners
33 | Documentation.initDomainIndexTable();
34 | Documentation.initOnKeyListeners();
35 | },
36 |
37 | /**
38 | * i18n support
39 | */
40 | TRANSLATIONS: {},
41 | PLURAL_EXPR: (n) => (n === 1 ? 0 : 1), // default plural rule: form 0 for n === 1, form 1 otherwise; replaced by addTranslations()
42 | LOCALE: "unknown", // overwritten with catalog.locale by addTranslations()
43 |
44 | // gettext and ngettext don't access `this` so that the functions
45 | // can safely be bound to a different name (_ = Documentation.gettext)
46 | gettext: (string) => {
47 | const translated = Documentation.TRANSLATIONS[string];
48 | switch (typeof translated) {
49 | case "undefined":
50 | return string; // no translation
51 | case "string":
52 | return translated; // translation exists
53 | default:
54 | return translated[0]; // (singular, plural) translation tuple exists
55 | }
56 | },
57 |
58 | ngettext: (singular, plural, n) => {
59 | const translated = Documentation.TRANSLATIONS[singular];
60 | if (typeof translated !== "undefined")
61 | return translated[Documentation.PLURAL_EXPR(n)]; // NOTE(review): indexes the entry by plural form; presumably such entries are arrays, not plain strings - confirm
62 | return n === 1 ? singular : plural; // no catalog entry: fall back to the arguments
63 | },
64 |
65 | addTranslations: (catalog) => { // merges catalog.messages and adopts the catalog's plural rule and locale
66 | Object.assign(Documentation.TRANSLATIONS, catalog.messages);
67 | Documentation.PLURAL_EXPR = new Function( // NOTE(review): catalog.plural_expr is compiled as code, so catalogs must come from a trusted source
68 | "n",
69 | `return (${catalog.plural_expr})`
70 | );
71 | Documentation.LOCALE = catalog.locale;
72 | },
73 |
74 | /**
75 | * helper function to focus on search bar
76 | */
77 | focusSearchBar: () => {
78 | document.querySelectorAll("input[name=q]")[0]?.focus(); // first input named "q", if any; otherwise a no-op
79 | },
80 |
81 | /**
82 | * Initialise the domain index toggle buttons
83 | */
84 | initDomainIndexTable: () => {
85 | const toggler = (el) => {
86 | const idNumber = el.id.substr(7); // drop the first 7 characters of the id; the rest selects rows with class cg-<rest>
87 | const toggledRows = document.querySelectorAll(`tr.cg-${idNumber}`);
88 | if (el.src.substr(-9) === "minus.png") { // icon is minus.png: swap it for plus.png and hide the rows
89 | el.src = `${el.src.substr(0, el.src.length - 9)}plus.png`;
90 | toggledRows.forEach((el) => (el.style.display = "none"));
91 | } else { // otherwise swap the trailing 8 characters for minus.png and show the rows
92 | el.src = `${el.src.substr(0, el.src.length - 8)}minus.png`;
93 | toggledRows.forEach((el) => (el.style.display = ""));
94 | }
95 | };
96 |
97 | const togglerElements = document.querySelectorAll("img.toggler");
98 | togglerElements.forEach((el) =>
99 | el.addEventListener("click", (event) => toggler(event.currentTarget))
100 | );
101 | togglerElements.forEach((el) => (el.style.display = "")); // clear any inline display value on the toggle icons
102 | if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) togglerElements.forEach(toggler); // toggle every entry once on load when COLLAPSE_INDEX is set
103 | },
104 |
105 | initOnKeyListeners: () => {
106 | // only install a listener if it is really needed
107 | if (
108 | !DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS &&
109 | !DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS
110 | )
111 | return;
112 |
113 | document.addEventListener("keydown", (event) => {
114 | // bail for input elements
115 | if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return;
116 | // bail with special keys
117 | if (event.altKey || event.ctrlKey || event.metaKey) return;
118 |
119 | if (!event.shiftKey) { // arrow-key navigation is only considered when Shift is not held
120 | switch (event.key) {
121 | case "ArrowLeft":
122 | if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break;
123 |
124 | const prevLink = document.querySelector('link[rel="prev"]');
125 | if (prevLink && prevLink.href) { // navigate to the rel="prev" link target when one exists
126 | window.location.href = prevLink.href;
127 | event.preventDefault();
128 | }
129 | break;
130 | case "ArrowRight":
131 | if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break;
132 |
133 | const nextLink = document.querySelector('link[rel="next"]');
134 | if (nextLink && nextLink.href) { // navigate to the rel="next" link target when one exists
135 | window.location.href = nextLink.href;
136 | event.preventDefault();
137 | }
138 | break;
139 | }
140 | }
141 |
142 | // some keyboard layouts may need Shift to get /
143 | switch (event.key) {
144 | case "/":
145 | if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) break;
146 | Documentation.focusSearchBar();
147 | event.preventDefault(); // keep the "/" keystroke from being handled further
148 | }
149 | });
150 | },
151 | };
152 |
153 | // quick alias for translations
154 | const _ = Documentation.gettext;
155 |
156 | _ready(Documentation.init);
157 |
--------------------------------------------------------------------------------
/docs/_build/html/_static/documentation_options.js:
--------------------------------------------------------------------------------
1 | const DOCUMENTATION_OPTIONS = {
2 | VERSION: '',
3 | LANGUAGE: 'en',
4 | COLLAPSE_INDEX: false, // read by Documentation.initDomainIndexTable() in doctools.js
5 | BUILDER: 'html',
6 | FILE_SUFFIX: '.html',
7 | LINK_SUFFIX: '.html',
8 | HAS_SOURCE: true,
9 | SOURCELINK_SUFFIX: '.txt',
10 | NAVIGATION_WITH_KEYS: false, // gates the ArrowLeft/ArrowRight handling in doctools.js
11 | SHOW_SEARCH_SUMMARY: true,
12 | ENABLE_SEARCH_SHORTCUTS: true, // gates the "/" (doctools.js) and Escape (sphinx_highlight.js) shortcuts
13 | };
--------------------------------------------------------------------------------
/docs/_build/html/_static/file.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uc-cdis/gen3sdk-python/0ca70c579d55cb0f6218fa7a534d02a8910acda1/docs/_build/html/_static/file.png
--------------------------------------------------------------------------------
/docs/_build/html/_static/language_data.js:
--------------------------------------------------------------------------------
1 | /*
2 | * language_data.js
3 | * ~~~~~~~~~~~~~~~~
4 | *
5 | * This script contains the language-specific data used by searchtools.js,
6 | * namely the list of stopwords, stemmer, scorer and splitter.
7 | *
8 | * :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS.
9 | * :license: BSD, see LICENSE for details.
10 | *
11 | */
12 |
13 | var stopwords = ["a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "near", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"];
14 |
15 |
16 | /* Non-minified version is copied as a separate JS file, if available */
17 |
18 | /**
19 | * Porter Stemmer: stemWord(w) rewrites word suffixes through the numbered steps (1a to 5) below.
20 | */
21 | var Stemmer = function() { // constructor; instances expose a single method, stemWord(w)
22 |
23 | var step2list = { // suffix -> replacement table applied in Step 2
24 | ational: 'ate',
25 | tional: 'tion',
26 | enci: 'ence',
27 | anci: 'ance',
28 | izer: 'ize',
29 | bli: 'ble',
30 | alli: 'al',
31 | entli: 'ent',
32 | eli: 'e',
33 | ousli: 'ous',
34 | ization: 'ize',
35 | ation: 'ate',
36 | ator: 'ate',
37 | alism: 'al',
38 | iveness: 'ive',
39 | fulness: 'ful',
40 | ousness: 'ous',
41 | aliti: 'al',
42 | iviti: 'ive',
43 | biliti: 'ble',
44 | logi: 'log'
45 | };
46 |
47 | var step3list = { // suffix -> replacement table applied in Step 3
48 | icate: 'ic',
49 | ative: '',
50 | alize: 'al',
51 | iciti: 'ic',
52 | ical: 'ic',
53 | ful: '',
54 | ness: ''
55 | };
56 |
57 | var c = "[^aeiou]"; // consonant
58 | var v = "[aeiouy]"; // vowel
59 | var C = c + "[^aeiouy]*"; // consonant sequence
60 | var V = v + "[aeiou]*"; // vowel sequence
61 |
62 | var mgr0 = "^(" + C + ")?" + V + C; // [C]VC... is m>0
63 | var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1
64 | var mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1
65 | var s_v = "^(" + C + ")?" + v; // vowel in stem
66 |
67 | this.stemWord = function (w) {
68 | var stem;
69 | var suffix;
70 | var firstch;
71 | var origword = w; // NOTE(review): origword is never read in this function
72 |
73 | if (w.length < 3) // words shorter than 3 characters are returned unchanged
74 | return w;
75 |
76 | var re;
77 | var re2;
78 | var re3;
79 | var re4;
80 |
81 | firstch = w.substr(0,1);
82 | if (firstch == "y") // upper-case a leading "y" while stemming; it is lowered again before returning
83 | w = firstch.toUpperCase() + w.substr(1);
84 |
85 | // Step 1a
86 | re = /^(.+?)(ss|i)es$/;
87 | re2 = /^(.+?)([^s])s$/;
88 |
89 | if (re.test(w))
90 | w = w.replace(re,"$1$2");
91 | else if (re2.test(w))
92 | w = w.replace(re2,"$1$2");
93 |
94 | // Step 1b
95 | re = /^(.+?)eed$/;
96 | re2 = /^(.+?)(ed|ing)$/;
97 | if (re.test(w)) {
98 | var fp = re.exec(w);
99 | re = new RegExp(mgr0);
100 | if (re.test(fp[1])) {
101 | re = /.$/; // drops the final character
102 | w = w.replace(re,"");
103 | }
104 | }
105 | else if (re2.test(w)) {
106 | var fp = re2.exec(w);
107 | stem = fp[1];
108 | re2 = new RegExp(s_v);
109 | if (re2.test(stem)) { // only strip "ed"/"ing" when the remaining stem matches the vowel-in-stem pattern
110 | w = stem;
111 | re2 = /(at|bl|iz)$/;
112 | re3 = new RegExp("([^aeiouylsz])\\1$");
113 | re4 = new RegExp("^" + C + v + "[^aeiouwxy]$");
114 | if (re2.test(w))
115 | w = w + "e";
116 | else if (re3.test(w)) { // doubled final letter (other than l, s, z): drop one of the pair
117 | re = /.$/;
118 | w = w.replace(re,"");
119 | }
120 | else if (re4.test(w))
121 | w = w + "e";
122 | }
123 | }
124 |
125 | // Step 1c
126 | re = /^(.+?)y$/;
127 | if (re.test(w)) {
128 | var fp = re.exec(w);
129 | stem = fp[1];
130 | re = new RegExp(s_v);
131 | if (re.test(stem))
132 | w = stem + "i";
133 | }
134 |
135 | // Step 2
136 | re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
137 | if (re.test(w)) {
138 | var fp = re.exec(w);
139 | stem = fp[1];
140 | suffix = fp[2];
141 | re = new RegExp(mgr0);
142 | if (re.test(stem))
143 | w = stem + step2list[suffix];
144 | }
145 |
146 | // Step 3
147 | re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
148 | if (re.test(w)) {
149 | var fp = re.exec(w);
150 | stem = fp[1];
151 | suffix = fp[2];
152 | re = new RegExp(mgr0);
153 | if (re.test(stem))
154 | w = stem + step3list[suffix];
155 | }
156 |
157 | // Step 4
158 | re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
159 | re2 = /^(.+?)(s|t)(ion)$/;
160 | if (re.test(w)) {
161 | var fp = re.exec(w);
162 | stem = fp[1];
163 | re = new RegExp(mgr1);
164 | if (re.test(stem))
165 | w = stem;
166 | }
167 | else if (re2.test(w)) {
168 | var fp = re2.exec(w);
169 | stem = fp[1] + fp[2]; // keep the "s"/"t" that precedes "ion"
170 | re2 = new RegExp(mgr1);
171 | if (re2.test(stem))
172 | w = stem;
173 | }
174 |
175 | // Step 5
176 | re = /^(.+?)e$/;
177 | if (re.test(w)) {
178 | var fp = re.exec(w);
179 | stem = fp[1];
180 | re = new RegExp(mgr1);
181 | re2 = new RegExp(meq1);
182 | re3 = new RegExp("^" + C + v + "[^aeiouwxy]$");
183 | if (re.test(stem) || (re2.test(stem) && !(re3.test(stem))))
184 | w = stem;
185 | }
186 | re = /ll$/;
187 | re2 = new RegExp(mgr1);
188 | if (re.test(w) && re2.test(w)) { // word ends in "ll" and matches the m>1 pattern: drop the last "l"
189 | re = /.$/;
190 | w = w.replace(re,"");
191 | }
192 |
193 | // and turn initial Y back to y
194 | if (firstch == "y")
195 | w = firstch.toLowerCase() + w.substr(1);
196 | return w;
197 | }
198 | }
199 |
200 |
--------------------------------------------------------------------------------
/docs/_build/html/_static/minus.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uc-cdis/gen3sdk-python/0ca70c579d55cb0f6218fa7a534d02a8910acda1/docs/_build/html/_static/minus.png
--------------------------------------------------------------------------------
/docs/_build/html/_static/plus.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uc-cdis/gen3sdk-python/0ca70c579d55cb0f6218fa7a534d02a8910acda1/docs/_build/html/_static/plus.png
--------------------------------------------------------------------------------
/docs/_build/html/_static/pygments.css:
--------------------------------------------------------------------------------
1 | pre { line-height: 125%; }
2 | td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
3 | span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
4 | td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
5 | span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
6 | .highlight .hll { background-color: #ffffcc }
7 | .highlight { background: #f8f8f8; }
8 | .highlight .c { color: #8F5902; font-style: italic } /* Comment */
9 | .highlight .err { color: #A40000; border: 1px solid #EF2929 } /* Error */
10 | .highlight .g { color: #000 } /* Generic */
11 | .highlight .k { color: #004461; font-weight: bold } /* Keyword */
12 | .highlight .l { color: #000 } /* Literal */
13 | .highlight .n { color: #000 } /* Name */
14 | .highlight .o { color: #582800 } /* Operator */
15 | .highlight .x { color: #000 } /* Other */
16 | .highlight .p { color: #000; font-weight: bold } /* Punctuation */
17 | .highlight .ch { color: #8F5902; font-style: italic } /* Comment.Hashbang */
18 | .highlight .cm { color: #8F5902; font-style: italic } /* Comment.Multiline */
19 | .highlight .cp { color: #8F5902 } /* Comment.Preproc */
20 | .highlight .cpf { color: #8F5902; font-style: italic } /* Comment.PreprocFile */
21 | .highlight .c1 { color: #8F5902; font-style: italic } /* Comment.Single */
22 | .highlight .cs { color: #8F5902; font-style: italic } /* Comment.Special */
23 | .highlight .gd { color: #A40000 } /* Generic.Deleted */
24 | .highlight .ge { color: #000; font-style: italic } /* Generic.Emph */
25 | .highlight .ges { color: #000 } /* Generic.EmphStrong */
26 | .highlight .gr { color: #EF2929 } /* Generic.Error */
27 | .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */
28 | .highlight .gi { color: #00A000 } /* Generic.Inserted */
29 | .highlight .go { color: #888 } /* Generic.Output */
30 | .highlight .gp { color: #745334 } /* Generic.Prompt */
31 | .highlight .gs { color: #000; font-weight: bold } /* Generic.Strong */
32 | .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */
33 | .highlight .gt { color: #A40000; font-weight: bold } /* Generic.Traceback */
34 | .highlight .kc { color: #004461; font-weight: bold } /* Keyword.Constant */
35 | .highlight .kd { color: #004461; font-weight: bold } /* Keyword.Declaration */
36 | .highlight .kn { color: #004461; font-weight: bold } /* Keyword.Namespace */
37 | .highlight .kp { color: #004461; font-weight: bold } /* Keyword.Pseudo */
38 | .highlight .kr { color: #004461; font-weight: bold } /* Keyword.Reserved */
39 | .highlight .kt { color: #004461; font-weight: bold } /* Keyword.Type */
40 | .highlight .ld { color: #000 } /* Literal.Date */
41 | .highlight .m { color: #900 } /* Literal.Number */
42 | .highlight .s { color: #4E9A06 } /* Literal.String */
43 | .highlight .na { color: #C4A000 } /* Name.Attribute */
44 | .highlight .nb { color: #004461 } /* Name.Builtin */
45 | .highlight .nc { color: #000 } /* Name.Class */
46 | .highlight .no { color: #000 } /* Name.Constant */
47 | .highlight .nd { color: #888 } /* Name.Decorator */
48 | .highlight .ni { color: #CE5C00 } /* Name.Entity */
49 | .highlight .ne { color: #C00; font-weight: bold } /* Name.Exception */
50 | .highlight .nf { color: #000 } /* Name.Function */
51 | .highlight .nl { color: #F57900 } /* Name.Label */
52 | .highlight .nn { color: #000 } /* Name.Namespace */
53 | .highlight .nx { color: #000 } /* Name.Other */
54 | .highlight .py { color: #000 } /* Name.Property */
55 | .highlight .nt { color: #004461; font-weight: bold } /* Name.Tag */
56 | .highlight .nv { color: #000 } /* Name.Variable */
57 | .highlight .ow { color: #004461; font-weight: bold } /* Operator.Word */
58 | .highlight .pm { color: #000; font-weight: bold } /* Punctuation.Marker */
59 | .highlight .w { color: #F8F8F8 } /* Text.Whitespace */
60 | .highlight .mb { color: #900 } /* Literal.Number.Bin */
61 | .highlight .mf { color: #900 } /* Literal.Number.Float */
62 | .highlight .mh { color: #900 } /* Literal.Number.Hex */
63 | .highlight .mi { color: #900 } /* Literal.Number.Integer */
64 | .highlight .mo { color: #900 } /* Literal.Number.Oct */
65 | .highlight .sa { color: #4E9A06 } /* Literal.String.Affix */
66 | .highlight .sb { color: #4E9A06 } /* Literal.String.Backtick */
67 | .highlight .sc { color: #4E9A06 } /* Literal.String.Char */
68 | .highlight .dl { color: #4E9A06 } /* Literal.String.Delimiter */
69 | .highlight .sd { color: #8F5902; font-style: italic } /* Literal.String.Doc */
70 | .highlight .s2 { color: #4E9A06 } /* Literal.String.Double */
71 | .highlight .se { color: #4E9A06 } /* Literal.String.Escape */
72 | .highlight .sh { color: #4E9A06 } /* Literal.String.Heredoc */
73 | .highlight .si { color: #4E9A06 } /* Literal.String.Interpol */
74 | .highlight .sx { color: #4E9A06 } /* Literal.String.Other */
75 | .highlight .sr { color: #4E9A06 } /* Literal.String.Regex */
76 | .highlight .s1 { color: #4E9A06 } /* Literal.String.Single */
77 | .highlight .ss { color: #4E9A06 } /* Literal.String.Symbol */
78 | .highlight .bp { color: #3465A4 } /* Name.Builtin.Pseudo */
79 | .highlight .fm { color: #000 } /* Name.Function.Magic */
80 | .highlight .vc { color: #000 } /* Name.Variable.Class */
81 | .highlight .vg { color: #000 } /* Name.Variable.Global */
82 | .highlight .vi { color: #000 } /* Name.Variable.Instance */
83 | .highlight .vm { color: #000 } /* Name.Variable.Magic */
84 | .highlight .il { color: #900 } /* Literal.Number.Integer.Long */
--------------------------------------------------------------------------------
/docs/_build/html/_static/sphinx_highlight.js:
--------------------------------------------------------------------------------
1 | /* Highlighting utilities for Sphinx HTML documentation. */
2 | "use strict";
3 |
4 | const SPHINX_HIGHLIGHT_ENABLED = true
5 |
6 | /**
7 | * highlight a given string on a node by wrapping it in
8 | * span elements with the given class name (tspan elements inside SVG); SVG background rects are queued in addItems for the caller to insert.
9 | */
10 | const _highlight = (node, addItems, text, className) => {
11 | if (node.nodeType === Node.TEXT_NODE) {
12 | const val = node.nodeValue;
13 | const parent = node.parentNode;
14 | const pos = val.toLowerCase().indexOf(text); // only the node value is lower-cased here, so `text` must already be lower-case
15 | if (
16 | pos >= 0 &&
17 | !parent.classList.contains(className) &&
18 | !parent.classList.contains("nohighlight")
19 | ) {
20 | let span;
21 |
22 | const closestNode = parent.closest("body, svg, foreignObject");
23 | const isInSVG = closestNode && closestNode.matches("svg"); // true only when the nearest of body/svg/foreignObject is an <svg>
24 | if (isInSVG) {
25 | span = document.createElementNS("http://www.w3.org/2000/svg", "tspan");
26 | } else {
27 | span = document.createElement("span");
28 | span.classList.add(className); // only the HTML span carries className; in SVG the class goes on the rect created below
29 | }
30 |
31 | span.appendChild(document.createTextNode(val.substr(pos, text.length)));
32 | const rest = document.createTextNode(val.substr(pos + text.length));
33 | parent.insertBefore(
34 | span,
35 | parent.insertBefore(
36 | rest,
37 | node.nextSibling
38 | )
39 | );
40 | node.nodeValue = val.substr(0, pos); // the original node keeps only the text before the match
41 | /* There may be more occurrences of search term in this node. So call this
42 | * function recursively on the remaining fragment.
43 | */
44 | _highlight(rest, addItems, text, className);
45 |
46 | if (isInSVG) {
47 | const rect = document.createElementNS(
48 | "http://www.w3.org/2000/svg",
49 | "rect"
50 | );
51 | const bbox = parent.getBBox();
52 | rect.x.baseVal.value = bbox.x;
53 | rect.y.baseVal.value = bbox.y;
54 | rect.width.baseVal.value = bbox.width;
55 | rect.height.baseVal.value = bbox.height;
56 | rect.setAttribute("class", className);
57 | addItems.push({ parent: parent, target: rect }); // insertion is left to the caller, which receives the queued items via addItems
58 | }
59 | }
60 | } else if (node.matches && !node.matches("button, select, textarea")) { // element node: recurse into children, skipping these form controls
61 | node.childNodes.forEach((el) => _highlight(el, addItems, text, className));
62 | }
63 | };
64 | const _highlightText = (thisNode, text, className) => { // highlight `text` below thisNode, then insert the rects queued during the walk
65 | let addItems = [];
66 | _highlight(thisNode, addItems, text, className);
67 | addItems.forEach((obj) =>
68 | obj.parent.insertAdjacentElement("beforebegin", obj.target) // each queued target goes immediately before its recorded parent
69 | );
70 | };
71 |
72 | /**
73 | * Small JavaScript module for the documentation.
74 | */
75 | const SphinxHighlight = {
76 |
77 | /**
78 | * highlight the search words provided in localstorage in the text
79 | */
80 | highlightSearchWords: () => {
81 | if (!SPHINX_HIGHLIGHT_ENABLED) return; // bail if no highlight
82 |
83 | // get and clear terms from localstorage
84 | const url = new URL(window.location);
85 | const highlight =
86 | localStorage.getItem("sphinx_highlight_terms")
87 | || url.searchParams.get("highlight")
88 | || "";
89 | localStorage.removeItem("sphinx_highlight_terms")
90 | url.searchParams.delete("highlight");
91 | window.history.replaceState({}, "", url);
92 |
93 | // get individual terms from highlight string
94 | const terms = highlight.toLowerCase().split(/\s+/).filter(x => x);
95 | if (terms.length === 0) return; // nothing to do
96 |
97 | // There should never be more than one element matching "div.body"
98 | const divBody = document.querySelectorAll("div.body");
99 | const body = divBody.length ? divBody[0] : document.querySelector("body");
100 | window.setTimeout(() => {
101 | terms.forEach((term) => _highlightText(body, term, "highlighted"));
102 | }, 10);
103 |
104 | const searchBox = document.getElementById("searchbox");
105 | if (searchBox === null) return;
106 | searchBox.appendChild(
107 | document
108 | .createRange()
109 | .createContextualFragment(
110 | '' +
111 | '' +
112 | _("Hide Search Matches") +
113 | "
"
114 | )
115 | );
116 | },
117 |
118 | /**
119 | * helper function to hide the search marks again
120 | */
121 | hideSearchWords: () => {
122 | document
123 | .querySelectorAll("#searchbox .highlight-link")
124 | .forEach((el) => el.remove());
125 | document
126 | .querySelectorAll("span.highlighted")
127 | .forEach((el) => el.classList.remove("highlighted"));
128 | localStorage.removeItem("sphinx_highlight_terms")
129 | },
130 |
131 | initEscapeListener: () => {
132 | // only install a listener if it is really needed
133 | if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) return;
134 |
135 | document.addEventListener("keydown", (event) => {
136 | // bail for input elements
137 | if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return;
138 | // bail with special keys
139 | if (event.shiftKey || event.altKey || event.ctrlKey || event.metaKey) return;
140 | if (DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS && (event.key === "Escape")) {
141 | SphinxHighlight.hideSearchWords();
142 | event.preventDefault();
143 | }
144 | });
145 | },
146 | };
147 |
148 | _ready(() => {
149 | /* Do not call highlightSearchWords() when we are on the search page.
150 | * It will highlight words from the *previous* search query.
151 | */
152 | if (typeof Search === "undefined") SphinxHighlight.highlightSearchWords();
153 | SphinxHighlight.initEscapeListener();
154 | });
155 |
--------------------------------------------------------------------------------
/docs/_build/html/objects.inv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uc-cdis/gen3sdk-python/0ca70c579d55cb0f6218fa7a534d02a8910acda1/docs/_build/html/objects.inv
--------------------------------------------------------------------------------
/docs/_build/html/py-modindex.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | Python Module Index — Gen3 SDK documentation
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
Python Module Index
37 |
38 |
41 |
42 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
148 |
149 |
150 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
--------------------------------------------------------------------------------
/docs/_build/html/search.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | Search — Gen3 SDK documentation
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
Search
40 |
41 |
49 |
50 |
51 |
52 | Searching for multiple words only shows matches that contain
53 | all words.
54 |
55 |
56 |
57 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
120 |
121 |
122 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
--------------------------------------------------------------------------------
/docs/auth.rst:
--------------------------------------------------------------------------------
1 | Gen3 Auth Helper
2 | ----------------
3 |
4 | .. autoclass:: gen3.auth.Gen3Auth
5 | :members:
6 | :show-inheritance:
7 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # This file only contains a selection of the most common options. For a full
4 | # list see the documentation:
5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
6 |
7 | # -- Path setup --------------------------------------------------------------
8 |
9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | import os
14 | import sys
15 |
16 | sys.path.insert(0, os.path.abspath(".."))  # parent directory goes first on sys.path (presumably so autodoc can import the gen3 package - confirm)
17 |
18 |
19 | # -- Project information -----------------------------------------------------
20 |
21 | project = "Gen3 SDK"
22 | copyright = "2021, Center for Translational Data Science"
23 | author = "Center for Translational Data Science"
24 |
25 | master_doc = "index"  # name of the root document (index.rst)
26 |
27 | # -- General configuration ---------------------------------------------------
28 |
29 | # Add any Sphinx extension module names here, as strings. They can be
30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
31 | # ones.
32 | extensions = ["sphinx.ext.autodoc", "sphinx.ext.viewcode", "sphinx.ext.napoleon"]  # autodoc: docs from docstrings; viewcode: links to source; napoleon: Google/NumPy docstring styles
33 |
34 |
35 | # Add any paths that contain templates here, relative to this directory.
36 | templates_path = ["_templates"]
37 |
38 | # List of patterns, relative to source directory, that match files and
39 | # directories to ignore when looking for source files.
40 | # This pattern also affects html_static_path and html_extra_path.
41 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
42 |
43 |
44 | # -- Options for HTML output -------------------------------------------------
45 |
46 | # The theme to use for HTML and HTML Help pages. See the documentation for
47 | # a list of builtin themes.
48 | #
49 | html_theme = "alabaster"
50 |
51 | # Add any paths that contain custom static files (such as style sheets) here,
52 | # relative to this directory. They are copied after the builtin static files,
53 | # so a file named "default.css" will overwrite the builtin "default.css".
54 | html_static_path = ["_static"]
55 |
56 | # Theme options are theme-specific and customize the look and feel of a theme
57 | # further. For a list of options available for each theme, see the
58 | # documentation.
59 | #
60 | html_theme_options = {
61 | "show_powered_by": False,
62 | "github_user": "uc-cdis",
63 | "github_repo": "gen3sdk-python",
64 | "github_banner": True,
65 | "show_related": False,
66 | }
67 |
--------------------------------------------------------------------------------
/docs/file.rst:
--------------------------------------------------------------------------------
1 | Gen3 File Class
2 | ----------------
3 |
4 | .. autoclass:: gen3.file.Gen3File
5 | :members:
6 | :show-inheritance:
7 |
--------------------------------------------------------------------------------
/docs/howto/bundleTools.md:
--------------------------------------------------------------------------------
1 | ## Bundle Tools
2 |
3 | TOC
4 | - [Ingest Manifest](#ingest-manifest)
5 | - [Example Bundle](#example-bundle)
6 |
7 | ### Ingest Manifest
8 |
9 | The only required columns are `bundle_name` and `ids` (a list of objects or bundles). The order of bundles in the manifest matters: you can reference a `bundle_name` in the `ids` list only if that `bundle_name` appears before the bundle containing it. Lowest-level bundles should contain only GUIDs of File Objects and should live at the top of the manifest. If a GUID is not provided for a bundle, indexd will assign a GUID for the bundle. Within the manifest, `bundle_name` is used as the unique identifier.
10 |
11 | #### Example Bundle:
12 | To create the following bundle:
13 | ```
14 | Bundle A
15 | +- BundleB
16 | +- BundleC
17 | +-File1
18 | +-File2
19 | +-File3
20 | ```
21 | the manifest should look like this:
22 | ```
23 | bundle_name,ids,GUID(optional) // can also include other optional fields defined by the DRS spec
24 | BundleC,[File1-GUID File2-GUID],
25 | BundleB,[File3-GUID BundleC],
26 | BundleA, [BundleB BundleC],
27 | ```
28 |
29 | The following is an example csv manifest:
30 | ```
31 | bundle_name,ids,GUID,size,type,checksum,description
32 | A,[dg.TEST/f2a39f98-6ae1-48a5-8d48-825a0c52a22b dg.TEST/1e9d3103-cbe2-4c39-917c-b3abad4750d2],,,,,some description
33 | B,['dg.TEST/1e9d3103-cbe2-4c39-917c-b3abad4750d2' 'dg.TEST/f2a39f98-6ae1-48a5-8d48-825a0c52a22b'],,789,,,something
34 | C,[A 'B' dg.TEST/ed8f4658-6acd-4f96-9dd8-3709890c959e],,120,,,lalala
35 | D,[A B C],,,[md5 sha256],[1234567 abc12345],
36 | E,[A B],dg.xxxx/590ee63d-2790-477a-bbf8-d53873ca4933,,md5 sha256,abcdefg abcd123,
37 | ```
38 | NOTE: DrsObjects/Bundles support multiple checksums. In the manifest, list the checksum types in the `type` column and the corresponding hashes, in the same order, in the `checksum` column.
39 |
40 | Example:
41 | ```
42 | type,checksum
43 | md5 sha256, abcde 12345
44 | ```
45 | The above manifest would result in the following `checksums` field in the bundle:
46 | ```json
47 | "checksums":[{"type": "md5", "checksum": "abcde"}, {"type": "sha256", "checksum": "12345"}]
48 | ```
49 |
50 | ```python
51 | import sys
52 | import logging
53 |
54 | from gen3.auth import Gen3Auth
55 | from gen3.tools.bundle.ingest_manifest import ingest_bundle_manifest
56 |
57 | logging.basicConfig(filename="output.log", level=logging.DEBUG)
58 | logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))
59 |
60 | COMMONS = "https://{{insert-commons-here}}/"
61 | MANIFEST = "./example_manifest.tsv"
62 |
63 | def main():
64 | auth = Gen3Auth(COMMONS, refresh_file="credentials.json")
65 |
66 | # use basic auth for admin privileges in indexd
67 | # auth = ("basic_auth_username", "basic_auth_password")
68 |
69 | ingest_bundle_manifest(
70 | commons_url=COMMONS,
71 | manifest_file=MANIFEST,
72 | out_manifest_file="ingest_out.csv",
73 | auth=auth,
74 | )
75 |
76 | if __name__ == "__main__":
77 | main()
78 | ```
79 |
--------------------------------------------------------------------------------
/docs/howto/cli.md:
--------------------------------------------------------------------------------
1 | ## CLI
2 |
3 | The CLI can be invoked as follows
4 |
5 | `gen3 [OPTIONS] COMMAND [ARGS]`
6 |
7 | For a list of commands and options run
8 |
9 | `gen3 --help`
10 |
11 | For example, the following can validate `user.yaml` files
12 |
13 | `gen3 users validate first_user.yaml second_user.yaml`
14 |
15 | The [gen3users](https://github.com/uc-cdis/gen3users) documentation describes the `users` functionality.
16 |
17 |
--------------------------------------------------------------------------------
/docs/howto/crosswalk.md:
--------------------------------------------------------------------------------
1 | # Gen3 Cross-Commons Subject Linking and Crosswalk
2 |
3 | A general solution that supports the linking of subjects across commons. A researcher should be able to “crosswalk” the information from one commons to combine with subject data from other commons.
4 |
5 | We accomplish this by utilizing the Gen3 Framework Services API, specifically the metadata/semi-structured data support. The SDK simplifies the management of this crosswalk data within the framework services.
6 |
7 | > Note that all data in the Gen3 Metadata API **MUST BE OPEN ACCESS AND CONTAIN NO PII**
8 |
9 | ## Centralized Mapping Information Format
10 |
11 | Note that this block effectively represents subject-level data, with the crosswalk being a namespace within that overall block. This **example** is a metadata record from the Gen3 Metadata API (powered by the metadata-service).
12 |
13 | ```json
14 | "GUID": {
15 | "crosswalk": {
16 | "subject": {
17 | "{{commons_url}}": {
18 | "{{field_name}}": {
19 | "value": "",
20 | "type": "",
21 | "description": ""
22 | }
23 | // ... more field entries here
24 | },
25 | // ... more commons entries here
26 | "mapping_methodologies": [
27 | ""
28 | ]
29 | }
30 | }
31 | }
32 |
33 | ```
34 |
35 | Example:
36 |
37 | ```json
38 | "GUID": {
39 | "crosswalk": {
40 | "subject": {
41 | "https://gen3.biodatacatalyst.nhlbi.nih.gov": {
42 | "Subject.submitter_id": {
43 | "value": "phs002363.v1_RC-1358",
44 | "type": "gen3_node_property",
45 | "description": "These identifiers are constructed as part of the data ingestion process in BDCat and concatenate the study and version with the study-provided subject ID (with a _ delimiting)."
46 | }
47 | },
48 | "https://data.midrc.org": {
49 | "Case.submitter_id": {
50 | "value": "A01-00888",
51 | "type": "gen3_node_property",
52 | "description": "The uniquely assigned case identifier in MIDRC."
53 | },
54 | "Case.data_submission_guid": {
55 | "value": "foobar",
56 | "type": "gen3_node_property",
57 | "description": "The identifier for this subject as provided by the site’s submission of Datavant tokens to MIDRC."
58 | },
59 | "masked_n3c_id": {
60 | "value": "123dfj4ia5oi*@a",
61 | "type": "masked_n3c_id",
62 | "description": "Masked National COVID Consortium ID provided by a Linkage Honest Broker to the MIDRC system."
63 | }
64 | },
65 | "mapping_methodologies": [
66 | "NHLBI provided a file of subject IDs for the PETAL study that directly associate a PETAL ID with a BDCat Subject Identifier.",
67 | "A Linkage Honest Broker provided MIDRC with what Masked N3C IDs match MIDRC cases via a system-to-system handoff."
68 | ]
69 | }
70 | }
71 | }
72 | ```
73 |
74 | ## Crosswalk Data Upload Using Gen3 SDK/CLI
75 |
76 | ### `crosswalk.csv`
77 |
78 | To provide mapping from one commons identifier to another.
79 |
80 | * Columns are pipe-delimited and contain necessary information for crosswalk metadata
81 | * `{{commons url}}|{{identifier type}}|{{identifier name}}`
82 | * `{{identifier type}}` for Gen3 Graph node property: `gen3_node_property`
83 | * `{{identifier name}}` for Gen3 Graph node property: `{{node}}.{{property}}`
84 | * File name does not matter
85 |
86 | ### `crosswalk_optional_info.csv`
87 |
88 | To provide descriptions for commons identifiers.
89 |
90 | * `{{commons url}}, {{identifier name}}, {{description}}`
91 | * File name does not matter
92 |
93 | ### Example 1
94 |
95 | `crosswalk_1.csv`
96 |
97 | ```
98 | https://data.midrc.org|gen3_node_property|Case.submitter_id, https://gen3.biodatacatalyst.nhlbi.nih.gov|gen3_node_property|Subject.submitter_id
99 | A01-00888, phs002363.v1_RC-1358
100 | …
101 | ```
102 |
103 | (optional) `crosswalk_optional_info_1.csv`
104 |
105 | > Note: MUST include headers `commons_url,identifier_name,description`
106 |
107 | ```
108 | commons_url,identifier_name,description
109 | https://data.midrc.org, Case.submitter_id, The uniquely assigned case identifier in MIDRC.
110 | https://gen3.biodatacatalyst.nhlbi.nih.gov, Subject.submitter_id, These identifiers are constructed as part of the data ingestion process in BDCat and concatenate the study and version with the study-provided subject ID (with a _ delimiting).
111 | ```
112 |
113 | Gen3 SDK Command
114 |
115 | ```
116 | gen3 objects crosswalk publish ./tests/test_data/crosswalk/crosswalk_1.csv -m "NHLBI provided a file of subject IDs for the PETAL study that directly associate a PETAL ID with a BDCat Subject Identifier." --info ./tests/test_data/crosswalk/crosswalk_optional_info_1.csv
117 | ```
118 |
119 | > `publish` merges any new crosswalk data with existing data
120 |
121 | ### Example 2
122 |
123 | `crosswalk_2.csv`
124 |
125 | ```
126 | https://data.midrc.org|gen3_node_property|Case.submitter_id, https://data.midrc.org|gen3_node_property|Case.data_submission_guid,
127 | https://data.midrc.org|masked_n3c_id|Masked N3C ID
128 | A01-00888, foobar, 123dfj4ia5oi*@a
129 | …
130 | ```
131 |
132 | (optional) `crosswalk_optional_info_2.csv`
133 |
134 | > Note: MUST include headers `commons_url,identifier_name,description`
135 |
136 | ```
137 | commons_url,identifier_name,description
138 | https://data.midrc.org, Case.data_submission_guid, The identifier for this subject as provided by the site’s submission of Datavant tokens to MIDRC.
139 | https://data.midrc.org,Masked N3C ID,Masked National COVID Consortium ID provided by a Linkage Honest Broker to the MIDRC system.
140 | ```
141 |
142 | Gen3 SDK Command
143 |
144 | ```
145 | gen3 objects crosswalk publish crosswalk_2.csv -m "A Linkage Honest Broker provided MIDRC with what Masked N3C IDs match MIDRC cases via a system-to-system handoff." --info crosswalk_optional_info_2.csv
146 | ```
147 |
148 | > `publish` merges any new crosswalk data with existing data
149 |
150 | ## Gen3 SDK handling crosswalk.csv submission to MDS
151 |
152 | ```
153 | gen3 objects crosswalk --help
154 | ```
155 |
156 | General flow for `publish` is:
157 |
158 | - Parse provided `crosswalk.csv` and optionally `crosswalk_optional_info.csv` file(s)
159 | - Validate format(s) (especially column names)
160 | - Convert information from `crosswalk.csv` into a payload to push to the MDS based on "Centralized Mapping Information Format" above
161 |
--------------------------------------------------------------------------------
/docs/howto/devTest.md:
--------------------------------------------------------------------------------
1 | ## Dev-Test
2 |
3 | ### Set up Python Virtual Environment
4 |
5 | You can set up a Python development environment with a virtual environment:
6 |
7 | ```bash
8 | python3 -m venv py3
9 | ```
10 |
11 | Make sure that you have the virtual environment activated:
12 |
13 | ```bash
14 | . py3/bin/activate
15 | ```
16 |
17 | ### Install poetry
18 |
19 | To use the latest code in this repo (or to develop new features), you can clone this repo and install `poetry`:
20 |
21 | ```
22 | curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python -
23 | ```
24 |
25 | and then use `poetry` to install this package:
26 |
27 | ```
28 | poetry install -vv
29 | ```
30 |
31 |
32 | ### Setup and run tests
33 |
34 | For local development, set up the environment and run the tests like this:
35 |
36 | ```
37 | poetry shell
38 | poetry install -vv
39 | python3 -m pytest
40 | ```
41 |
42 | There are various ways to select a subset of python unit-tests - see: https://stackoverflow.com/questions/36456920/is-there-a-way-to-specify-which-pytest-tests-to-run-from-a-file
43 |
44 | ### Manual Testing
45 |
46 | You can also set up credentials to submit data to the graph in your data commons. This assumes that you can get API access by downloading your [credentials.json](https://docs.gen3.org/gen3-resources/user-guide/using-api/#credentials-to-send-api-requests).
47 |
48 | > Make sure that your python virtual environment and dependencies are updated. Also, check that your credentials have appropriate permissions to make the service calls too.
49 | ```python
50 | COMMONS_URL = "https://mycommons.azurefd.net"
51 | PROGRAM_NAME = "MyProgram"
52 | PROJECT_NAME = "MyProject"
53 | CREDENTIALS_FILE_PATH = "credentials.json"
54 | gen3_node_json = {
55 | "projects": {"code": PROJECT_NAME},
56 | "type": "core_metadata_collection",
57 | "submitter_id": "core_metadata_collection_myid123456",
58 | }
59 | auth = Gen3Auth(endpoint=COMMONS_URL, refresh_file=CREDENTIALS_FILE_PATH)
60 | sheepdog_client = Gen3Submission(COMMONS_URL, auth)
61 | json_result = sheepdog_client.submit_record(PROGRAM_NAME, PROJECT_NAME, gen3_node_json)
62 | ```
63 |
64 | ### CLI
65 |
66 | If the `gen3` cli is not in your path, or is overwritten by a shell function, then you can still invoke the cli:
67 |
68 | ```
69 | python -m gen3.cli --help
70 | or
71 | poetry run gen3 --help
72 | ```
73 |
--------------------------------------------------------------------------------
/docs/howto/externalFileDownloading.md:
--------------------------------------------------------------------------------
1 | ## Downloading files from external repositories
2 |
3 | ## External file metadata
4 |
5 | The study metadata should indicate if data are hosted in an external repository.
6 | This is specified in the `external_file_metadata` field. An example is shown below.
7 |
8 |
9 | ```json
10 | {
11 | "_guid_type": "discovery_metadata",
12 | "gen3_discovery": {
13 | // Gen3 administrative fields
14 | ...,
15 |
16 | [
17 | {
18 | "external_oidc_idp": "externaldata-keycloak",
19 | "file_retriever": "QDR",
20 | "study_id": "QDR_study_01"
21 | },
22 | {
23 | "external_oidc_idp": "externaldata-keycloak",
24 | "file_retriever": "QDR",
25 | "file_id": "QDR_file_02"
26 | },
27 | ]
28 | }
29 | }
30 | ```
31 |
32 | The `'file_retriever'` field is required. It is used to determine how to retrieve the file.
33 |
34 | The `'external_oidc_idp'` field is required if retrieving the file(s) requires a token from the `workspace token service`. If the file(s) can be retrieved without any tokens, this field can be omitted.
35 |
36 | The `'study_id'` and `'file_id'` fields are allowed but are not required.
37 |
38 | ## Example code with external file download using a retriever function
39 |
40 | The code should import `download_files_from_metadata` as well as a retriever function.
41 |
42 | Prior to running the download code, there should be a call to the WTS `authorization_url` endpoint,
43 |
44 | `/oauth2/authorization_url?idp=`
45 |
46 | followed by the user logging in to the external-idp.
47 |
48 | ```python
49 | from gen3.auth import Gen3Auth
50 | from gen3.tools.download.external_file_download import download_files_from_metadata
51 | # example retriever function
52 | from heal.qdr_downloads import get_syracuse_qdr_files
53 |
54 | # host for commons where wts_server is configured for QDR tokens
55 | wts_hostname = "my-dev.planx-pla.net"
56 | credentials_file = "credentials_my-dev.json"
57 | # retriever will use Gen3Auth to request a QDR token from the gen3 commons
58 | auth = Gen3Auth(refresh_file=credentials_file)
59 | # the referenced retriever function should have been imported into this module
60 | retrievers = {"QDR": get_syracuse_qdr_files}
61 | download_path = "data/qdr"
62 |
63 | test_external_file_metadata = [
64 | {
65 | "external_oidc_idp": "externaldata-keycloak",
66 | "file_retriever": "QDR",
67 | "study_id": "doi:10.5064/F6N2GOC9"
68 | }
69 | ]
70 |
71 | download_status = download_files_from_metadata(
72 | hostname=wts_hostname,
73 | auth=auth,
74 | external_file_metadata=test_external_file_metadata,
75 | retrievers=retrievers,
76 | download_path=download_path
77 | )
78 | print(f"Download status = {download_status}")
79 | ```
80 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | Welcome to Gen3 SDK's documentation!
2 | ====================================
3 |
4 | The README in the Gen3 Python SDK GitHub repo contains installation and setup information along with some quickstart scripts. This site contains more detailed documentation about the various classes and functions available.
5 |
6 | This documentation is mostly auto-generated from the docstrings within the source code.
7 |
8 | .. toctree::
9 | :glob:
10 |
11 | *
12 |
13 | Indices and tables
14 | ==================
15 |
16 | * :ref:`genindex`
17 | * :ref:`modindex`
18 | * :ref:`search`
19 |
--------------------------------------------------------------------------------
/docs/indexing.rst:
--------------------------------------------------------------------------------
1 | Gen3 Index Class
2 | ----------------
3 |
4 | .. autoclass:: gen3.index.Gen3Index
5 | :members:
6 | :show-inheritance:
7 |
--------------------------------------------------------------------------------
/docs/jobs.rst:
--------------------------------------------------------------------------------
1 | Gen3 Jobs Class
2 | ----------------
3 |
4 | .. autoclass:: gen3.jobs.Gen3Jobs
5 | :members:
6 | :show-inheritance:
7 |
--------------------------------------------------------------------------------
/docs/metadata.rst:
--------------------------------------------------------------------------------
1 | Gen3 Metadata Class
2 | -------------------
3 |
4 | .. autoclass:: gen3.metadata.Gen3Metadata
5 | :members:
6 | :show-inheritance:
7 |
--------------------------------------------------------------------------------
/docs/object.rst:
--------------------------------------------------------------------------------
1 | Gen3 Object Class
2 | -------------------
3 |
4 | .. autoclass:: gen3.object.Gen3Object
5 | :members:
6 | :show-inheritance:
7 |
--------------------------------------------------------------------------------
/docs/query.rst:
--------------------------------------------------------------------------------
1 | Gen3 Query Class
2 | ---------------------
3 |
4 | .. autoclass:: gen3.query.Gen3Query
5 | :members:
6 | :show-inheritance:
7 |
--------------------------------------------------------------------------------
/docs/reference/sdkClasses.md:
--------------------------------------------------------------------------------
1 | ## Available Classes
2 |
3 | TOC
4 | - [Gen3Auth](#gen3auth)
5 | - [Gen3Index](#gen3index)
6 | - [Gen3Submission](#gen3submission)
7 | - [Gen3Jobs](#gen3jobs)
8 | - Gen3File
9 | - [Gen3Query](#gen3query)
10 | - [Gen3Wrap](#gen3wrap)
11 | - Gen3Metadata
12 | - Gen3WsStorage
13 |
14 | ### Gen3Auth
15 |
16 | This contains an auth wrapper for supporting JWT based authentication with `requests`. The access token is generated from the refresh token and is regenerated on expiration.
17 |
18 | By default - the `Gen3Auth` constructor looks for an api key
19 | in `~/.gen3/credentials.json`. You may override that path
20 | via the `GEN3_API_KEY` environment variable, or by passing a
21 | `refresh_file` parameter.
22 |
23 | When working in a Gen3 Workspace, all parameters are optional and the `Gen3Auth` instance should be initialized as follows:
24 |
25 | ```
26 | auth = Gen3Auth()
27 | ```
28 |
29 | See [detailed Gen3Auth documentation](https://uc-cdis.github.io/gen3sdk-python/_build/html/auth.html) for more details.
30 |
31 | ### Gen3Index
32 |
33 | This is the client for interacting with the Indexd service for GUID brokering and resolution.
34 |
35 | ### Gen3Submission
36 |
37 | This is the client for interacting with the Gen3 submission service including GraphQL queries.
38 |
39 | ### Gen3Wrap
40 |
41 | A class that leverages `Gen3Auth` to retrieve the access token from the user's `~/.gen3/credentials.json` file, sets it as the `GEN3_TOKEN` environment variable, and relays all commands and options passed to `gen3 run`
42 |
43 | ### Gen3Query
44 |
45 | This is the client for interacting with the Gen3 ElasticSearch query service.
46 |
47 | ### Gen3Jobs
48 |
49 | This is the client for interacting with Gen3's job dispatching service. A complex example script which calls a job that combines dbGaP data with indexed file objects can be seen below:
50 |
51 |
52 | ```python
53 | import sys
54 | import logging
55 | import asyncio
56 |
57 | from gen3.index import Gen3Index
58 | from gen3.auth import Gen3Auth
59 | from gen3.jobs import Gen3Jobs, DBGAP_METADATA_JOB, INGEST_METADATA_JOB
60 | from gen3.utils import get_or_create_event_loop_for_thread
61 |
62 | # An API Key downloaded from the above commons' "Profile" page
63 | API_KEY_FILEPATH = "credentials.json"
64 |
65 | logging.basicConfig(filename="output.log", level=logging.INFO)
66 | logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))
67 |
68 |
69 | def metadata_ingest():
70 | auth = Gen3Auth(refresh_file=API_KEY_FILEPATH)
71 | jobs = Gen3Jobs(auth_provider=auth)
72 |
73 | job_input = {
74 | "URL": "https://cdistest-public-test-bucket.s3.amazonaws.com/04_28_20_21_55_13_merged_metadata_manifest.tsv",
75 | "metadata_source": "dbgaptest",
76 | }
77 |
78 | loop = get_or_create_event_loop_for_thread()
79 |
80 | job_output = loop.run_until_complete(
81 | jobs.async_run_job_and_wait(job_name=INGEST_METADATA_JOB, job_input=job_input)
82 | )
83 | print(job_output)
84 |
85 |
86 | def main():
87 | auth = Gen3Auth(refresh_file=API_KEY_FILEPATH)
88 | jobs = Gen3Jobs(auth_provider=auth)
89 |
90 | job_input = {
91 | "phsid_list": "phs000920 phs000921 phs000946 phs000951 phs000954 phs000956 phs000964 phs000972 phs000974 phs000988 phs000993 phs000997 phs001024 phs001032 phs001040 phs001062 phs001143 phs001189 phs001207 phs001211 phs001215 phs001217 phs001218 phs001237 phs001293 phs001345 phs001359 phs001368 phs001387 phs001402 phs001412 phs001416",
92 | "indexing_manifest_url": "https://cdistest-public-test-bucket.s3.amazonaws.com/release_manifest_no_dbgap_no_sample.csv",
93 | "manifests_mapping_config": {
94 | "guid_column_name": "guid",
95 | "row_column_name": "submitted_sample_id",
96 | "indexing_manifest_column_name": "gcp_uri",
97 | },
98 | "partial_match_or_exact_match": "partial_match",
99 | }
100 |
101 | loop = get_or_create_event_loop_for_thread()
102 |
103 | job_output = loop.run_until_complete(
104 | jobs.async_run_job_and_wait(job_name=DBGAP_METADATA_JOB, job_input=job_input)
105 | )
106 | print(job_output)
107 |
108 |
109 | if __name__ == "__main__":
110 | metadata_ingest()
111 |
112 | ```
113 |
114 | ```python
115 | import sys
116 | import logging
117 | import asyncio
118 |
119 | from gen3.auth import Gen3Auth
120 | from gen3.jobs import Gen3Jobs, DBGAP_METADATA_JOB
121 | from gen3.utils import get_or_create_event_loop_for_thread
122 |
123 | # An API Key downloaded from the above commons' "Profile" page
124 | API_KEY_FILEPATH = "credentials.json"
125 |
126 | logging.basicConfig(filename="output.log", level=logging.INFO)
127 | logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))
128 |
129 | # NOTE: The indexing_manifest_url must exist and be publicly accessible
130 | JOB_INPUT = {
131 | "phsid_list": "phs000956 phs000920",
132 | "indexing_manifest_url": "https://example.com/public_indexing_manifest.csv",
133 | "manifests_mapping_config": {
134 | "guid_column_name": "guid",
135 | "row_column_name": "submitted_sample_id",
136 | "indexing_manifest_column_name": "urls",
137 | },
138 | "partial_match_or_exact_match": "partial_match",
139 | }
140 |
141 |
142 | def example_async_run_job():
143 | auth = Gen3Auth(refresh_file=API_KEY_FILEPATH)
144 | jobs = Gen3Jobs(auth_provider=auth)
145 |
146 | loop = get_or_create_event_loop_for_thread()
147 |
148 | job_output = loop.run_until_complete(
149 | jobs.async_run_job_and_wait(job_name=DBGAP_METADATA_JOB, job_input=JOB_INPUT)
150 | )
151 | print(job_output)
152 |
153 | def example_non_async_run_job():
154 | auth = Gen3Auth(refresh_file=API_KEY_FILEPATH)
155 | jobs = Gen3Jobs(auth_provider=auth)
156 |
157 | is_healthy = jobs.is_healthy()
158 | print(is_healthy)
159 |
160 | version = jobs.get_version()
161 | print(version)
162 |
163 | create_job = jobs.create_job(job_name=DBGAP_METADATA_JOB, job_input=JOB_INPUT)
164 | print(create_job)
165 |
166 | status = "Running"
167 | while status == "Running":
168 | status = jobs.get_status(create_job.get("uid")).get("status")
169 | print(status)
170 |
171 | get_output = jobs.get_output(create_job.get("uid"))
172 | print(get_output)
173 |
174 |
175 | if __name__ == "__main__":
176 | example_async_run_job()
177 | ```
178 |
179 |
--------------------------------------------------------------------------------
/docs/submission.rst:
--------------------------------------------------------------------------------
1 | Gen3 Submission Class
2 | ---------------------
3 |
4 | .. autoclass:: gen3.submission.Gen3Submission
5 | :members:
6 | :show-inheritance:
7 |
--------------------------------------------------------------------------------
/docs/tools.rst:
--------------------------------------------------------------------------------
1 | Gen3 Tools
2 | ----------
3 |
4 | Tools and functions for common actions in Gen3. These tools are broken up into broad categories like indexing (for tasks related to the file object persistent identifiers within the system) and metadata (for tasks relating to file object metadata within the system).
5 |
6 | Such common indexing tasks may involve indexing file object URLs into Gen3 to assign persistent identifiers, downloading a manifest of every file object that already exists, and verifying that a Gen3 instance contains the expected indexed file objects based on a file.
7 |
8 | For metadata, the task may be ingesting a large amount of metadata from a file into the system.
9 |
10 | Most of these tools utilize async capabilities of Python to make common tasks more efficient.
11 |
12 | .. toctree::
13 | :glob:
14 |
15 | tools/*
16 |
17 | .. automodule:: gen3.tools
18 | :members:
19 | :show-inheritance:
20 |
--------------------------------------------------------------------------------
/docs/tools/drs_pull.rst:
--------------------------------------------------------------------------------
1 | DRS Download Tools
2 | ------------------
3 |
4 | .. automodule:: gen3.tools.download.drs_download
5 | :members: download_files_in_drs_manifest, download_drs_object, list_files_in_drs_manifest,
6 | list_drs_object, list_access_in_drs_manifest,
7 | Manifest, DownloadManager, Downloadable, DownloadStatus,
8 |
--------------------------------------------------------------------------------
/docs/tools/indexing.rst:
--------------------------------------------------------------------------------
1 | Indexing Tools
2 | --------------
3 |
4 |
5 | Download
6 | ========
7 |
8 | .. automodule:: gen3.tools.indexing.download_manifest
9 | :members:
10 | :show-inheritance:
11 |
12 | Index
13 | ========
14 |
15 | .. automodule:: gen3.tools.indexing.index_manifest
16 | :members:
17 | :show-inheritance:
18 |
19 | Verify
20 | ========
21 |
22 | .. automodule:: gen3.tools.indexing.verify_manifest
23 | :members:
24 | :show-inheritance:
--------------------------------------------------------------------------------
/docs/tools/metadata.rst:
--------------------------------------------------------------------------------
1 | Metadata Tools
2 | --------------
3 |
4 |
5 | Ingest
6 | ========
7 |
8 | .. automodule:: gen3.tools.metadata.ingest_manifest
9 | :members:
10 | :show-inheritance:
11 |
--------------------------------------------------------------------------------
/docs/tutorial/quickStart.md:
--------------------------------------------------------------------------------
1 | # Scripting Quickstart
2 |
3 | * install the sdk: `pip install gen3`
4 | * for authenticated access to a commons - download an API key from the portal's Profile page, and save it as `~/.gen3/credentials.json`
5 | * write a script that uses the sdk (examples below)
6 | * function calls that are configured with a backoff are retried 3 times by default. The number of retries can be customized by setting the `GEN3SDK_MAX_RETRIES` environment variable
7 |
8 | ## Quickstart Example - Object Index
9 |
10 | The Gen3 object index (indexd) provides public read access
11 | that does not require authentication.
12 |
13 | ```python
14 | """
15 | This script will use an instance of the Gen3Index class to communicate with a Gen3
16 | Commons indexing service to get some basic information.
17 |
18 | The example commons we're using is an open Canine Data Commons.
19 | """
20 | from gen3.index import Gen3Index
21 |
22 | # Gen3 Commons URL
23 | COMMONS = "https://caninedc.org/"
24 |
25 |
26 | def main():
27 | index = Gen3Index(COMMONS)
28 | if not index.is_healthy():
29 | print(f"uh oh! The indexing service is not healthy in the commons {COMMONS}")
30 | exit()
31 |
32 | print("some file stats:")
33 | print(index.get_stats())
34 |
35 | print("example GUID record:")
36 | print(index.get(guid="afea506a-62d0-4e8e-9388-19d3c5ac52be"))
37 |
38 |
39 | if __name__ == "__main__":
40 | main()
41 |
42 | ```
43 |
44 | ## Quickstart Example w/ Auth - Modify Object Index
45 |
46 | Some Gen3 API endpoints require authentication and special privileges to be able to use. The SDK can automate a lot of this by simply providing it with an API Key you download from the Gen3 Commons UI after logging in.
47 |
48 | > NOTE: The below script will most likely fail for you because your user doesn't have access to create in that commons. However, the example is still important because if you *did* have access, this would handle passing your access token to the commons API correctly.
49 |
50 | ```python
51 | """
52 | This script will use an instance of the Gen3Index class to attempt to create a
53 | new indexed file record in the specified Gen3 Commons indexing service.
54 |
55 | The example commons we're using is an open Canine Data Commons.
56 | """
57 | from gen3.index import Gen3Index
58 | from gen3.auth import Gen3Auth
59 |
60 |
61 | # Install an API Key downloaded from the
62 | # commons' "Profile" page at ~/.gen3/credentials.json
63 |
64 |
65 | def main():
66 | auth = Gen3Auth()
67 | index = Gen3Index(auth.endpoint, auth_provider=auth)
68 | if not index.is_healthy():
69 | print(f"uh oh! The indexing service is not healthy in the commons {auth.endpoint}")
70 | exit()
71 |
72 | print("trying to create new indexed file object record:\n")
73 | try:
74 | response = index.create_record(
75 | hashes={"md5": "ab167e49d25b488939b1ede42752458b"}, size=42, acl=["*"]
76 | )
77 | except Exception as exc:
78 | print(
79 | "\nERROR occurred when trying to create the record, you probably don't have access."
80 | )
81 |
82 |
83 | if __name__ == "__main__":
84 | main()
85 |
86 | ```
87 |
88 | ## Metadata
89 |
90 | For interacting with Gen3's metadata service.
91 |
92 | ```python
93 | import sys
94 | import logging
95 | import asyncio
96 |
97 | from gen3.auth import Gen3Auth
98 | from gen3.metadata import Gen3Metadata
99 |
100 | logging.basicConfig(filename="output.log", level=logging.DEBUG)
101 | logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))
102 |
103 | def main():
104 | auth = Gen3Auth(refresh_file="credentials.json")
105 | mds = Gen3Metadata(auth_provider=auth)
106 |
107 | if mds.is_healthy():
108 | print(mds.get_version())
109 |
110 | guid = "95a41871-444c-48ae-8004-63f4ed1f0691"
111 | metadata = {
112 | "foo": "bar",
113 | "fizz": "buzz",
114 | "nested_details": {
115 | "key1": "value1"
116 | }
117 | }
118 | mds.create(guid, metadata, overwrite=True)
119 |
120 | guids = mds.query("nested_details.key1=value1")
121 |
122 | print(guids)
123 | # >>> ['95a41871-444c-48ae-8004-63f4ed1f0691']
124 |
125 | if __name__ == "__main__":
126 | main()
127 | ```
128 |
--------------------------------------------------------------------------------
/docs/wss.rst:
--------------------------------------------------------------------------------
1 | Gen3 Workspace Storage
2 | -----------------------
3 |
4 | .. autoclass:: gen3.wss.Gen3WsStorage
5 | :members:
6 | :show-inheritance:
7 |
--------------------------------------------------------------------------------
/gen3/__init__.py:
--------------------------------------------------------------------------------
1 | from cdislogging import get_logger
2 |
# Log line layout: timestamp, level name padded to a width of 7, then the message.
# The CLI entry point reuses this value as `gen3.LOG_FORMAT`.
LOG_FORMAT = "[%(asctime)s][%(levelname)7s] %(message)s"
# Package-level logger, created at import time with log level "info".
# NOTE(review): the logger name passed here is the literal string "__name__",
# not the module's `__name__` variable — confirm whether that is intentional
# before changing it, since renaming the logger may affect existing log setups.
logging = get_logger("__name__", format=LOG_FORMAT, log_level="info")
5 |
--------------------------------------------------------------------------------
/gen3/cli/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uc-cdis/gen3sdk-python/0ca70c579d55cb0f6218fa7a534d02a8910acda1/gen3/cli/__init__.py
--------------------------------------------------------------------------------
/gen3/cli/__main__.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | from importlib.metadata import version
4 |
5 | import click
6 |
7 | import cdislogging
8 | import gen3.cli.auth as auth
9 | import gen3.cli.pfb as pfb
10 | import gen3.cli.wss as wss
11 | import gen3.cli.discovery as discovery
12 | import gen3.cli.configure as configure
13 | import gen3.cli.objects as objects
14 | import gen3.cli.file as file
15 | import gen3.cli.drs_pull as drs_pull
16 | import gen3.cli.users as users
17 | import gen3.cli.wrap as wrap
18 | import gen3
19 | from gen3 import logging as sdklogging
20 | from gen3.cli import nih
21 |
22 |
class AuthFactory:
    """Creates a ``Gen3Auth`` on first use and returns that same object afterwards."""

    def __init__(self, refresh_file):
        # Nothing is built here; the auth object is only constructed by get().
        self._cache = None
        self.refresh_file = refresh_file

    def get(self):
        """Return the cached auth object, building it on the first call."""
        if not self._cache:
            self._cache = gen3.auth.Gen3Auth(refresh_file=self.refresh_file)
        return self._cache
34 |
35 |
# Root `gen3` command group.
#
# The options below are stored on the click context (`ctx.obj`) under the keys
# "auth_config", "endpoint", "commons_url" and "auth_factory". The logging
# flags are checked in priority order: --silent, -vv, -v, --only-error-logs,
# and finally the default (WARNING).
#
# NOTE: the function docstring is intentionally left as a single short line —
# click uses it as the help text printed by `gen3 --help`.
@click.group()
@click.option(
    "--auth",
    "auth_config",
    default=os.getenv("GEN3_API_KEY", None),
    help="""authentication source, by default expects an API key in "~/.gen3/credentials.json".
Has special support for token service: "idp://wts/", and raw access tokens
"accesstoken:///",
otherwise a path to an API key or basename of key under ~/.gen3/ can be used.
Default value is "credentials" if ~/.gen3/credentials.json exists, otherwise "idp://wts/local"
""",
)
@click.option(
    "--endpoint",
    "endpoint",
    default=os.getenv("GEN3_ENDPOINT", None),
    help="commons hostname - optional if API Key given in `auth`",
)
@click.option(
    "-v",
    "verbose_logs",
    is_flag=True,
    default=False,
    help="verbose logs show INFO, WARNING & ERROR logs",
)
@click.option(
    "-vv",
    "very_verbose_logs",
    is_flag=True,
    default=False,
    help="very verbose logs show DEBUG, INFO, WARNING & ERROR logs",
)
@click.option(
    "--only-error-logs",
    "only_error_logs",
    is_flag=True,
    default=False,
    help="only show ERROR logs",
)
@click.option(
    "--silent",
    "silent",
    is_flag=True,
    default=False,
    help="don't show ANY logs",
)
@click.option(
    "--commons_url",
    "commons_url",
    default=os.getenv("GEN3_COMMONS_URL", None),
    help="commons url for fetching file metadata from if different than endpoint",
)
@click.pass_context
@click.version_option(version=version("gen3"))
def main(
    ctx,
    auth_config,
    endpoint,
    verbose_logs,
    very_verbose_logs,
    only_error_logs,
    silent,
    commons_url,
):
    """Gen3 Command Line Interface"""
    # Share the global options with sub-commands through the click context.
    ctx.ensure_object(dict)
    ctx.obj["auth_config"] = auth_config
    ctx.obj["endpoint"] = endpoint
    ctx.obj["commons_url"] = commons_url
    ctx.obj["auth_factory"] = AuthFactory(auth_config)

    # Pick the log level from the flags; the first matching flag wins.
    if silent:
        # The logger still has to be defined; the level does not really matter
        # because all logging is disabled right below.
        log_level = "debug"
    elif very_verbose_logs:
        log_level = "debug"
    elif verbose_logs:
        log_level = "info"
    elif only_error_logs:
        log_level = "error"
    else:
        log_level = "warning"

    # Called for its side effect of configuring this module's logger; the
    # returned logger object is not used here.
    cdislogging.get_logger(__name__, format=gen3.LOG_FORMAT, log_level=log_level)

    if silent:
        # disables all logging
        logging.disable(logging.CRITICAL)
    else:
        # Keep the SDK-wide logger in step with the CLI verbosity.
        sdklogging.setLevel(log_level.upper())
136 |
# Attach every sub-command group to the root `gen3` group.
main.add_command(auth.auth)
main.add_command(pfb.pfb)
main.add_command(wss.wss)
main.add_command(discovery.discovery)
main.add_command(configure.configure)
main.add_command(objects.objects)
main.add_command(drs_pull.drs_pull)
main.add_command(file.file)
main.add_command(nih.nih)
main.add_command(users.users)
main.add_command(wrap.run)

# Only run the CLI when executed as a program. Importing this module (as the
# `gen3.cli.__main__:main` console-script entry point does, before calling
# `main` itself) must not start argument parsing as an import side effect.
if __name__ == "__main__":
    main()
149 |
--------------------------------------------------------------------------------
/gen3/cli/auth.py:
--------------------------------------------------------------------------------
1 | import click
2 | import os.path
3 | import json
4 | import requests
5 | import sys
6 | import gen3.auth as auth_tool
7 |
8 | from cdislogging import get_logger
9 |
10 | logging = get_logger("__name__")
11 |
12 |
def stderr(*parts):
    """Log the given values as a single ERROR-level message.

    The values are converted to text and joined with single spaces. Nothing
    is passed as a %-format argument, so the message is emitted verbatim
    (a literal "%" in the text is safe).

    Args:
        *parts: values to include in the message
    """
    logging.error(" ".join(map(str, parts)))
15 |
16 |
@click.command()
@click.option("--request", "request", help="HTTP Method - GET, PUT, POST, DELETE")
@click.option(
    "--data", "data", help="json data to post - read from file if starts with @"
)
@click.argument("path")
@click.pass_context
def curl(ctx, path, request=None, data=None):
    """Curl the endpoint with a token - ex: gen3 curl /user/user"""
    response = ctx.obj["auth_factory"].get().curl(path, request, data)
    # The response body is logged regardless of the status code.
    logging.info(response.text)
    # Anything outside 200..299 is reported and makes the process exit with 1.
    if not (200 <= response.status_code <= 299):
        stderr("err status code %i" % response.status_code)
        sys.exit(1)
32 |
33 |
@click.command()
@click.pass_context
def endpoint(ctx):
    """Get the endpoint associated with the active authenticator"""
    auth_provider = ctx.obj["auth_factory"].get()
    logging.info(auth_provider.endpoint)
39 |
40 |
@click.command()
@click.pass_context
def get_access_token(ctx):
    """Get an access token suitable to pass as an Authorization header bearer"""
    auth_provider = ctx.obj["auth_factory"].get()
    access_token = auth_provider.get_access_token()
    logging.info(access_token)
46 |
47 |
@click.command()
@click.argument("token_file")
def token_decode(token_file):
    """Decode the given token file - may be "-" to indicate stdin"""
    if token_file != "-":
        with open(token_file) as handle:
            raw_token = handle.read()
    else:
        # "-" selects standard input instead of a file on disk.
        raw_token = sys.stdin.read()
    decoded = auth_tool.decode_token(raw_token)
    logging.info(json.dumps(decoded, indent=2))
59 |
60 |
@click.command()
def wts_endpoint():
    """Get the wts endpoint"""
    wts_url = auth_tool.get_wts_endpoint()
    logging.info(wts_url)
65 |
66 |
@click.command()
def wts_list():
    """list the idp's available from the wts in a Gen3 workspace environment"""
    idps = auth_tool.get_wts_idps()
    logging.info(json.dumps(idps, indent=2))
71 |
72 |
@click.group()
def auth():
    """Commands for authentication and authorization"""
    # Container group only; the sub-commands are attached below via add_command.
77 |
78 |
79 | auth.add_command(wts_endpoint, name="wts-endpoint")
80 | auth.add_command(wts_list, name="wts-list")
81 | auth.add_command(token_decode, name="token-decode")
82 | auth.add_command(get_access_token, name="access-token")
83 | auth.add_command(endpoint, name="endpoint")
84 | auth.add_command(curl, name="curl")
85 |
--------------------------------------------------------------------------------
/gen3/cli/configure.py:
--------------------------------------------------------------------------------
1 | from cdislogging import get_logger
2 | import click
3 | import gen3.configure as config_tool
4 |
5 | logging = get_logger("__name__")
6 |
7 |
@click.command()
@click.option("--profile", help="name of the profile to name for this credentials")
@click.option("--cred", help="path to the credentials.json")
def configure(profile, cred):
    """[unfinished] Commands to configure multiple profiles with corresponding credentials

    ./gen3 configure --profile= --cred=
    """
    logging.info(f"Configuring profile [ {profile} ] with credentials at {cred}")

    try:
        # Build the new profile section, read the current config, then write.
        section = config_tool.get_profile_from_creds(profile, cred)
        profile_title, new_lines = section
        existing_lines = config_tool.get_current_config_lines()
        config_tool.update_config_lines(existing_lines, profile_title, new_lines)
    except Exception as err:
        # Log the failure as a warning, then let it propagate to the caller.
        logging.warning(str(err))
        raise err
26 |
--------------------------------------------------------------------------------
/gen3/cli/file.py:
--------------------------------------------------------------------------------
1 | import click
2 | import time
3 |
4 | from cdislogging import get_logger
5 |
6 | from gen3.file import Gen3File
7 | from gen3.utils import get_or_create_event_loop_for_thread
8 |
9 |
10 | logger = get_logger("__name__")
11 |
12 |
@click.group()
def file():
    "Commands for asynchronously downloading files from a server"
    # Container group only; sub-commands are attached below via add_command.
17 |
18 |
@click.command(help="Download a single file using its GUID")
@click.argument("object_id", required=True)
@click.option("--path", "path", help="Path to store downloaded file in", default=".")
@click.pass_context
def single_download(ctx, object_id, path):
    # Build the file client from the lazily created auth object.
    downloader = Gen3File(ctx.obj["auth_factory"].get())

    # perf_counter() is a relative clock; it is only used to compute the duration.
    started_at = time.perf_counter()
    logger.info(f"Start time: {started_at}")

    succeeded = file_tool_result = downloader.download_single(
        object_id=object_id,
        path=path,
    )

    logger.info(f"Download - {'success' if succeeded else 'failure'}")

    elapsed = time.perf_counter() - started_at
    logger.info(f"\nDuration = {elapsed}\n")
39 |
40 |
41 | file.add_command(single_download, name="download-single")
42 |
--------------------------------------------------------------------------------
/gen3/cli/nih.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | import click
4 |
5 | from gen3.external.nih.dbgap_study_registration import dbgapStudyRegistration
6 | from cdislogging import get_logger
7 |
8 | logger = get_logger("__name__")
9 |
10 |
@click.group()
def nih():
    """Commands for reading from NIH APIs"""
    # Container group only; sub-groups register themselves with @nih.group().
15 |
16 |
@nih.group()
def dbgap_study_registration():
    """Commands for interacting with the dbgap study registration api"""
    # Container group only; commands register with @dbgap_study_registration.command().
21 |
22 |
# Named `get_metadata` so it no longer shares (and is no longer shadowed by)
# the name of the `get-child-studies` handler defined later in this module.
@dbgap_study_registration.command(name="get-metadata")
@click.argument(
    "studies",
    nargs=-1,
)
def get_metadata(studies):
    """
    Retrieve the study metadata associated with the provided study names.

    This command fetches the metadata associated with the given study names from the dbGaP Study Registration API.

    NOTE: If no version is provided, then the latest version for a study will be used.

    Args:
        studies (str): A space-separated list of study names in which to get metadata for.
    Example:
        gen3 nih dbgap-study-registration get-metadata phs002793 phs002794 phs002795
        > {
            "phs002793.v2.p1": {
                "@uid": "48490",
                "@whole_study_id": "44467",
                "@phs": "002793",
                "@v": "2",
                "@p": "1",
                "@createDate": "2022-09-14T11:39:44-05:00",
                "@completedByGPADate": "2022-11-07T09:51:24-05:00",
                "@modDate": "2022-12-21T09:03:26-05:00",
                "@maxParentChildStudyModDate": "2022-12-21T09:04:04-05:00",
                "@handle": "SLICE",
                "@num_participants": "500",
                "StudyInfo": {
                    "@accession": "phs002793.v2.p1",
                    "@parentAccession": "phs002793.v2.p1",
                    "childAccession": [
                        "phs002795.v1.p1",
                        "phs002796.v1.p1",
                        "phs002797.v1.p1",
                        "phs002798.v1.p1",
                        "phs002794.v2.p1"
                    ],
                    ...truncated for brevity...
                }
            }
    """
    result = dbgapStudyRegistration().get_metadata_for_ids(studies)

    # An empty/falsy result is only logged; a non-empty one is printed as JSON.
    if not result:
        logger.info(f"No study found for {studies}")
    else:
        click.echo(f"{json.dumps(result, indent=4)}")
73 |
74 |
@dbgap_study_registration.command(name="get-child-studies")
@click.argument(
    "studies",
    nargs=-1,
)
def get_child_studies(studies):
    """
    Retrieve the child studies associated with the provided study names.

    This command fetches the child studies associated with the specified parent study names
    from the dbGaP Study Registration API.

    NOTE: If no version is provided, then the latest version for a study will be used.

    Args:
        studies (str): A space-separated list of parent study names for which to fetch child studies.
    Example:
        gen3 nih dbgap-study-registration get-child-studies phs002793
        > {
            "phs002793.v2.p1": ["phs002795.v1.p1", "phs002796.v1.p1", "phs002797.v1.p1", "phs002798.v1.p1"]
        }
        gen3 nih dbgap-study-registration get-child-studies phs002076 phs002793
        > {
            "phs002076.v2.p1": ["phs002077.v2.p1"],
            "phs002793.v2.p1": ["phs002795.v1.p1", "phs002796.v1.p1", "phs002797.v1.p1", "phs002798.v1.p1"]
        }
    """
    children_by_parent = dbgapStudyRegistration().get_child_studies_for_ids(studies)

    # A non-empty result is printed as JSON; an empty/falsy one is only logged.
    if children_by_parent:
        click.echo(f"{json.dumps(children_by_parent, indent=4)}")
        return
    logger.info(f"No child studies found for {studies}")
108 |
109 |
@dbgap_study_registration.command(name="get-parent-studies")
@click.argument(
    "studies",
    nargs=-1,
)
def get_parent_studies(studies):
    """
    Retrieve the parent study associated with each of the provided study names.

    This command fetches the parent study associated with each of the specified child study names
    from the dbGaP Study Registration API.

    NOTE: If no version is provided, then the latest version for a study will be used.

    Args:
        studies (str): A space-separated list of child study names for which to fetch their parent study.
    Example:
        gen3 nih dbgap-study-registration get-parent-studies phs002795
        > {
            "phs002795.v1.p1": "phs002793.v2.p1"
        }
        gen3 nih dbgap-study-registration get-parent-studies phs002795 phs002796 phs002793
        > {
            "phs002795.v1.p1": "phs002793.v2.p1",
            "phs002796.v1.p1": "phs002793.v2.p1",
            "phs002793.v2.p1": null
        }
    """
    result = dbgapStudyRegistration().get_parent_studies_for_ids(studies)

    # An empty/falsy result is only logged; otherwise the mapping is printed
    # with json.dumps, which renders a missing parent (None) as `null`.
    if not result:
        logger.info(f"No parent studies found for any {studies}")
    else:
        click.echo(f"{json.dumps(result, indent=4)}")
144 |
--------------------------------------------------------------------------------
/gen3/cli/pfb.py:
--------------------------------------------------------------------------------
1 | import click
2 | from pfb import cli as pfb_cli
3 |
4 | try:
5 | from importlib.metadata import entry_points
6 | except ImportError:
7 | from importlib_metadata import entry_points
8 |
9 |
@click.group()
def main():
    """Gen3 Command Line Interface"""
    # Empty group body; nothing in this module attaches commands to it.
14 |
15 |
@click.group()
def pfb():
    """Commands for working with Portable Format for Biomedical Data (PFB)"""
    # Container group only; it is populated from the pypfb CLI just below.
20 |
21 |
# Re-register every command of the pypfb CLI under the `pfb` group.
for command in pfb_cli.main.commands:
    pfb.add_command(pfb_cli.main.get_command(ctx=None, cmd_name=command))

# load plug-ins from entry_points
# Newer importlib.metadata selects a group via keyword and (from Python 3.12)
# no longer offers the dict-style `.get`; older versions accept no arguments
# and raise TypeError, in which case the dict-style lookup is used instead.
try:
    _plugin_entry_points = entry_points(group="gen3.plugins")
except TypeError:
    _plugin_entry_points = entry_points().get("gen3.plugins", [])

for ep in _plugin_entry_points:
    ep.load()
28 |
--------------------------------------------------------------------------------
/gen3/cli/users.py:
--------------------------------------------------------------------------------
1 | import click
2 |
3 | from gen3users import main as users_cli
4 |
5 |
6 | try:
7 | from importlib.metadata import entry_points
8 | except ImportError:
9 | from importlib_metadata import entry_points
10 |
11 |
@click.group()
def main():
    """Gen3 Command Line Interface"""
    # Empty group body; nothing in this module attaches commands to it.
16 |
17 |
@click.group()
def users():
    """Commands for working with gen3users"""
    # Container group only; it is populated from the gen3users CLI just below.
22 |
23 |
# Re-register every command of the gen3users CLI under the `users` group.
for command in users_cli.main.commands:
    users.add_command(users_cli.main.get_command(ctx=None, cmd_name=command))

# load plug-ins from entry_points
# Newer importlib.metadata selects a group via keyword and (from Python 3.12)
# no longer offers the dict-style `.get`; older versions accept no arguments
# and raise TypeError, in which case the dict-style lookup is used instead.
try:
    _plugin_entry_points = entry_points(group="gen3.plugins")
except TypeError:
    _plugin_entry_points = entry_points().get("gen3.plugins", [])

for ep in _plugin_entry_points:
    ep.load()
30 |
--------------------------------------------------------------------------------
/gen3/cli/wrap.py:
--------------------------------------------------------------------------------
1 | import click
2 |
3 |
4 | from gen3.tools.wrap import Gen3Wrap
5 |
6 |
@click.command(
    context_settings={"ignore_unknown_options": True, "allow_extra_args": True},
    help="A wrapper command that forwards COMMAND_ARGS as-is after setting the environment variable GEN3_TOKEN",
)
@click.argument("command_args", nargs=-1, type=click.UNPROCESSED)
@click.pass_context
def run(ctx, command_args):
    # command_args is declared UNPROCESSED and is handed to Gen3Wrap untouched.
    auth_provider = ctx.obj["auth_factory"].get()
    Gen3Wrap(auth_provider, command_args).run_command()
17 |
--------------------------------------------------------------------------------
/gen3/cli/wss.py:
--------------------------------------------------------------------------------
1 | import click
2 | import json
3 | import sys
4 | from gen3.wss import Gen3WsStorage, wsurl_to_tokens
5 |
6 |
def clean_path(path):
    """
    Add ws:///@user/ prefix if necessary
    """
    # Paths that already start with "ws:" are returned untouched.
    if path.startswith("ws:"):
        return path
    # Drop every leading "/" before prepending the user-workspace prefix.
    return "ws:///@user/" + path.lstrip("/")
16 |
17 |
@click.command()
@click.argument("path", default="")
@click.pass_context
def ls(ctx, path=""):
    """List the given workspace key"""
    # The path is normalized before the auth object is requested.
    workspace_path = clean_path(path)
    storage = Gen3WsStorage(ctx.obj["auth_factory"].get())
    listing = storage.ls_path(workspace_path)
    print(json.dumps(listing))
27 |
28 |
@click.command()
@click.argument("path")
@click.pass_context
def rm(ctx, path):
    """Remove the given workspace key"""
    # The path is normalized before the auth object is requested.
    workspace_path = clean_path(path)
    storage = Gen3WsStorage(ctx.obj["auth_factory"].get())
    outcome = storage.rm_path(workspace_path)
    print(json.dumps(outcome))
38 |
39 |
@click.command()
@click.argument("path")
@click.pass_context
def download_url(ctx, path):
    """Download url for the given workspace key"""
    # Only the first two tokens of the parsed workspace URL are used.
    parts = wsurl_to_tokens(clean_path(path))
    storage = Gen3WsStorage(ctx.obj["auth_factory"].get())
    url_info = storage.download_url(parts[0], parts[1])
    print(json.dumps(url_info))
49 |
50 |
@click.command()
@click.argument("path")
@click.pass_context
def upload_url(ctx, path):
    """Upload url for the given workspace key"""
    # Only the first two tokens of the parsed workspace URL are used.
    parts = wsurl_to_tokens(clean_path(path))
    storage = Gen3WsStorage(ctx.obj["auth_factory"].get())
    url_info = storage.upload_url(parts[0], parts[1])
    print(json.dumps(url_info))
60 |
61 |
@click.command()
@click.argument("src")
@click.argument("dest")
@click.pass_context
def copy(ctx, src, dest):
    """Copy the given src to the given dest"""
    # Both arguments are forwarded unchanged (no clean_path normalization);
    # how each is interpreted is decided by Gen3WsStorage.copy.
    auth_provider = ctx.obj["auth_factory"].get()
    wss = Gen3WsStorage(auth_provider)
    wss.copy(src, dest)
71 |
72 |
@click.group()
def wss():
    """[unfinished] Commands for Workspace Storage Service"""
    # Container group only; the sub-commands are attached below via add_command.
77 |
78 |
79 | wss.add_command(ls, name="ls")
80 | wss.add_command(copy, name="cp")
81 | wss.add_command(download_url, name="download-url")
82 | wss.add_command(upload_url, name="upload-url")
83 | wss.add_command(rm, name="rm")
84 |
--------------------------------------------------------------------------------
/gen3/configure.py:
--------------------------------------------------------------------------------
1 | """
2 | The format of config file is described as following
3 |
4 | [profile1]
5 | key_id=key_id_example_1
6 | api_key=api_key_example_1
7 | access_key=access_key_example_1
8 | api_endpoint=http://localhost:8000
9 | use_shepherd=true
10 | min_shepherd_version=2.0.0
11 |
12 | [profile2]
13 | key_id=key_id_example_2
14 | api_key=api_key_example_2
15 | access_key=access_key_example_2
16 | api_endpoint=http://example.com
17 | use_shepherd=false
18 | min_shepherd_version=
19 |
20 | """
21 | import json
22 | from os.path import expanduser
23 | from pathlib import Path
24 | from collections import OrderedDict
25 | import gen3.auth as auth_tool
26 |
27 | from cdislogging import get_logger
28 |
29 | logging = get_logger("__name__")
30 |
31 | CONFIG_FILE_PATH = expanduser("~/.gen3/config")
32 |
33 |
def get_profile_from_creds(profile, cred):
    """Build the config-file section for `profile` from a credentials JSON file.

    Args:
        profile (str): profile name, used for the "[profile]" header line
        cred (str): path to the credentials JSON file ("~" is expanded)

    Returns:
        tuple: (header line, list of "key=value" lines followed by one blank line)
    """
    with open(expanduser(cred)) as cred_file:
        raw_creds = json.load(cred_file)

    # Insertion order defines the order of the lines written to the config file.
    credentials = OrderedDict()
    credentials["key_id"] = raw_creds["key_id"]
    credentials["api_key"] = raw_creds["api_key"]
    api_endpoint = auth_tool.endpoint_from_token(credentials["api_key"])
    credentials["api_endpoint"] = api_endpoint
    # The access token is requested with the entries collected so far.
    credentials["access_key"] = auth_tool.get_access_token_with_key(credentials)
    credentials["use_shepherd"] = ""
    credentials["min_shepherd_version"] = ""

    profile_line = "".join(("[", profile, "]\n"))
    new_lines = []
    for key, value in credentials.items():
        new_lines.append("=".join((key, value)) + "\n")
    new_lines.append("\n")  # Adds an empty line between two profiles.
    return profile_line, new_lines
50 |
51 |
def get_current_config_lines():
    """Read lines from the config file if exists in ~/.gen3 folder, else create new config file

    Returns:
        list[str]: the lines of the existing config file, or an empty list
            when the file had to be created.
    """
    try:
        with open(CONFIG_FILE_PATH) as configFile:
            logging.info(f"Reading existing config file at {CONFIG_FILE_PATH}")
            return configFile.readlines()
    except FileNotFoundError:
        config_path = Path(CONFIG_FILE_PATH)
        # The containing folder may be missing too; touch() alone would then
        # raise another FileNotFoundError from inside this handler.
        config_path.parent.mkdir(parents=True, exist_ok=True)
        config_path.touch()
        logging.info(f"Config file doesn't exist at {CONFIG_FILE_PATH}, creating one")
        return []
62 |
63 |
def update_config_lines(lines, profile_title, new_lines, config_path=None):
    """Update config file contents with the new profile values

    If `profile_title` is already present in `lines`, its old section (the
    header line up to, but not including, the next line starting with "[")
    is removed from `lines` and the file is rewritten with the remaining
    lines followed by the new section. Otherwise the new section is appended
    to the file.

    Args:
        lines (list[str]): current lines of the config file; modified in
            place when an existing section is removed
        profile_title (str): header line of the profile, e.g. "[name]\\n"
        new_lines (list[str]): the lines of the new profile section
        config_path (str, optional): file to write; defaults to CONFIG_FILE_PATH
    """
    if config_path is None:
        config_path = CONFIG_FILE_PATH

    replacing = profile_title in lines
    if replacing:
        section_start = lines.index(profile_title)
        section_end = len(lines)
        # Start scanning AFTER the header: the header itself begins with "[",
        # so starting on it would end the section immediately and delete nothing.
        for i in range(section_start + 1, len(lines)):
            if lines[i].startswith("["):
                section_end = i
                break
        del lines[section_start:section_end]
        # Keep the next header on its own line if the last line had no newline.
        if lines and not lines[-1].endswith("\n"):
            lines[-1] += "\n"

    if replacing:
        # Rewrite the whole file so the old section is really gone.
        with open(config_path, "w") as configFile:
            configFile.writelines(lines)
            configFile.write(profile_title)
            configFile.writelines(new_lines)
    else:
        with open(config_path, "a+") as configFile:
            configFile.write(profile_title)
            configFile.writelines(new_lines)
79 |
--------------------------------------------------------------------------------
/gen3/external/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | gen3.external
3 |
4 | For housing clients and wrappers against non-Gen3 APIs. At the moment, they
5 | are usually used for gathering metadata.
6 |
7 | If you're adding a new metadata source, use the ExternalMetadataSourceInterface.
8 | """
9 | from gen3.external.external import ExternalMetadataSourceInterface
10 |
--------------------------------------------------------------------------------
/gen3/external/external.py:
--------------------------------------------------------------------------------
class ExternalMetadataSourceInterface(object):
    """
    A simple interface for external metadata sources. The idea is to have
    consistency that allows combining the outputs and making similar function
    calls with similar input.

        test0 = dbgapFHIR()
        metadata_0 = test0.get_metadata_for_ids(["foo", "bar"])

        test1 = NIHReporter()
        metadata_1 = test1.get_metadata_for_ids(["foo", "bar"])

        test2 = dbgapDOI()
        metadata_2 = test2.get_metadata_for_ids(["foo", "bar"])

        test3 = somethingElse()
        metadata_3 = test3.get_metadata_for_ids(["foo", "bar"])

        all_metadata = metadata_0 + metadata_1 + metadata_2 + metadata_3
    """

    def __init__(
        self,
        api="",
        auth_provider=None,
    ):
        # Both values are only stored here; nothing in this base class uses them.
        self.api = api
        self._auth_provider = auth_provider

    def get_metadata_for_ids(self, ids):
        """
        Returns a dictionary with the id as the key and associated metadata
        as another dictionary of values.

        Example, given: ["foo", "bar"], return:
            {
                "foo": {"name": "Foo", "description": "this is something"},
                "bar": {"name": "Bar", "description": "this is also something"},
            }

        Args:
            ids (List[str]): list of IDs to query for

        Returns:
            Dict[dict]: metadata for each of the provided IDs (which
                        are the keys in the returned dict)

        Raises:
            NotImplementedError: always, in this base class
        """
        raise NotImplementedError()
49 |
--------------------------------------------------------------------------------
/gen3/external/nih/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uc-cdis/gen3sdk-python/0ca70c579d55cb0f6218fa7a534d02a8910acda1/gen3/external/nih/__init__.py
--------------------------------------------------------------------------------
/gen3/external/nih/utils.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | from cdislogging import get_logger
4 |
5 | logging = get_logger("__name__")
6 |
7 | # For more details about this regex, see the function that uses it
# Named groups: phsid, participant_set, version and consent, each with a
# matching "*_number" group holding only the digits. Each optional part is
# preceded by one arbitrary separator character (".").
DBGAP_ACCESSION_REGEX = (
    "(?P<phsid>phs(?P<phsid_number>[0-9]+))"
    "(.(?P<participant_set>p(?P<participant_set_number>[0-9]+))){0,1}"
    "(.(?P<version>v(?P<version_number>[0-9]+))){0,1}"
    "(.(?P<consent>c(?P<consent_number>[0-9]+)+)){0,1}"
)


def get_dbgap_accession_as_parts(phsid):
    """
    Return a dictionary containing the various parts of the provided
    dbGaP Accession (AKA phsid).

    Uses a regex to match an accession number that has information in forms like:
        phs000123.c1
        phs000123.v3.p1.c3
        phs000123.c3
        phs000123.v3.p4.c1
        phs000123

    This separates out each part of the accession with named groups and includes
    parts that include only the numbered value (which is needed in some NIH APIs)

    A "picture" is worth a 1000 words:

    Example for `phs000123.c1`:
        Named groups
        phsid                   phs000123
        phsid_number            000123
        version                 None
        version_number          None
        participant_set         None
        participant_set_number  None
        consent                 c1
        consent_number          1

    Args:
        phsid (str): The dbGaP Accession (AKA phsid)

    Returns:
        dict[str]: A standardized dictionary (you can always expect these keys
                   when the input matches) with the values parsed from the
                   provided dbGaP Accession. An empty dict is returned when
                   the input does not start with a phsid at all.
            Example if provided `phs000123.c1`: {
                "phsid": "phs000123",
                "phsid_number": "000123",
                "version": "",
                "version_number": "",
                "participant_set": "",
                "participant_set_number": "",
                "consent": "c1",
                "consent_number": "1",
            }

    NOTE: the "*_number" fields are still represented as strings.
    NOTE2: the regex groups that return None will be represented
           as empty strings (for easier upstream str concat-ing)
    """
    access_number_matcher = re.compile(DBGAP_ACCESSION_REGEX)
    # match() anchors at the start of the string only; trailing text is ignored.
    raw_phs_match = access_number_matcher.match(phsid)
    phs_match = {}

    if raw_phs_match:
        phs_match = raw_phs_match.groupdict()

    # Replace unmatched (None) groups with "" so callers can concatenate freely.
    standardized_phs_match = {}
    for key, value in phs_match.items():
        if value is None:
            standardized_phs_match[key] = ""
            continue

        standardized_phs_match[key] = value

    return standardized_phs_match
81 |
--------------------------------------------------------------------------------
/gen3/object.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from gen3.utils import raise_for_status_and_print_error
3 |
4 |
class Gen3ObjectError(Exception):
    """Exception type declared for the Gen3 object module."""
7 |
8 |
class Gen3Object:
    """For interacting with Gen3 object level features.

    A class for interacting with the Gen3 object services.
    Currently allows creating and deleting of an object from the Gen3 System.

    Args:
        auth_provider (Gen3Auth): A Gen3Auth class instance.

    Examples:
        This generates the Gen3Object class pointed at the sandbox commons while
        using the credentials.json downloaded from the commons profile page.

        >>> auth = Gen3Auth(refresh_file="credentials.json")
        ... object = Gen3Object(auth)

    """

    def __init__(self, auth_provider=None):
        self.service_endpoint = "/mds"
        self._auth_provider = auth_provider

    def create_object(self, file_name, authz, metadata=None, aliases=None):
        """
        POST a new object record to "<endpoint>/mds/objects".

        Args:
            `file_name` -- sent as "file_name" in the request body
            `authz` -- sent as "authz" in the request body
            `metadata` -- sent as "metadata" in the request body
            `aliases` -- sent as "aliases" in the request body
        Returns:
            Tuple of the "guid" and "upload_url" fields of the JSON response
        """
        base = self._auth_provider.endpoint.rstrip("/")
        url = base + self.service_endpoint + "/objects"
        payload = {
            "file_name": file_name,
            "authz": authz,
            "metadata": metadata,
            "aliases": aliases,
        }
        response = requests.post(url, json=payload, auth=self._auth_provider)
        raise_for_status_and_print_error(response)
        created = response.json()
        return created["guid"], created["upload_url"]

    def delete_object(self, guid, delete_file_locations=False):
        """
        Delete the object from indexd, metadata service and optionally all storage locations

        Args:
            `guid` -- GUID of the object to delete
            `delete_file_locations` -- if True, removes the object from existing bucket location(s) through fence
        Returns:
            Nothing
        """
        # The flag is sent as a bare query parameter (no "=value" part).
        if delete_file_locations:
            query = "?delete_file_locations"
        else:
            query = ""
        base = self._auth_provider.endpoint.rstrip("/")
        url = base + self.service_endpoint + "/objects/" + guid + query
        response = requests.delete(url, auth=self._auth_provider)
        raise_for_status_and_print_error(response)
68 |
--------------------------------------------------------------------------------
/gen3/tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uc-cdis/gen3sdk-python/0ca70c579d55cb0f6218fa7a534d02a8910acda1/gen3/tools/__init__.py
--------------------------------------------------------------------------------
/gen3/tools/download/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uc-cdis/gen3sdk-python/0ca70c579d55cb0f6218fa7a534d02a8910acda1/gen3/tools/download/__init__.py
--------------------------------------------------------------------------------
/gen3/tools/indexing/__init__.py:
--------------------------------------------------------------------------------
1 | from gen3.tools.indexing.download_manifest import async_download_object_manifest
2 | from gen3.tools.indexing.verify_manifest import async_verify_object_manifest
3 | from gen3.tools.indexing.validate_manifest_format import is_valid_manifest_format
4 | from gen3.tools.indexing.index_manifest import index_object_manifest
5 |
--------------------------------------------------------------------------------
/gen3/tools/metadata/__init__.py:
--------------------------------------------------------------------------------
1 | from gen3.tools.metadata.ingest_manifest import async_ingest_metadata_manifest
2 | from gen3.tools.metadata.ingest_manifest import async_query_urls_from_indexd
3 | from gen3.tools.metadata.verify_manifest import async_verify_metadata_manifest
4 |
--------------------------------------------------------------------------------
/gen3/tools/wrap.py:
--------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 |
4 | from cdislogging import get_logger
5 | from gen3.auth import Gen3Auth, Gen3AuthError
6 |
7 | logger = get_logger("__name__")
8 |
9 |
class Gen3Wrap:
    """Runs an arbitrary command with a Gen3 access token in its environment."""

    def __init__(self, auth: Gen3Auth, command_args: tuple):
        """
        auth : Gen3Auth instance
        command_args: A tuple consisting of all the commands sent to the `gen3 run` tool
        """
        self.auth = auth
        self.command_args = command_args

    def run_command(self):
        """
        Take the command args and run a subprocess with appropriate access token in the env var

        Raises:
            Gen3AuthError: re-raised when the access token cannot be obtained
            Exception: anything raised while launching the subprocess is re-raised
        """
        cmd = list(self.command_args)
        try:
            # Set on this process' environment, which the subprocess started below
            # inherits (subprocess.run is called without an explicit env).
            os.environ["GEN3_TOKEN"] = self.auth.get_access_token()
        except Gen3AuthError as e:
            # The exception is formatted into the message; passing it as an extra
            # positional argument makes logging fail to format the record.
            logger.error(f"ERROR getting Gen3 Access Token: {e}")
            raise
        logger.info(
            f"Running the command {self.command_args} with gen3 access token in environment variable"
        )
        try:
            subprocess.run(cmd, stderr=subprocess.STDOUT)
        except Exception as e:
            logger.error(f"ERROR while running '{cmd}': {e}")
            raise
37 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "gen3"
3 | homepage = "https://gen3.org/"
4 | version = "4.27.2"
5 | description = "Gen3 CLI and Python SDK"
6 | authors = ["Center for Translational Data Science at the University of Chicago "]
7 | license = "Apache-2.0"
8 | packages = [
9 | { include = "gen3" },
10 | ]
11 | classifiers = [
12 | "Development Status :: 4 - Beta",
13 | "Intended Audience :: Science/Research",
14 | "Intended Audience :: Developers",
15 | "License :: OSI Approved :: Apache Software License",
16 | "Programming Language :: Python :: 3.9",
17 | "Topic :: Scientific/Engineering",
18 | ]
19 |
20 | [tool.poetry.dependencies]
21 | python = ">=3.9, <4"
22 | requests = "*"
23 | indexclient = "^2.3.0"
24 | drsclient = ">=0.3.0"
25 | aiohttp = "*"
26 | backoff = "*"
27 | cdislogging = "^1.1.0"
28 | click = "*"
29 | jsonschema = "*"
30 | # FIXME updating to >=0.6.0 breaks a few tests
31 | dataclasses-json = "<=0.5.9"
32 | pypfb = ">=0.5.33"
33 | tqdm = "^4.61.2"
34 | humanfriendly ="*"
35 | python-dateutil = "*"
36 | aiofiles = "^0.8.0"
37 | pandas = ">=1.4.2"
38 | urllib3 = ">2.0.0"
39 | httpx = "*"
40 | xmltodict = "^0.13.0"
41 | pyyaml = ">=6.0.1"
42 | gen3users = "*"
43 |
44 | # A list of all of the optional dependencies, some of which are included in the
45 | # below `extras`. They can be opted into by apps.
46 | fhirclient = { version = "*", optional = true }
47 |
48 | [tool.poetry.extras]
49 | fhir = ["fhirclient"]
50 |
51 | [tool.poetry.dev-dependencies]
52 | pytest = "^6.0.0"
53 | pytest-cov = "*"
54 | requests-mock = "*"
55 | cdisutilstest = { git = "https://github.com/uc-cdis/cdisutils-test.git", tag = "1.0.0" }
56 | indexd = { git = "https://github.com/uc-cdis/indexd.git", tag = "5.0.4" }
57 |
58 | [tool.poetry.scripts]
59 | gen3 = "gen3.cli.__main__:main"
60 |
61 | [tool.poetry.plugins."gen3.plugins"]
62 |
63 | [build-system]
64 | requires = ["poetry>=1.4.1"]
65 | build-backend = "poetry.masonry.api"
66 |
67 | [tool.pytest.ini_options]
68 | minversion = "6.0"
69 | addopts = "-vv"
70 | testpaths = [
71 | "tests",
72 | ]
73 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uc-cdis/gen3sdk-python/0ca70c579d55cb0f6218fa7a534d02a8910acda1/tests/__init__.py
--------------------------------------------------------------------------------
/tests/bundle_tests/invalid_manifest.csv:
--------------------------------------------------------------------------------
1 | bundle_name,ids,GUID,size,type,checksum,description
2 | A,[dg.TEST/f2a39f98-6ae1-48a5-8d48-825a0c52a22b dg.TEST/1e9d3103-cbe2-4c39-917c-b3abad4750d2],,,,,something something
3 | B,['dg.TEST/1e9d3103-cbe2-4c39-917c-b3abad4750d2' 'dg.TEST/f2a39f98-6ae1-48a5-8d48-825a0c52a22b'],,789,,,something
4 | C,[A 'B' dg.TEST/ed8f4658-6acd-4f96-9dd8-3709890c959e],,120,,,lalala
5 | D,[A B C D],,,,,
6 | E,[A B],dg.xxxx/590ee63d-2790-477a-bbf8-d53873ca4933,,,,
--------------------------------------------------------------------------------
/tests/bundle_tests/test_bundle_ingestion.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | from unittest.mock import MagicMock, patch
4 | from drsclient.client import DrsClient
5 |
6 | from gen3.tools.bundle.ingest_manifest import (
7 | _replace_bundle_name_with_guid,
8 | ingest_bundle_manifest,
9 | )
10 |
11 | from cdislogging import get_logger
12 |
# Name the logger after this module; the quoted "__name__" used previously
# produced a logger literally called "__name__".
logging = get_logger(__name__)
14 |
15 |
def test_replace_bundle_name_with_guid():
    """
    Entries that are keys of the name->GUID lookup must come back as the
    mapped GUID; every other entry must come back unchanged and in place.
    """
    name_to_guid = {
        "bundleA": "51eea3c7-cbbb-4d32-9045-c626f302e1ef",
        "bundleB": "ba989365-43a3-4fa8-9360-5cdb7ef0179c",
    }
    # Not keys of the lookup, so expected to pass through as-is
    passthrough_ids = [
        "05b30df8-20f4-4f61-b860-902ddb9ddf0b",
        "dg.xxxx/05b30df8-20f4-4f61-b860-902ddb9ddf0b",
    ]

    mixed_ids = ["bundleA", "bundleB"] + passthrough_ids
    wanted = [
        "51eea3c7-cbbb-4d32-9045-c626f302e1ef",
        "ba989365-43a3-4fa8-9360-5cdb7ef0179c",
    ] + passthrough_ids

    assert _replace_bundle_name_with_guid(mixed_ids, name_to_guid) == wanted
35 |
36 |
def test_valid_ingest_bundle_manifest(gen3_index, indexd_server, drs_client):
    """
    Ingest `valid_manifest.csv` and check that all of its bundles are created.

    Three indexd records are created first because the manifest's bundles
    reference their GUIDs.
    """
    # Create some indexd records to bundle
    gen3_index.create_record(
        did="dg.TEST/f2a39f98-6ae1-48a5-8d48-825a0c52a22b",
        hashes={"md5": "a1234567891234567890123456789012"},
        size=123,
        acl=["DEV", "test"],
        authz=["/programs/DEV/projects/test"],
        urls=["s3://testaws/aws/test.txt", "gs://test/test.txt"],
    )
    gen3_index.create_record(
        did="dg.TEST/1e9d3103-cbe2-4c39-917c-b3abad4750d2",
        hashes={"md5": "b1234567891234567890123456789012"},
        size=234,
        acl=["DEV", "test2"],
        authz=["/programs/DEV/projects/test2", "/programs/DEV/projects/test2bak"],
        urls=["gs://test/test.txt"],
        file_name="test.txt",
    )
    gen3_index.create_record(
        did="dg.TEST/ed8f4658-6acd-4f96-9dd8-3709890c959e",
        hashes={"md5": "e1234567891234567890123456789012"},
        size=345,
        acl=["DEV", "test3"],
        authz=["/programs/DEV/projects/test3", "/programs/DEV/projects/test3bak"],
        urls=["gs://test/test3.txt"],
    )

    records = ingest_bundle_manifest(
        indexd_server.baseurl,
        "./tests/bundle_tests/valid_manifest.csv",
        manifest_file_delimiter=",",
        auth=("user", "user"),
    )

    # 6 bundles in the manifest
    assert len(records) == 6

    resp = drs_client.get("dg.xxxx/590ee63d-2790-477a-bbf8-d53873ca4933")
    assert resp.status_code == 200

    all_bundles_resp = drs_client.get_all(endpoint="/bundle")
    assert all_bundles_resp.status_code == 200
    bundle_records = all_bundles_resp.json()["records"]
    assert len(bundle_records) == 6

    for record in bundle_records:
        assert record["name"] in ["A", "B", "C", "D", "E", "F"]

    # The two bundles with explicit GUIDs in the manifest (F, then E) declare
    # the same md5/sha256 pair, so one expectation covers both.
    expected_checksums = [
        {"type": "md5", "checksum": "14d2e36323ad8d37423bb76347128234"},
        {
            "type": "sha256",
            "checksum": "3319c07dea1628afaefe76de8ac867cfece7e2bfebacb6432f69a44111536e0f",
        },
    ]
    for bundle_guid in (
        "dg.xxxx/e366dbca-3c7f-4be6-86e4-c1f8f3e4189d",
        "dg.xxxx/590ee63d-2790-477a-bbf8-d53873ca4933",
    ):
        bundle = drs_client.get(bundle_guid).json()
        for checksum in bundle["checksums"]:
            assert checksum in expected_checksums
110 |
111 |
def test_invalid_ingest_bundle_manifest(gen3_index, indexd_server, drs_client):
    """
    Ingest `invalid_manifest.csv` and check that nothing is created.

    The ingestion is expected to return None, the bundle with an explicit GUID
    in the manifest must not be retrievable, and the bundle listing must be
    empty.
    """
    # Create some indexd records to bundle
    gen3_index.create_record(
        did="dg.TEST/f2a39f98-6ae1-48a5-8d48-825a0c52a22b",
        hashes={"md5": "a1234567891234567890123456789012"},
        size=123,
        acl=["DEV", "test"],
        authz=["/programs/DEV/projects/test"],
        urls=["s3://testaws/aws/test.txt", "gs://test/test.txt"],
    )
    gen3_index.create_record(
        did="dg.TEST/1e9d3103-cbe2-4c39-917c-b3abad4750d2",
        hashes={"md5": "b1234567891234567890123456789012"},
        size=234,
        acl=["DEV", "test2"],
        authz=["/programs/DEV/projects/test2", "/programs/DEV/projects/test2bak"],
        urls=["gs://test/test.txt"],
        file_name="test.txt",
    )
    gen3_index.create_record(
        did="dg.TEST/ed8f4658-6acd-4f96-9dd8-3709890c959e",
        hashes={"md5": "e1234567891234567890123456789012"},
        size=345,
        acl=["DEV", "test3"],
        authz=["/programs/DEV/projects/test3", "/programs/DEV/projects/test3bak"],
        urls=["gs://test/test3.txt"],
    )

    records = ingest_bundle_manifest(
        indexd_server.baseurl,
        "./tests/bundle_tests/invalid_manifest.csv",
        manifest_file_delimiter=",",
        auth=("user", "user"),
    )

    # Identity check: `== None` can be fooled by a custom __eq__
    assert records is None

    resp = drs_client.get("dg.xxxx/590ee63d-2790-477a-bbf8-d53873ca4933")
    assert resp.status_code == 404

    all_bundles = drs_client.get_all(endpoint="/bundle").json()
    assert len(all_bundles["records"]) == 0
158 |
--------------------------------------------------------------------------------
/tests/bundle_tests/valid_manifest.csv:
--------------------------------------------------------------------------------
1 | bundle_name,ids,GUID,size,type,checksum,description
2 | A,[dg.TEST/f2a39f98-6ae1-48a5-8d48-825a0c52a22b dg.TEST/1e9d3103-cbe2-4c39-917c-b3abad4750d2],,,,,something something
3 | B,['dg.TEST/1e9d3103-cbe2-4c39-917c-b3abad4750d2' 'dg.TEST/f2a39f98-6ae1-48a5-8d48-825a0c52a22b'],,789,,,something
4 | C,[A 'B' dg.TEST/ed8f4658-6acd-4f96-9dd8-3709890c959e],,121,,,lalala
5 | D,[A B C],,,md5,c3f5f87171034245b12ba9aaeb5ec6a7,
6 | E,[A B],dg.xxxx/590ee63d-2790-477a-bbf8-d53873ca4933,,[md5 sha256],["14d2e36323ad8d37423bb76347128234" '3319c07dea1628afaefe76de8ac867cfece7e2bfebacb6432f69a44111536e0f'],
7 | F,[E],dg.xxxx/e366dbca-3c7f-4be6-86e4-c1f8f3e4189d,,md5 sha256,14d2e36323ad8d37423bb76347128234 3319c07dea1628afaefe76de8ac867cfece7e2bfebacb6432f69a44111536e0f,
8 |
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | """
2 | Conf Test for Gen3 test suite
3 | """
4 | from multiprocessing import Process
5 | import multiprocessing
6 | from unittest.mock import patch
7 | import pytest
8 | import requests
9 |
10 | from drsclient.client import DrsClient
11 | from cdisutilstest.code.indexd_fixture import (
12 | setup_database,
13 | clear_database,
14 | create_user,
15 | )
16 | from gen3.cli.auth import endpoint
17 | from indexd import get_app
18 | from indexd.default_settings import settings
19 |
20 | from gen3.file import Gen3File
21 | from gen3.index import Gen3Index
22 | from gen3.submission import Gen3Submission
23 | from gen3.query import Gen3Query
24 | from gen3.auth import Gen3Auth
25 | from gen3.object import Gen3Object
26 |
27 |
class MockAuth:
    """
    Stand-in for Gen3Auth carrying fixed, hard-coded values
    """

    def __init__(self):
        self._token_info = {"sub": "42"}
        self.refresh_token = {"api_key": "123"}
        self.endpoint = "https://example.commons.com"

    def __call__(self, request):
        # Hand the request back exactly as it was received
        return request

    @property
    def __class__(self):
        """
        Report Gen3Auth as the class of this object so that
        `isinstance(<MockAuth instance>, Gen3Auth)` returns True
        """
        return Gen3Auth

    def _get_auth_value(self):
        return "foobar"
50 |
51 |
@pytest.fixture
def sub():
    """
    Gen3Submission constructed with a MockAuth
    """
    auth = MockAuth()
    return Gen3Submission(auth)
58 |
59 |
@pytest.fixture
def gen3_auth():
    """
    Provide a new MockAuth instance
    """
    auth = MockAuth()
    return auth
66 |
67 |
@pytest.fixture
def mock_gen3_auth():
    """
    Gen3Auth built from MockAuth's endpoint and refresh token

    `gen3.auth.endpoint_from_token` is patched while the object is constructed.
    """
    mock_auth = MockAuth()
    # patch as __init__ has method call
    with patch("gen3.auth.endpoint_from_token") as mock_endpoint_from_token:
        # Configure the patched function itself. Setting `return_value` on
        # `mock_endpoint_from_token()` only configured the object returned by
        # a call, so the patched function would not have returned the endpoint.
        mock_endpoint_from_token.return_value = mock_auth.endpoint
        return Gen3Auth(
            endpoint=mock_auth.endpoint, refresh_token=mock_auth.refresh_token
        )
80 |
81 |
@pytest.fixture
def gen3_file_no_auth(gen3_auth):
    """
    Mock Gen3File without auth

    `gen3_auth` is requested as a fixture so that its `endpoint` can be read;
    without the parameter the name resolved to the module-level fixture
    function, which has no `endpoint` attribute.
    """
    return Gen3File(endpoint=gen3_auth.endpoint, auth_provider=None)
88 |
89 |
@pytest.fixture
def gen3_file(mock_gen3_auth):
    """
    Gen3File using `mock_gen3_auth` both for its endpoint and as auth provider
    """
    auth = mock_gen3_auth
    return Gen3File(endpoint=auth.endpoint, auth_provider=auth)
96 |
97 |
@pytest.fixture
def gen3_object(gen3_auth):
    """
    Gen3Object whose auth provider is the `gen3_auth` fixture
    """
    obj = Gen3Object(auth_provider=gen3_auth)
    return obj
104 |
105 |
@pytest.fixture(scope="function", params=("s3", "http", "ftp", "https", "gs", "az"))
def supported_protocol(request):
    """
    Parametrized fixture: one of "s3", "http", "ftp", "https", "gs", "az"
    per test invocation.

    Note that "az" is an internal mapping for a supported protocol
    """
    protocol = request.param
    return protocol
114 |
115 |
@pytest.fixture(scope="session")
def indexd_server():
    """
    Fixture copied from cdisutils-test and updated to mock Arborist

    Starts indexd in a forked child process on localhost:8001, blocks until
    it answers HTTP requests, yields an object exposing `port` and `baseurl`,
    and terminates the child process on teardown.

    Raises:
        RuntimeError: if indexd does not accept connections before the
            startup timeout expires
    """
    # Local import: only this fixture needs it
    import time

    class MockServer(object):
        def __init__(self, port):
            self.port = port
            self.baseurl = "http://localhost:{}".format(port)

    def run_indexd(port):
        app = get_app()
        app.run(host="localhost", port=port, debug=False)

    def wait_for_indexd_alive(port, timeout=30.0, poll_interval=0.1):
        # Poll in a loop with a short pause and a deadline. The previous
        # implementation retried by calling itself with no delay, so a slow
        # server start could exhaust the recursion limit.
        url = "http://localhost:{}".format(port)
        deadline = time.monotonic() + timeout
        while True:
            try:
                requests.get(url)
            except requests.ConnectionError:
                if time.monotonic() >= deadline:
                    raise RuntimeError(
                        "indexd did not start on port {} within {} seconds".format(
                            port, timeout
                        )
                    )
                time.sleep(poll_interval)
            else:
                return

    class MockArboristClient(object):
        def auth_request(*args, **kwargs):
            return True

    port = 8001
    settings["auth"].arborist = MockArboristClient()
    # Add this line because OS X multiprocessing default is spawn which will cause pickling errors
    # NOTE: fork is unstable and not technically supported on OS X, forking is only supported on Unix
    # However explicitly setting default behavior to fork to pass unit test, only used for tests
    # https://docs.python.org/3/library/multiprocessing.html
    # https://github.com/pytest-dev/pytest-flask/issues/104
    multiprocessing.set_start_method("fork")
    indexd = Process(target=run_indexd, args=[port])
    indexd.start()
    wait_for_indexd_alive(port)

    yield MockServer(port=port)

    indexd.terminate()
168 |
169 |
@pytest.fixture
def index_client(indexd_server):
    """
    Gen3Index client pointed at the local indexd test server.

    The database is set up before the client is handed to the test and
    cleared again once the test has finished. The client authenticates with
    whatever `create_user("admin", "admin")` returns, or with the plain
    ("admin", "admin") pair when that call raises.
    """
    setup_database()

    try:
        credentials = create_user("admin", "admin")
    except Exception:
        # assume user already exists, try using username and password for admin
        credentials = "admin", "admin"

    yield Gen3Index(indexd_server.baseurl, credentials, service_location="")

    clear_database()
193 |
194 |
@pytest.fixture
def gen3_index(index_client):
    """
    Alias for the `index_client` fixture
    """
    client = index_client
    return client
201 |
202 |
@pytest.fixture
def gen3_query(gen3_auth):
    """
    Gen3Query constructed with the `gen3_auth` fixture
    """
    query = Gen3Query(gen3_auth)
    return query
209 |
210 |
@pytest.fixture(scope="function")
def drs_client(indexd_server):
    """
    DrsClient pointed at the local indexd test server. Runs once per test.

    Authenticates with whatever `create_user("user", "user")` returns, or
    with the plain ("user", "user") pair when that call raises. The database
    is cleared after the test has completed.
    """
    try:
        credentials = create_user("user", "user")
    except Exception:
        credentials = "user", "user"

    yield DrsClient(baseurl=indexd_server.baseurl, auth=credentials)

    clear_database()
227 |
228 |
@pytest.fixture(scope="function")
def drsclient(drs_client):
    """
    Alias for the `drs_client` fixture
    """
    client = drs_client
    return client
235 |
--------------------------------------------------------------------------------
/tests/download_tests/expected/manifest_test_drs_compact_object_list.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "object_id": "dg.XXTS/b96018c5-db06-4af8-a195-28e339ba815e",
4 | "object_type": "unknown",
5 | "hostname": null,
6 | "file_size": -1,
7 | "file_name": null,
8 | "updated_time": null,
9 | "created_time": null,
10 | "access_methods": [],
11 | "children": []
12 | },
13 | {
14 | "object_id": "dg.XXTS/6d3eb293-8388-4c5d-83ef-d0c2bd5ba604",
15 | "object_type": "unknown",
16 | "hostname": null,
17 | "file_size": -1,
18 | "file_name": null,
19 | "updated_time": null,
20 | "created_time": null,
21 | "access_methods": [],
22 | "children": []
23 | },
24 | {
25 | "object_id": "dg.XXTS/6f9a924f-9d83-4597-8f66-fe7d3021729f",
26 | "object_type": "unknown",
27 | "hostname": null,
28 | "file_size": -1,
29 | "file_name": null,
30 | "updated_time": null,
31 | "created_time": null,
32 | "access_methods": [],
33 | "children": []
34 | },
35 | {
36 | "object_id": "dg.XXTS/0e618fef-e359-424b-b844-0ca320105176",
37 | "object_type": "unknown",
38 | "hostname": null,
39 | "file_size": -1,
40 | "file_name": null,
41 | "updated_time": null,
42 | "created_time": null,
43 | "access_methods": [],
44 | "children": []
45 | },
46 | {
47 | "object_id": "dg.XXTS/0e618fef-e359-424b-b844-0ca32010517a",
48 | "object_type": "unknown",
49 | "hostname": null,
50 | "file_size": -1,
51 | "file_name": null,
52 | "updated_time": null,
53 | "created_time": null,
54 | "access_methods": [],
55 | "children": []
56 | }
57 | ]
--------------------------------------------------------------------------------
/tests/download_tests/resources/bad_format.json:
--------------------------------------------------------------------------------
1 | {x}
--------------------------------------------------------------------------------
/tests/download_tests/resources/dataguids_commons1.json:
--------------------------------------------------------------------------------
1 | {
2 | "acl": [
3 | "admin"
4 | ],
5 | "authz": [],
6 | "baseid": "1e6cf3f1-a5af-4543-a1ca-84b41b3221a9",
7 | "created_date": "2018-06-13T17:16:29.981618",
8 | "did": "dg.XXTS/b96018c5-db06-4af8-a195-28e339ba815e",
9 | "file_name": "TestDataSet1.sav",
10 | "form": "object",
11 | "from_index_service": {
12 | "host": "https://test.commons1.io/index/",
13 | "name": "TestCommons1"
14 | },
15 | "hashes": {
16 | "md5": "65196806d31002bd48abed020d861cf1"
17 | },
18 | "metadata": {},
19 | "rev": "6046fb9f",
20 | "size": 1566369,
21 | "updated_date": "2018-06-13T17:16:29.981629",
22 | "uploader": null,
23 | "urls": [
24 | "gs://topmed_workflow_testing/topmed_aligner/input_files/TestDataSet1.sav",
25 | "s3://topmed-workflow-testing/topmed-aligner/input-files/TestDataSet1.sav"
26 | ],
27 | "urls_metadata": {
28 | "gs://topmed_workflow_testing/topmed_aligner/input_files/TestDataSet1.sav": {},
29 | "s3://topmed-workflow-testing/topmed-aligner/input-files/TestDataSet1.sav": {}
30 | },
31 | "version": null
32 | }
--------------------------------------------------------------------------------
/tests/download_tests/resources/download_test_data.json:
--------------------------------------------------------------------------------
1 | {
2 | "dg.XXTS/b96018c5-db06-4af8-a195-28e339ba815e": {
3 | "file_name": "TestDataSet1.sav",
4 | "content": "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore etdolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquipex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum doloreeu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt inculpa qui officia deserunt mollit anim id est laborum."
5 | },
6 | "dg.XXTS/6d3eb293-8388-4c5d-83ef-d0c2bd5ba604": {
7 | "file_name": "TestDataSet_April2020.sav",
8 | "content": "Regarde fin enlever extreme aux nos mal. Non polies roc certes dur livres ennemi corons nez. Sur sacrifice sanglante mes seulement croissent. Ivres coeur matin gagne grand en ca ah aides. Profonde six les falaises cantines batisses aussitot bon. Cheval encore que allons toi rit."
9 | },
10 | "dg.XXTS/6f9a924f-9d83-4597-8f66-fe7d3021729f": {
11 | "file_name": "TestDataSet_June2020.sav",
12 | "content": "Graven af poging groene nu schuld waarde. Hout het zee wel niet stad. Dient groot gayah zeker de en. In agentschap te initiatief ingenieurs werkwijzen bijzondere nu mogendheid. Een rijkdommen wedijveren des onvermoeid. Onder echte er matig op onzer eerst lahat. Enkelen was dus waarbij bevrijd dit vreezen. In zeer er is acre ziet zien hier. "
13 | },
14 | "dg.XXTS/0e618fef-e359-424b-b844-0ca320105176": {
15 | "file_name": "TestDataSet_Oct2020.sav",
16 | "content": "Regarde fin enlever extreme aux nos mal. Non polies roc certes dur livres ennemi corons nez. Sur sacrifice sanglante mes seulement croissent. Ivres coeur matin gagne grand en ca ah aides. Profonde six les falaises cantines batisses aussitot bon. Cheval encore que allons toi rit."
17 | },
18 | "dg.XXTS/0e618fef-e359-424b-b844-0ca32010517a": {
19 | "file_name": "a/b/TestDataSet_Oct2020.sav",
20 | "content": "This file name contains slashes and should be downloaded in subdirectories."
21 | },
22 | "dg.XXTS/0c5ddbb3-c801-4dc9-aae0-804c2c1591bf": {
23 | "file_name": "EC03_2018_C.csv",
24 | "content": "1,'Eldon Base for stackable storage shelf, platinum',3,-213.25,38.94,35,Nunavut,Storage & Organization,0.8\n2,'1.7 Cubic Foot Compact Office Refrigerators',293,457.81,208.16,68.02,Nunavut,Appliances,0.58\n3,'Cardinal Slant Ring Binder, Heavy Gauge Vinyl',293,46.71,8.69,2.99,Nunavut,Binders and Binder Accessories,0.39"
25 | },
26 | "dg.xxTS/4bc4e600-1eda-4f81-aa2b-7c33dad78bec": {
27 | "file_name": "0.zip",
28 | "content": "UEsDBBQAAAAAAK+FJFSFw9zvAgAAAAIAAAAFAAAAYy50eHRjClBLAwQUAAAAAACthSRUZtZDYwIAAAACAAAABQAAAGIudHh0QgpQSwECFAMUAAAAAACvhSRUhcPc7wIAAAACAAAABQAAAAAAAAAAAAAApIEAAAAAYy50eHRQSwECFAMUAAAAAACthSRUZtZDYwIAAAACAAAABQAAAAAAAAAAAAAApIElAAAAYi50eHRQSwUGAAAAAAIAAgBmAAAASgAAAAAA"
29 | },
30 | "dg.xxTS/4bc4e600-1eda-4f81-aa2b-7c33dad78bed": {
31 | "file_name": "1.zip",
32 | "content": "UEsDBBQAAAAAAK+FJFSFw9zvAgAAAAIAAAAFAAAAYy50eHRjClBLAwQUAAAAAACthSRUZtZDYwIAAAACAAAABQAAAGIudHh0QgpQSwECFAMUAAAAAACvhSRUhcPc7wIAAAACAAAABQAAAAAAAAAAAAAApIEAAAAAYy50eHRQSwECFAMUAAAAAACthSRUZtZDYwIAAAACAAAABQAAAAAAAAAAAAAApIElAAAAYi50eHRQSwUGAAAAAAIAAgBmAAAASgAAAAAA"
33 | },
34 | "dg.xxTS/4bc4e600-1eda-4f81-aa2b-7c33dad78bee": {
35 | "file_name": "2.zip",
36 | "content": "UEsDBBQAAAAAAK+FJFSFw9zvAgAAAAIAAAAFAAAAYy50eHRjClBLAwQUAAAAAACthSRUZtZDYwIAAAACAAAABQAAAGIudHh0QgpQSwECFAMUAAAAAACvhSRUhcPc7wIAAAACAAAABQAAAAAAAAAAAAAApIEAAAAAYy50eHRQSwECFAMUAAAAAACthSRUZtZDYwIAAAACAAAABQAAAAAAAAAAAAAApIElAAAAYi50eHRQSwUGAAAAAAIAAgBmAAAASgAAAAAA"
37 | },
38 | "dg.xxTS/4bc4e600-1eda-4f81-aa2b-7c33dad78bef": {
39 | "file_name": "3.tar",
40 | "content": "UEsDBBQAAAAAAK+FJFSFw9zvAgAAAAIAAAAFAAAAYy50eHRjClBLAwQUAAAAAACthSRUZtZDYwIAAAACAAAABQAAAGIudHh0QgpQSwECFAMUAAAAAACvhSRUhcPc7wIAAAACAAAABQAAAAAAAAAAAAAApIEAAAAAYy50eHRQSwECFAMUAAAAAACthSRUZtZDYwIAAAACAAAABQAAAAAAAAAAAAAApIElAAAAYi50eHRQSwUGAAAAAAIAAgBmAAAASgAAAAAA"
41 | },
42 | "dg.xxTS/4bc4e600-1eda-4f81-aa2b-7c33dad78beg": {
43 | "file_name": "4.zip",
44 | "content": "UEsDBBQAAAAAAK+FJFSFw9zvAgAAAAIAAAAFAAAAYy50eHRjClBLAwQUAAAAAACthSRUZtZDYwIAAAACAAAABQAAAGIudHh0QgpQSwECFAMUAAAAAACvhSRUhcPc7wIAAAACAAAABQAAAAAAAAAAAAAApIEAAAAAYy50eHRQSwECFAMUAAAAAACthSRUZtZDYwIAAAACAAAABQAAAAAAAAAAAAAApIElAAAAYiRQSwUGAAAAAAIAAgBmAAAASgAAAAAA"
45 | },
46 | "dg.xxTS/4bc4e600-1eda-4f81-aa2b-7c33dad78bex": {
47 | "file_name": "a/b/5.zip",
48 | "content": "UEsDBBQAAAAAAK+FJFSFw9zvAgAAAAIAAAAFAAAAYy50eHRjClBLAwQUAAAAAACthSRUZtZDYwIAAAACAAAABQAAAGIudHh0QgpQSwECFAMUAAAAAACvhSRUhcPc7wIAAAACAAAABQAAAAAAAAAAAAAApIEAAAAAYy50eHRQSwECFAMUAAAAAACthSRUZtZDYwIAAAACAAAABQAAAAAAAAAAAAAApIElAAAAYi50eHRQSwUGAAAAAAIAAgBmAAAASgAAAAAA"
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/tests/download_tests/resources/drs_object_commons3.json:
--------------------------------------------------------------------------------
1 | {
2 | "access_methods": [
3 | {
4 | "access_id": "s3",
5 | "access_url": {
6 | "url": "s3://testprod-default-258867494168-upload/dg.XXTS/07246ed5-8c4a-4d53-a559-ce07c1dd7d11/Access05_Z.csv"
7 | },
8 | "region": "",
9 | "type": "s3"
10 | }
11 | ],
12 | "aliases": [],
13 | "checksums": [
14 | {
15 | "checksum": "65196806d31002bd48abed020d861cf1",
16 | "type": "md5"
17 | }
18 | ],
19 | "contents": [],
20 | "created_time": "2021-04-06T11:22:19.327370",
21 | "description": null,
22 | "form": null,
23 | "id": "dg.XX23/b96018c5-db06-4af8-a195-28e339ba815e",
24 | "mime_type": "application/json",
25 | "name": "TestDataSet1.sav",
26 | "self_uri": "drs://test.commons3.io/dg.XX23/b96018c5-db06-4af8-a195-28e339ba815e",
27 | "size": 1566369,
28 | "updated_time": "2021-05-08T13:27:31.327280",
29 | "version": "4e5a4c63"
30 | }
31 |
--------------------------------------------------------------------------------
/tests/download_tests/resources/expired_drs_host_cache.json:
--------------------------------------------------------------------------------
1 | {
2 | "info": {
3 | "created": "10/22/1921 21:29:11:+0000"
4 | },
5 | "cache": {
6 | "dg.XXTS": {
7 | "host": "test1.testcommons1.org",
8 | "name": "DataSTAGE",
9 | "type": "indexd",
10 | "created": "10/22/1921 21:29:11:+0000"
11 | }
12 | }
13 | }
--------------------------------------------------------------------------------
/tests/download_tests/resources/gen3_metadata_external_file_metadata.json:
--------------------------------------------------------------------------------
1 | {
2 | "_guid_type": "discovery_metadata",
3 | "gen3_discovery": {
4 | "authz": "",
5 | "__manifest": "",
6 | "year_awarded": "2022",
7 | "project_title": "Project title",
8 | "external_file_metadata": [
9 | {
10 | "external_oidc_idp": "test-externaldata-idp",
11 | "file_retriever": "QDR",
12 | "study_id": "QDR_study_01"
13 | },
14 | {
15 | "external_oidc_idp": "test-externaldata-idp",
16 | "file_retriever": "QDR",
17 | "file_id": "QDR_file_02"
18 | }
19 | ],
20 | "study_metadata": {
21 | "data": {
22 | "data_type": [],
23 | "data_source": []
24 | }
25 | }
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/tests/download_tests/resources/index_dist.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "hints": [
4 | ".*dg\\.AB03.*"
5 | ],
6 | "host": "https://gen3.mytestcommons1.io/index/",
7 | "name": "DataTest",
8 | "type": "indexd"
9 | },
10 | {
11 | "hints": [
12 | ".*dg\\.CD5B.*"
13 | ],
14 | "host": "https://portal.mytestcommons2.org/index/",
15 | "name": "Test Environmental DC",
16 | "type": "indexd"
17 | },
18 | {
19 | "hints": [
20 | ".*dg\\.DXX1A.*"
21 | ],
22 | "host": "https://data.datacommons4.org/index/",
23 | "name": "Blanket Test",
24 | "type": "indexd"
25 | },
26 | {
27 | "hints": [],
28 | "host": "https://data.datacommons5.io/index/",
29 | "name": "Data Science Test",
30 | "type": "indexd"
31 | },
32 | {
33 | "hints": [
34 | ".*dg\\.ZYV0.*"
35 | ],
36 | "host": "https://gen3.thecommons.io/index/",
37 | "name": "DSIsG",
38 | "type": "indexd"
39 | },
40 | {
41 | "hints": [
42 | ".*dg\\.3ETS.*"
43 | ],
44 | "host": "https://www.bigcommons.io/index/",
45 | "name": "BOC",
46 | "type": "indexd"
47 | },
48 | {
49 | "hints": [
50 | ".*dg\\.XX4L.*"
51 | ],
52 | "host": "https://datatest.org/index/",
53 | "name": "DSTest",
54 | "type": "indexd"
55 | },
56 | {
57 | "hints": [
58 | ".*dg\\.XX5L.*"
59 | ],
60 | "host": "https://externaldata.datatest.org/index/",
61 | "name": "External DSTest",
62 | "type": "indexd"
63 | },
64 | {
65 | "hints": [
66 | ".*dg\\.XXTS.*"
67 | ],
68 | "host": "https://test1.testcommons9.org/index/",
69 | "name": "TestSTAGE",
70 | "type": "indexd"
71 | }
72 | ]
--------------------------------------------------------------------------------
/tests/download_tests/resources/manifest_package.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "object_id": "dg.xxTS/4bc4e600-1eda-4f81-aa2b-7c33dad78bec",
4 | "commons_url": "test.commons1.io"
5 | },
6 | {
7 | "object_id": "dg.xxTS/4bc4e600-1eda-4f81-aa2b-7c33dad78bed",
8 | "commons_url": "test.commons1.io"
9 | },
10 | {
11 | "object_id": "dg.xxTS/4bc4e600-1eda-4f81-aa2b-7c33dad78bee",
12 | "commons_url": "test.commons1.io"
13 | },
14 | {
15 | "object_id": "dg.xxTS/4bc4e600-1eda-4f81-aa2b-7c33dad78bef",
16 | "commons_url": "test.commons1.io"
17 | },
18 | {
19 | "object_id": "dg.xxTS/4bc4e600-1eda-4f81-aa2b-7c33dad78beg",
20 | "commons_url": "test.commons1.io"
21 | },
22 | {
23 | "object_id": "dg.xxTS/4bc4e600-1eda-4f81-aa2b-7c33dad78bex",
24 | "commons_url": "test.commons1.io"
25 | }
26 | ]
--------------------------------------------------------------------------------
/tests/download_tests/resources/manifest_test_1.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "md5sum": "65196806d31002bd48abed020d861cf1",
4 | "file_name": "TestDataSet1.sav",
5 | "file_size": 1566369,
6 | "object_id": "dg.XXTS/b96018c5-db06-4af8-a195-28e339ba815e",
7 | "commons_url": "test.commons1.io"
8 | },
9 | {
10 | "md5sum": "8371753a1324d421cc519cc03dcd477d",
11 | "file_name": "TestDataSet_April2020.sav",
12 | "file_size": 313525,
13 | "object_id": "dg.XXTS/6d3eb293-8388-4c5d-83ef-d0c2bd5ba604",
14 | "commons_url": "test.commons1.io"
15 | },
16 | {
17 | "md5sum": "206e2d1e7688452e0527aa67a9e67ed0",
18 | "file_name": "TestDataSet_June2020.sav",
19 | "file_size": 370292,
20 | "object_id": "dg.XXTS/6f9a924f-9d83-4597-8f66-fe7d3021729f",
21 | "commons_url": "test.commons1.io"
22 | },
23 | {
24 | "md5sum": "7f1378afee0cbd72e159036fa64c4aa4",
25 | "file_name": "TestDataSet_Oct2020.sav",
26 | "file_size": 367990,
27 | "object_id": "dg.XXTS/0e618fef-e359-424b-b844-0ca320105176",
28 | "commons_url": "test.commons1.io"
29 | }
30 | ]
--------------------------------------------------------------------------------
/tests/download_tests/resources/manifest_test_2.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "object_id": "dg.XXTS/b96018c5-db06-4af8-a195-28e339ba815e"
4 | }
5 | ]
--------------------------------------------------------------------------------
/tests/download_tests/resources/manifest_test_3.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "object_id": "dg.XXTS/b96018c5-db06-4af8-a195-28e339ba815e"
4 | },
5 | {
6 | "object_id": "dg.XXTS/6f9a924f-9d83-4597-8f66-fe7d3021729f"
7 | }
8 | ]
--------------------------------------------------------------------------------
/tests/download_tests/resources/manifest_test_bad_id.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "object_id": "dg.XXTST/b96018c5-db06-4af8-a195-28e339ba815e"
4 | }
5 | ]
--------------------------------------------------------------------------------
/tests/download_tests/resources/manifest_test_drs_compact.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "object_id": "dg.XXTS/b96018c5-db06-4af8-a195-28e339ba815e"
4 | },
5 | {
6 | "object_id": "dg.XXTS/6d3eb293-8388-4c5d-83ef-d0c2bd5ba604"
7 | },
8 | {
9 | "object_id": "dg.XXTS/6f9a924f-9d83-4597-8f66-fe7d3021729f"
10 | },
11 | {
12 | "object_id": "dg.XXTS/0e618fef-e359-424b-b844-0ca320105176"
13 | },
14 | {
15 | "object_id": "dg.XXTS/0e618fef-e359-424b-b844-0ca32010517a"
16 | }
17 | ]
--------------------------------------------------------------------------------
/tests/download_tests/resources/manifest_test_hostname_not_in_wts.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "md5sum": "65196806d31002bd48abed020d861cf1",
4 | "file_name": "TestDataSet1.sav",
5 | "file_size": 1566369,
6 | "object_id": "dg.XX23/b96018c5-db06-4af8-a195-28e339ba815e",
7 | "commons_url": "test.commons3.io"
8 | }
9 | ]
--------------------------------------------------------------------------------
/tests/download_tests/resources/valid_external_file_metadata.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "external_oidc_idp": "externaldata-keycloak",
4 | "file_retriever": "QDR",
5 | "study_id": "QDR_study_01"
6 | },
7 | {
8 | "external_oidc_idp": "externaldata-keycloak",
9 | "file_retriever": "QDR",
10 | "file_id": "QDR_file_02"
11 | }
12 | ]
--------------------------------------------------------------------------------
/tests/download_tests/resources/wts_oidc.json:
--------------------------------------------------------------------------------
1 | {
2 | "test.datacommons.io": {
3 | "providers": [
4 | {
5 | "base_url": "https://test.commons1.io",
6 | "idp": "test-google",
7 | "name": "TEST1 Google Login",
8 | "refresh_token_expiration": "9 days",
9 | "urls": [
10 | {
11 | "name": "TEST Google Login",
12 | "url": "https://test.datacommons.io/wts/oauth2/authorization_url?idp=test-google"
13 | }
14 | ]
15 | },
16 | {
17 | "base_url": "https://externaldata.commons2.org",
18 | "idp": "externaldata-google",
19 | "name": "FAIR Repository Google Login",
20 | "refresh_token_expiration": "7 days",
21 | "urls": [
22 | {
23 | "name": "FAIR Repository Google Login",
24 | "url": "https://test.datacommons.io/wts/oauth2/authorization_url?idp=externaldata-google"
25 | }
26 | ]
27 | }
28 | ]
29 | }
30 | }
--------------------------------------------------------------------------------
/tests/merge_manifests/column_mismatch/expected-merged-output-manifest.tsv:
--------------------------------------------------------------------------------
1 | acl authz guid md5 size urls
2 | 04e6256303c44ccd4ce4019e8d28a7fc 4667 gs://myotherfile1.txt
3 | 10cb099ac4adff0b0f12b92faa670248 4338 gs://myotherfile2.txt
4 | X Y dg.456/789 c49d4ebfd75b4e763866c3089358137a 15790 s3://myfile2.txt
5 | X Y dg.123/456 60cd84362c7ee2b173381aee5a89fce9 10486 s3://myfile1.txt
--------------------------------------------------------------------------------
/tests/merge_manifests/column_mismatch/input/manifest1.tsv:
--------------------------------------------------------------------------------
1 | size url md5sum
2 | 4667 gs://myotherfile1.txt 04e6256303c44ccd4ce4019e8d28a7fc
3 | 4338 gs://myotherfile2.txt 10cb099ac4adff0b0f12b92faa670248
4 |
--------------------------------------------------------------------------------
/tests/merge_manifests/column_mismatch/input/manifest2.tsv:
--------------------------------------------------------------------------------
1 | size md5 acl urls guid authz
2 | 10486 60cd84362c7ee2b173381aee5a89fce9 X Y s3://myfile1.txt dg.123/456
3 | 15790 c49d4ebfd75b4e763866c3089358137a X Y s3://myfile2.txt dg.456/789
--------------------------------------------------------------------------------
/tests/merge_manifests/duplicate_values/expected-merged-output-manifest.tsv:
--------------------------------------------------------------------------------
1 | acl authz guid md5 size urls food
2 | 81b62b73d499812d16e71e33a5454654 42 dogs.com cats.com sushi pizza
3 |
--------------------------------------------------------------------------------
/tests/merge_manifests/duplicate_values/input/manifest1.tsv:
--------------------------------------------------------------------------------
1 | size md5 url food
2 | 42 81b62b73d499812d16e71e33a5454654 cats.com pizza
3 |
--------------------------------------------------------------------------------
/tests/merge_manifests/duplicate_values/input/manifest2.tsv:
--------------------------------------------------------------------------------
1 | size md5 url food
2 | 42 81b62b73d499812d16e71e33a5454654 cats.com sushi
3 |
--------------------------------------------------------------------------------
/tests/merge_manifests/duplicate_values/input/manifest3.tsv:
--------------------------------------------------------------------------------
1 | size md5 url food
2 | 42 81b62b73d499812d16e71e33a5454654 dogs.com sushi
3 |
--------------------------------------------------------------------------------
/tests/merge_manifests/multiple_guids_per_hash/expected-merged-output-manifest.tsv:
--------------------------------------------------------------------------------
1 | acl authz guid md5 size urls more_data extra_data
2 | /baz /foo /foobar dg/123 f7cbeb4f7fcc139d95cb9cc1cf0696ec 42 http://cats.com s3://bucket/cats stuff stuff3
3 | /bar /foobar dg/124 f7cbeb4f7fcc139d95cb9cc1cf0696ec 42 http://cats.com stuff3
4 | /baz /foobar dg/125 f7cbeb4f7fcc139d95cb9cc1cf0696ec 42 s3://bucket/duplicate moredata stuff2 stuff3
5 |
--------------------------------------------------------------------------------
/tests/merge_manifests/multiple_guids_per_hash/input/manifest1.tsv:
--------------------------------------------------------------------------------
1 | guid md5 size urls authz more_data
2 | dg/123 f7cbeb4f7fcc139d95cb9cc1cf0696ec 42 http://cats.com /foo
3 | dg/125 f7cbeb4f7fcc139d95cb9cc1cf0696ec 42 s3://bucket/duplicate /baz moredata
4 |
--------------------------------------------------------------------------------
/tests/merge_manifests/multiple_guids_per_hash/input/manifest2.tsv:
--------------------------------------------------------------------------------
1 | guid md5 size urls authz extra_data
2 | f7cbeb4f7fcc139d95cb9cc1cf0696ec 42 /foobar stuff3
3 | dg/123 f7cbeb4f7fcc139d95cb9cc1cf0696ec 42 s3://bucket/cats /baz stuff
4 | dg/124 f7cbeb4f7fcc139d95cb9cc1cf0696ec 42 http://cats.com /bar
5 | dg/125 f7cbeb4f7fcc139d95cb9cc1cf0696ec 42 s3://bucket/duplicate /baz stuff2
6 |
--------------------------------------------------------------------------------
/tests/merge_manifests/multiple_urls/expected-merged-output-manifest.tsv:
--------------------------------------------------------------------------------
1 | acl authz guid md5 size urls
2 | 81b62b73d499812d16e71e33a5454654 42 cats.com dogs.com
3 |
--------------------------------------------------------------------------------
/tests/merge_manifests/multiple_urls/input/manifest1.tsv:
--------------------------------------------------------------------------------
1 | size md5 url
2 | 42 81b62b73d499812d16e71e33a5454654 cats.com
3 |
--------------------------------------------------------------------------------
/tests/merge_manifests/multiple_urls/input/manifest2.tsv:
--------------------------------------------------------------------------------
1 | size md5 url
2 | 42 81b62b73d499812d16e71e33a5454654 cats.com dogs.com
3 |
--------------------------------------------------------------------------------
/tests/merge_manifests/no_guid_same_md5_order/expected-merged-output-manifest.tsv:
--------------------------------------------------------------------------------
1 | guid size md5 acl authz urls metadata
2 | dg.4503/1234abcd 1193060 2de45cf5e6b2639c98b56b679cffc119 admin phsXXXXXX.c1 AUTHZ_HERE gs://path s3://path some_data
3 |
--------------------------------------------------------------------------------
/tests/merge_manifests/no_guid_same_md5_order/input/manifest_WITHOUT_guid.tsv:
--------------------------------------------------------------------------------
1 | guid size md5 acl authz urls metadata
2 | 1193060 2de45cf5e6b2639c98b56b679cffc119 phsXXXXXX.c1 admin AUTHZ_HERE gs://path s3://path some_data
--------------------------------------------------------------------------------
/tests/merge_manifests/no_guid_same_md5_order/input/manifest_with_guid.tsv:
--------------------------------------------------------------------------------
1 | guid size md5 acl authz urls metadata
2 | dg.4503/1234abcd 1193060 2de45cf5e6b2639c98b56b679cffc119 phsXXXXXX.c1 admin AUTHZ_HERE gs://path s3://path some_data
--------------------------------------------------------------------------------
/tests/merge_manifests/regular/expected-merged-output-manifest.tsv:
--------------------------------------------------------------------------------
1 | acl authz guid md5 size urls extra_data more_data some_additional_data
2 | phs_test f7cbeb4f7fcc139d95cb9cc1cf0696ec 1142 s3://cdistest-giangb-bucket1-databucket-gen3/ReportPrefix/job-46419099-6153-4011-a44a-829470e02758/results/57cad0b7636f54d4eb9e695f62e026ad9e333940.csv one 1
3 | phs_test phs_dev 4daa609f5c12b9354c901eb57d56398e 476 s3://cdistest-giangb-bucket1-databucket-gen3/ReportPrefix/job-8e49040d-36f0-4cc5-a4d3-d509e48b9dd7/results/1e1966b4c35dc5fa1a45760e5c24241b3f88fdca.csv s3://cdistest-giangb-bucket1-databucket-gen3/ReportPrefix/job-d2e89015-9344-4b4a-a9cc-f4f6f48c41fd/results/d25d76f50aec8b508b74d1da28a0b6f482fff341.csv s3://cdistest-giangb-bucket1-databucket-gen3/reports/object_metadata/job-2053f094-d99b-4f72-a491-9e2aa200a132/results/c5fbdf5ab6cef21b72ac2cdbd1712143159996e2.csv s3://cdistest-giangb-bucket1-databucket-gen3/reports/object_metadata/job-15fa3e5a-aeb3-4a9e-a40e-f5737abd9888/results/f3f950846e4c218e15c51916ec054108de577f45.csv foo bar two three 2 3 4 5
4 | phs_test 8279d92917f24cc63dca0c3d51cdb33f 478 s3://cdistest-giangb-bucket1-databucket-gen3/ReportPrefix/job-70ababcb-c9bf-40b8-a1ee-f388b575ad73/manifest.json buzz 6
5 |
--------------------------------------------------------------------------------
/tests/merge_manifests/regular/input/manifest1.tsv:
--------------------------------------------------------------------------------
1 | url size md5 authz some_additional_data more_data
2 | s3://cdistest-giangb-bucket1-databucket-gen3/ReportPrefix/job-46419099-6153-4011-a44a-829470e02758/results/57cad0b7636f54d4eb9e695f62e026ad9e333940.csv 1142 f7cbeb4f7fcc139d95cb9cc1cf0696ec phs_test 1 one
3 | s3://cdistest-giangb-bucket1-databucket-gen3/ReportPrefix/job-8e49040d-36f0-4cc5-a4d3-d509e48b9dd7/results/1e1966b4c35dc5fa1a45760e5c24241b3f88fdca.csv 476 4daa609f5c12b9354c901eb57d56398e phs_test 2 two
4 | s3://cdistest-giangb-bucket1-databucket-gen3/ReportPrefix/job-d2e89015-9344-4b4a-a9cc-f4f6f48c41fd/results/d25d76f50aec8b508b74d1da28a0b6f482fff341.csv 476 4daa609f5c12b9354c901eb57d56398e phs_test 3 three
5 |
--------------------------------------------------------------------------------
/tests/merge_manifests/regular/input/manifest2.tsv:
--------------------------------------------------------------------------------
1 | url size md5 authz some_additional_data extra_data
2 | s3://cdistest-giangb-bucket1-databucket-gen3/reports/object_metadata/job-2053f094-d99b-4f72-a491-9e2aa200a132/results/c5fbdf5ab6cef21b72ac2cdbd1712143159996e2.csv 476 4daa609f5c12b9354c901eb57d56398e phs_test 4 foo
3 | s3://cdistest-giangb-bucket1-databucket-gen3/reports/object_metadata/job-15fa3e5a-aeb3-4a9e-a40e-f5737abd9888/results/f3f950846e4c218e15c51916ec054108de577f45.csv 476 4daa609f5c12b9354c901eb57d56398e phs_dev 5 bar
4 | s3://cdistest-giangb-bucket1-databucket-gen3/ReportPrefix/job-70ababcb-c9bf-40b8-a1ee-f388b575ad73/manifest.json 478 8279d92917f24cc63dca0c3d51cdb33f phs_test 6 buzz
5 |
--------------------------------------------------------------------------------
/tests/merge_manifests/same_guid_for_same_hash/expected-merged-output-manifest.tsv:
--------------------------------------------------------------------------------
1 | acl authz guid md5 size urls extra_data
2 | dg/123 f7cbeb4f7fcc139d95cb9cc1cf0696ec 42 http://cats.com foo bar
3 | dg/124 4daa609f5c12b9354c901eb57d56398e 108 http://dogs.com
4 | dg/125 8279d92917f24cc63dca0c3d51cdb33f 72 http://ferrets.com baz
5 |
--------------------------------------------------------------------------------
/tests/merge_manifests/same_guid_for_same_hash/input/manifest1.tsv:
--------------------------------------------------------------------------------
1 | guid md5 size urls extra_data
2 | dg/123 f7cbeb4f7fcc139d95cb9cc1cf0696ec 42 http://cats.com foo
3 | dg/124 4daa609f5c12b9354c901eb57d56398e 108 http://dogs.com
4 |
--------------------------------------------------------------------------------
/tests/merge_manifests/same_guid_for_same_hash/input/manifest2.tsv:
--------------------------------------------------------------------------------
1 | guid md5 size urls extra_data
2 | dg/123 f7cbeb4f7fcc139d95cb9cc1cf0696ec 42 http://cats.com bar
3 | dg/125 8279d92917f24cc63dca0c3d51cdb33f 72 http://ferrets.com baz
4 |
--------------------------------------------------------------------------------
/tests/merge_manifests/size_mismatch/input/manifest1.tsv:
--------------------------------------------------------------------------------
1 | size md5 url
2 | 42 81b62b73d499812d16e71e33a5454654 cats.com
3 |
--------------------------------------------------------------------------------
/tests/merge_manifests/size_mismatch/input/manifest2.tsv:
--------------------------------------------------------------------------------
1 | size md5 url
2 | 43 81b62b73d499812d16e71e33a5454654 cats.com
3 |
--------------------------------------------------------------------------------
/tests/test_configure.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import uuid
3 | from unittest.mock import patch
4 | import json
5 | import os
6 | import gen3.configure as config_tool
7 |
8 |
def mock_endpoint(_):
    """Return a fixed fake API endpoint, ignoring the single argument.

    Used in this module as the replacement passed to
    ``patch("gen3.auth.endpoint_from_token", ...)``.
    """
    fake_endpoint = "https://mock.planx-pla.net"
    return fake_endpoint
11 |
12 |
def mock_access_key(_):
    """Return a fixed fake access key, ignoring the single argument.

    Used in this module as the replacement passed to
    ``patch("gen3.auth.get_access_token_with_key", ...)``.
    """
    fake_access_key = "mock_access_key"
    return fake_access_key
15 |
16 |
# Shared fixtures for the configure tests in this module.
profile = "DummyProfile"
# Section-header line the tests expect for `profile`.
expected_profile_line = "[" + profile + "]\n"
creds = {"key_id": "1234", "api_key": "abc"}

# key=value lines expected for `creds`, using the values returned by the
# mock endpoint / access-key helpers defined above.
new_lines = [
    "key_id={}\n".format(creds["key_id"]),
    "api_key={}\n".format(creds["api_key"]),
    "api_endpoint={}\n".format(mock_endpoint(None)),
    "access_key={}\n".format(mock_access_key(None)),
    "use_shepherd=\n",
    "min_shepherd_version=\n",
]

# Config content that already has a section for `profile`, filled with
# placeholder values; used as one of the parametrized inputs below.
lines_with_profile = [
    "[" + profile + "]\n",
    "key_id=random_key\n",
    "api_key=random_api_key\n",
    "api_endpoint=random_endpoint\n",
    "access_key=random_access_key\n",
    "use_shepherd=random_boolean\n",
    "min_shepherd_version=random_version\n",
]
38 |
39 |
@patch("gen3.auth.endpoint_from_token", mock_endpoint)
@patch("gen3.auth.get_access_token_with_key", mock_access_key)
def test_get_profile_from_creds(monkeypatch):
    """
    Check that ``config_tool.get_profile_from_creds`` returns the expected
    profile header line and key=value lines for a credentials JSON file.

    ``gen3.auth.endpoint_from_token`` and ``gen3.auth.get_access_token_with_key``
    are patched with the module's mock helpers. The credentials file is written
    to a uniquely named temporary path and always removed afterwards.
    """
    test_file_name = str(uuid.uuid4()) + ".json"
    try:
        profile = "DummyProfile"
        creds = {"key_id": "1234", "api_key": "abc"}
        with open(test_file_name, "w+") as cred_file:
            json.dump(creds, cred_file)

        profile_line, lines = config_tool.get_profile_from_creds(
            profile, test_file_name
        )
    finally:
        # Clean up even when the call above raises.
        if os.path.exists(test_file_name):
            os.remove(test_file_name)

    assert profile_line == expected_profile_line

    # zip() stops at the shorter input, so without this guard the loop below
    # would pass vacuously if too few (or no) lines were returned. Extra
    # trailing lines are still tolerated, as in the original comparison.
    lines = list(lines)
    assert len(lines) >= len(new_lines)
    for line, new_line in zip(lines, new_lines):
        assert line == new_line
60 |
61 |
@pytest.mark.parametrize("test_lines", [[], lines_with_profile])
def test_update_config_lines(test_lines, monkeypatch):
    """
    Check that ``config_tool.update_config_lines`` leaves the config file
    containing exactly the expected profile line followed by ``new_lines``.

    Runs once with no starting lines and once with lines that already hold a
    section for the profile. ``config_tool.CONFIG_FILE_PATH`` is pointed at a
    uniquely named scratch file, which is removed afterwards.
    """
    scratch_path = str(uuid.uuid4())
    monkeypatch.setattr(config_tool, "CONFIG_FILE_PATH", scratch_path)
    try:
        config_tool.update_config_lines(test_lines, expected_profile_line, new_lines)
        with open(scratch_path, "r") as config_file:
            written_lines = config_file.readlines()
        assert written_lines == [expected_profile_line] + new_lines
    finally:
        # Remove the scratch file whether or not the assertions passed.
        if os.path.exists(scratch_path):
            os.remove(scratch_path)
73 |
--------------------------------------------------------------------------------
/tests/test_data/crosswalk/crosswalk_1.csv:
--------------------------------------------------------------------------------
1 | https://data.midrc.org|gen3_node_property|Case.submitter_id, https://gen3.biodatacatalyst.nhlbi.nih.gov|gen3_node_property|Subject.submitter_id
2 | A01-00888, phs002363.v1_RC-1358
--------------------------------------------------------------------------------
/tests/test_data/crosswalk/crosswalk_2.csv:
--------------------------------------------------------------------------------
1 | https://data.midrc.org|gen3_node_property|Case.submitter_id, https://data.midrc.org|gen3_node_property|Case.data_submission_guid, https://data.midrc.org|masked_n3c_id|Masked N3C ID
2 | A01-00888, foobar, 123dfj4ia5oi*@a
--------------------------------------------------------------------------------
/tests/test_data/crosswalk/crosswalk_optional_info_1.csv:
--------------------------------------------------------------------------------
1 | commons_url,identifier_name,description
2 | https://data.midrc.org, Case.submitter_id, The uniquely assigned case identifier in MIDRC.
3 | https://gen3.biodatacatalyst.nhlbi.nih.gov, Subject.submitter_id, These identifiers are constructed as part of the data ingestion process in BDCat and concatenate the study and version with the study-provided subject ID (with a _ delimiting).
--------------------------------------------------------------------------------
/tests/test_data/crosswalk/crosswalk_optional_info_2.csv:
--------------------------------------------------------------------------------
1 | commons_url,identifier_name,description
2 | https://data.midrc.org, Case.data_submission_guid, The identifier for this subject as provided by the site’s submission of Datavant tokens to MIDRC.
3 | https://data.midrc.org,Masked N3C ID,Masked National COVID Consortium ID provided by a Linkage Honest Broker to the MIDRC system.
--------------------------------------------------------------------------------
/tests/test_data/crosswalk/empty_crosswalk_1.csv:
--------------------------------------------------------------------------------
1 | https://data.midrc.org|gen3_node_property|Case.submitter_id, https://gen3.biodatacatalyst.nhlbi.nih.gov|gen3_node_property|Subject.submitter_id
--------------------------------------------------------------------------------
/tests/test_data/crosswalk/empty_crosswalk_optional_info_1.csv:
--------------------------------------------------------------------------------
1 | commons_url,identifier_name,description
--------------------------------------------------------------------------------
/tests/test_data/crosswalk/empty_file.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uc-cdis/gen3sdk-python/0ca70c579d55cb0f6218fa7a534d02a8910acda1/tests/test_data/crosswalk/empty_file.csv
--------------------------------------------------------------------------------
/tests/test_data/crosswalk/full_crosswalk.csv:
--------------------------------------------------------------------------------
1 | https://data.midrc.org|gen3_node_property|Case.data_submission_guid,https://data.midrc.org|gen3_node_property|Case.submitter_id,https://data.midrc.org|masked_n3c_id|Masked N3C ID,https://gen3.biodatacatalyst.nhlbi.nih.gov|gen3_node_property|Subject.submitter_id
2 | foobar,A01-00888,123dfj4ia5oi*@a,phs002363.v1_RC-1358
3 |
--------------------------------------------------------------------------------
/tests/test_data/crosswalk/full_crosswalk_optional_info.csv:
--------------------------------------------------------------------------------
1 | commons_url,identifier_name,description
2 | https://data.midrc.org,Case.submitter_id,The uniquely assigned case identifier in MIDRC.
3 | https://gen3.biodatacatalyst.nhlbi.nih.gov,Subject.submitter_id,These identifiers are constructed as part of the data ingestion process in BDCat and concatenate the study and version with the study-provided subject ID (with a _ delimiting).
4 | https://data.midrc.org,Masked N3C ID,Masked National COVID Consortium ID provided by a Linkage Honest Broker to the MIDRC system.
5 | https://data.midrc.org,Case.data_submission_guid,The identifier for this subject as provided by the site’s submission of Datavant tokens to MIDRC.
6 |
--------------------------------------------------------------------------------
/tests/test_data/crosswalk/invalid_a_crosswalk_1.csv:
--------------------------------------------------------------------------------
1 | https://data.midrc.orggen3_node_property|Case.submitter_id, https://gen3.biodatacatalyst.nhlbi.nih.gov|gen3_node_propertySubject.submitter_id
2 | A01-00888, phs002363.v1_RC-1358
--------------------------------------------------------------------------------
/tests/test_data/crosswalk/invalid_a_crosswalk_optional_info_1.csv:
--------------------------------------------------------------------------------
1 | https://data.midrc.org, Case.submitter_id
2 | https://gen3.biodatacatalyst.nhlbi.nih.gov Subject.submitter_id, These identifiers are constructed as part of the data ingestion process in BDCat and concatenate the study and version with the study-provided subject ID (with a _ delimiting).
--------------------------------------------------------------------------------
/tests/test_data/crosswalk/invalid_b_crosswalk_1.csv:
--------------------------------------------------------------------------------
1 | https://data.midrc.org|gen3_node_property|Case.submitter_id|asdf|asdf, https://gen3.biodatacatalyst.nhlbi.nih.gov|gen3_node_property|Subject.submitter_id|888
2 | A01-00888, phs002363.v1_RC-1358, asdf, asdf, asdf
--------------------------------------------------------------------------------
/tests/test_data/crosswalk/invalid_b_crosswalk_optional_info_1.csv:
--------------------------------------------------------------------------------
1 | commons_url,identifier_name,description,asdf,hjkl
2 | https://data.midrc.org, Case.submitter_id, The uniquely assigned case identifier in MIDRC.
3 | https://gen3.biodatacatalyst.nhlbi.nih.gov, Subject.submitter_id, These identifiers are constructed as part of the data ingestion process in BDCat and concatenate the study and version with the study-provided subject ID (with a _ delimiting).
--------------------------------------------------------------------------------
/tests/test_data/crosswalk/invalid_c_crosswalk_1.csv:
--------------------------------------------------------------------------------
1 | https://data.midrc.org|gen3_node_property|Case.submitter_id, https://gen3.biodatacatalyst.nhlbi.nih.gov|gen3_node_property|Subject.submitter_id
2 | A01-00888, phs002363.v1_RC-1358, too, many, items
--------------------------------------------------------------------------------
/tests/test_data/crosswalk/invalid_c_crosswalk_optional_info_1.csv:
--------------------------------------------------------------------------------
1 | commons_url,identifier_name,description
2 | https://data.midrc.org, Case.submitter_id, The uniquely assigned case identifier in MIDRC.
3 | https://gen3.biodatacatalyst.nhlbi.nih.gov, Subject.submitter_id, These identifiers are constructed as part of the data ingestion process in BDCat and concatenate the study and version with the study-provided subject ID (with a _ delimiting)., another item, there's too many columns in this row, this will fail
--------------------------------------------------------------------------------
/tests/test_data/diff_manifests/manifest3.tsv:
--------------------------------------------------------------------------------
1 | guid md5 size authz url
2 | 255e396f-f1f8-11e9-9a07-0a80fada096c 473d83400bc1bc9dc635e334fadd133c 363455714 ['phs0001', 'phs0002'] ['s3://pdcdatastore/test4.raw']
3 | 255e396f-f1f8-11e9-9a07-0a80fada010c 473d83400bc1bc9dc635e334fadde33c 363455714 ['Open'] ['s3://pdcdatastore/test5.raw']
4 | 255e396f-f1f8-11e9-9a07-0a80fada012c 473d83400bc1bc9dc635e334fadde33c 363455714 ['Open'] ['s3://pdcdatastore/test6.raw']
5 |
--------------------------------------------------------------------------------
/tests/test_data/diff_manifests/manifest4.tsv:
--------------------------------------------------------------------------------
1 | guid md5 size authz url
2 | 255e396f-f1f8-11e9-9a07-0a80fada096c 473d83400bc1bc9dc635e334fadd133c 363455714 ['phs0002', 'phs0001'] ['s3://pdcdatastore/test4.raw']
3 | 255e396f-f1f8-11e9-9a07-0a80fada010c 473d83400bc1bc9dc635e334fadde33c 363455714 ['Open'] ['s3://pdcdatastore/test4.raw']
4 | 255e396f-f1f8-11e9-9a07-0a80fada012c 473d83400bc1bc9dc635e334fadde33c 363455715 ['Open'] ['s3://pdcdatastore/test6.raw']
5 |
--------------------------------------------------------------------------------
/tests/test_data/manifest1.csv:
--------------------------------------------------------------------------------
1 | guid,md5,size,authz,url
2 | 255e396f-f1f8-11e9-9a07-0a80fada099c,473d83400bc1bc9dc635e334faddf33c,363455714,['Open'],['s3://pdcdatastore/test1.raw']
3 | 255e396f-f1f8-11e9-9a07-0a80fada098c,473d83400bc1bc9dc635e334faddd33c,343434344,['Open'],['s3://pdcdatastore/test2.raw']
4 | 255e396f-f1f8-11e9-9a07-0a80fada097c,473d83400bc1bc9dc635e334fadd433c,543434443,"['phs0001', 'phs0002']",['s3://pdcdatastore/test3.raw']
5 |
--------------------------------------------------------------------------------
/tests/test_data/manifest2.csv:
--------------------------------------------------------------------------------
1 | guid,md5,size,authz,url
2 | 255e396f-f1f8-11e9-9a07-0a80fada099c,473d83400bc1bc9dc635e334faddf33d,363455714,['Open'],['s3://pdcdatastore/test1.raw']
3 | 255e396f-f1f8-11e9-9a07-0a80fada098d,473d83400bc1bc9dc635e334faddd33c,343434344,['Open'],['s3://pdcdatastore/test2.raw']
4 | 255e396f-f1f8-11e9-9a07-0a80fada097c,473d83400bc1bc9dc635e334fadd433c,543434443,['phs0001'],['s3://pdcdatastore/test3.raw']
5 |
--------------------------------------------------------------------------------
/tests/test_data/manifest_additional_metadata.tsv:
--------------------------------------------------------------------------------
1 | authz fancy_column file_name md5 size url
2 | ['/open'] fancy_data file.txt 473d83400bc1bc9dc635e334faddf33c 363455714 s3://my-data-bucket/dg.1234/path/file.txt
3 |
--------------------------------------------------------------------------------
/tests/test_data/manifest_additional_metadata_mult_guids.tsv:
--------------------------------------------------------------------------------
1 | guid authz columnA columnB file_name md5 size url columnC
2 | 111e396f-f1f8-11e9-9a07-0a80fada0900 ['/open'] dataA file.txt 111d83400bc1bc9dc635e334faddf33c 111 s3://my-data-bucket/dg.111/path/file.txt
3 | 222e396f-f1f8-11e9-9a07-0a80fada0900 ['/open'] dataB file.txt 222d83400bc1bc9dc635e334faddf33c 222 s3://my-data-bucket/dg.222/path/file.txt
--------------------------------------------------------------------------------
/tests/test_data/packages_manifest_bad_format.tsv:
--------------------------------------------------------------------------------
1 | authz md5 package_contents record_type size url
2 | ["/open"] 473d83400bc1bc9dc635e334faddf33c [{"bad_contents": "hello"}] package 363455714 s3://my-data-bucket/dg.1234/path/package.zip
3 |
--------------------------------------------------------------------------------
/tests/test_data/packages_manifest_not_a_package.tsv:
--------------------------------------------------------------------------------
1 | authz md5 package_contents record_type size url
2 | ["/open"] 473d83400bc1bc9dc635e334faddf33c [{"file_name": "file.text"}] object 363455714 s3://my-data-bucket/dg.1234/path/file.text
3 |
--------------------------------------------------------------------------------
/tests/test_data/packages_manifest_ok.tsv:
--------------------------------------------------------------------------------
1 | record_type guid md5 size authz url file_name package_contents
2 | object 255e396f-f1f8-11e9-9a07-0a80fada0900 473d83400bc1bc9dc635e334faddf33c 363455714 ['/open/packages'] s3://my-data-bucket/dg.1234/path/package.zip package.zip
3 | package 255e396f-f1f8-11e9-9a07-0a80fada0901 473d83400bc1bc9dc635e334faddf33c 363455714 ['/open/packages'] s3://my-data-bucket/dg.1234/path/package.zip package.zip [{"hashes":{"md5sum":"2cd6ee2c70b0bde53fbe6cac3c8b8bb1"},"file_name":"yes.txt","size":35},{"hashes":{"md5sum":"30cf3d7d133b08543cb6c8933c29dfd7"},"file_name":"hi.txt","size":35}]
4 | package 255e396f-f1f8-11e9-9a07-0a80fada0902 473d83400bc1bc9dc635e334faddf33c 363455714 ['/open/packages'] gs://my-google-data-bucket/dg.1234/path/package.zip package.zip
5 | package 255e396f-f1f8-11e9-9a07-0a80fada0903 473d83400bc1bc9dc635e334faddf33c 363455714 ['/open/packages'] ['s3://my-data-bucket/dg.1234/path/package.zip', 'gs://my-google-data-bucket/dg.1234/path/other_file_name.zip'] [{"hashes":{"md5sum":"2cd6ee2c70b0bde53fbe6cac3c8b8bb1"},"file_name":"yes.txt","size":35},{"hashes":{"md5sum":"30cf3d7d133b08543cb6c8933c29dfd7"},"file_name":"hi.txt","size":35}]
6 | package 473d83400bc1bc9dc635e334faddf33c 363455714 ['/open/packages'] s3://my-data-bucket/dg.1234/path/package.zip package.zip [{"hashes":{"md5sum":"2cd6ee2c70b0bde53fbe6cac3c8b8bb1"},"file_name":"yes.txt","size":35},{"hashes":{"md5sum":"30cf3d7d133b08543cb6c8933c29dfd7"},"file_name":"hi.txt","size":35}]
7 |
--------------------------------------------------------------------------------
/tests/test_data/test.tsv:
--------------------------------------------------------------------------------
1 | guid md5 size authz acl url file_name prev_guid
2 | 255e396f-f1f8-11e9-9a07-0a80fada099c 473d83400bc1bc9dc635e334faddf33c 363455714 ['Open'] [s3://pdcdatastore/test1.raw]
3 | 255e396f-f1f8-11e9-9a07-0a80fada098c 473d83400bc1bc9dc635e334faddd33c 343434344 /program/DEV/project/test Open s3://pdcdatastore/test2.raw
4 | 255e396f-f1f8-11e9-9a07-0a80fada097c 473d83400bc1bc9dc635e334fadd433c 543434443 /program/DEV/project/test phs0001 phs0002 s3://pdcdatastore/test3.raw
5 | 255e396f-f1f8-11e9-9a07-0a80fada096c 473d83400bc1bc9dc635e334fadd133c 363455714 /program/DEV/project/test ['phs0001', 'phs0002'] ['s3://pdcdatastore/test4.raw'] test4_file.raw
6 | 255e396f-f1f8-11e9-9a07-0a80fada010c 473d83400bc1bc9dc635e334fadde33c 363455714 /program/DEV/project/test ['Open'] s3://pdcdatastore/test5.raw
7 | 255e396f-f1f8-11e9-9a07-0a80fada012c 473d83400bc1bc9dc635e334fadde33c 363455714 /prog%20ram/DEV/project/test ['Op%20en'] s3://pdcdatastore/test6%20space.raw 255e396f-f1f8-11e9-9a07-0a80fada010c
8 |
--------------------------------------------------------------------------------
/tests/test_data/test2.tsv:
--------------------------------------------------------------------------------
1 | md5 size authz acl url
2 | 273d83400bc1bc9dc635e334faddf33c 363455714 ['Open'] [s3://pdcdatastore/test1.raw]
3 | 273d83400bc1bc9dc635e334faddd33c 343434344 /program/DEV/project/test Open s3://pdcdatastore/test2.raw
--------------------------------------------------------------------------------
/tests/test_data/test_manifest.csv:
--------------------------------------------------------------------------------
1 | guid,authz,acl,file_size,md5,urls
2 | dg.TEST/f2a39f98-6ae1-48a5-8d48-825a0c52a22b,/programs/DEV/projects/test,DEV test,123,a1234567891234567890123456789012,gs://test/test.txt s3://testaws/aws/test.txt
3 | dg.TEST/1e9d3103-cbe2-4c39-917c-b3abad4750d2,/programs/DEV/projects/test2,DEV,235,c1234567891234567890123456789012,"gs://test/test%203.txt s3://testaws/file%20space.txt s3://testaws/aws/file,with,comma.txt"
4 | dg.TEST/9c205cd7-c399-4503-9f49-5647188bde66,/programs/DEV/projects/test3 /programs/DEV/projects/test3bak,DEV test3,334,b1334567891334567890133456789013,gs://test/test.txt
5 |
--------------------------------------------------------------------------------
/tests/test_dbgap_fhir.py:
--------------------------------------------------------------------------------
1 | """
2 | Tests gen3.nih
3 | """
4 | import json
5 | import os
6 | import pytest
7 | import sys
8 |
9 | import requests
10 | from requests.auth import HTTPBasicAuth
11 | from unittest.mock import MagicMock, patch
12 |
13 | try:
14 | from gen3.external.nih.dbgap_fhir import dbgapFHIR
15 | except ModuleNotFoundError as exc:
16 | raise ModuleNotFoundError(
17 | "Missing some modules for optional external API parsing. Ensure you've "
18 | "installed all optional extras using `poetry install --all-extras`. "
19 | f"Original error: {exc}"
20 | )
21 | from tests.test_discovery import _get_tsv_data
22 | from tests.utils_mock_fhir_response import (
23 | MOCK_NIH_DBGAP_FHIR_RESPONSE_FOR_PHS000007,
24 | MOCK_NIH_DBGAP_FHIR_RESPONSE_FOR_PHS000166,
25 | )
26 |
27 |
def test_dbgap_fhir(tmp_path):
    """
    Test dbGaP FHIR parsing works and outputs expected fields and values.

    Note that the dbGaP FHIR response is mocked, but the response provided
    is a real response from the dbGaP FHIR Server (to simulate current state).

    This does not integration test the dbGaP FHIR server. In other words,
    if they change format and it would break our code, this will not catch that
    (and it's not the intention to catch that here). This is intended to unit
    test our code to ensure we don't break specifically our parsing in the future.
    """
    dbgap_fhir = dbgapFHIR(
        api="https://example.com/fhir/x1",
        auth_provider=HTTPBasicAuth("DATACITE_USERNAME", "DATACITE_PASSWORD"),
    )

    # Canned responses keyed by the request path the client is expected to use.
    mock_responses = {
        "ResearchStudy/phs000007": MOCK_NIH_DBGAP_FHIR_RESPONSE_FOR_PHS000007,
        "ResearchStudy/phs000166": MOCK_NIH_DBGAP_FHIR_RESPONSE_FOR_PHS000166,
    }

    def _mock_request(path, **kwargs):
        assert "ResearchStudy" in path
        # should have requested these studies from the API,
        # if it didn't, something went wrong
        assert path in mock_responses
        return mock_responses[path]

    dbgap_fhir.fhir_client.server.request_json = MagicMock(side_effect=_mock_request)

    phsids = [
        "phs000007.v1.p1.c1",
        "phs000166.c3",
    ]

    metadata = dbgap_fhir.get_metadata_for_ids(phsids)

    assert metadata

    for phsid in phsids:
        assert phsid in metadata

    expected_keys = {
        "phs000007.v1.p1.c1": [
            "StudyOverviewUrl",
            "ReleaseDate",
            "StudyConsents",
            "Citers",
            "NumPhenotypeDatasets",
            "NumMolecularDatasets",
            "NumVariables",
            "NumDocuments",
            "NumAnalyses",
            "NumSubjects",
            "NumSamples",
            "NumSubStudies",
            "Id",
            "Category",
            "Condition",
            "Description",
            "Enrollment",
            "Focus",
            "Identifier",
            "Keyword",
            "Sponsor",
            "Status",
            "Title",
            "ResourceType",
        ],
        "phs000166.c3": [
            "StudyOverviewUrl",
            "ReleaseDate",
            "StudyConsents",
            "Citers",
            "NumPhenotypeDatasets",
            "NumMolecularDatasets",
            "NumVariables",
            "NumDocuments",
            "NumSubjects",
            "NumSamples",
            "NumSubStudies",
            "Id",
            "Category",
            "Description",
            "Enrollment",
            "Identifier",
            "Sponsor",
            "Status",
            "Title",
            "ResourceType",
        ],
    }

    for phsid, keys in expected_keys.items():
        for key in keys:
            assert key in metadata[phsid]

    # check a few values to ensure correct parsing and representation as string
    assert metadata["phs000007.v1.p1.c1"]["NumSubjects"] == "15144"
    assert metadata["phs000166.c3"]["NumSubjects"] == "4046"

    assert metadata["phs000007.v1.p1.c1"]["Title"] == "Framingham Cohort"
    # isinstance() rather than an exact type() comparison, so a list subclass
    # is still recognised as a list.
    assert isinstance(metadata["phs000166.c3"]["Citers"], list)

    # these should have been converted to a single string, not a list
    for item in dbgap_fhir.suspected_single_item_list_fields:
        capitalized_item = item[:1].upper() + item[1:]
        for phsid in phsids:
            if capitalized_item in metadata[phsid]:
                assert not isinstance(metadata[phsid][capitalized_item], list)

    # ensure the custom fields got added
    for phsid, study_accession in (
        ("phs000007.v1.p1.c1", "phs000007"),
        ("phs000166.c3", "phs000166"),
    ):
        assert "ResearchStudyURL" in metadata[phsid]
        assert study_accession in metadata[phsid]["ResearchStudyURL"]

    for phsid in phsids:
        assert "Disclaimer" in metadata[phsid]

    # Round-trip through the TSV writer and compare with the checked-in fixture.
    # NOTE(review): the fixture path is relative to the working directory, so
    # this presumably expects pytest to be launched from the repository root.
    file_name = tmp_path / "fhir_metadata_file_TEST.tsv"
    dbgapFHIR.write_data_to_file(metadata, file_name)
    assert _get_tsv_data(file_name) == _get_tsv_data(
        "tests/test_data/fhir_metadata.tsv"
    )
160 |
--------------------------------------------------------------------------------
/tests/test_diff.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import csv
3 | import os
4 | from gen3.tools.diff import manifest_diff
5 | from pathlib import Path
6 |
7 | cwd = os.path.dirname(os.path.realpath(__file__))
8 |
9 |
def test_directory_input_diff():
    """
    Run manifest_diff over a directory of manifests and verify that the
    manifest it writes contains the expected rows.
    """
    output_path = f"{cwd}/test_data/manifest_diff1.tsv"
    expected_rows = {
        "255e396f-f1f8-11e9-9a07-0a80fada010c": [
            "473d83400bc1bc9dc635e334fadde33c",
            "363455714",
            "['Open']",
            "['s3://pdcdatastore/test4.raw']",
        ],
        "255e396f-f1f8-11e9-9a07-0a80fada012c": [
            "473d83400bc1bc9dc635e334fadde33c",
            "363455715",
            "['Open']",
            "['s3://pdcdatastore/test6.raw']",
        ],
    }

    manifest_diff(
        directory=f"{cwd}/test_data/diff_manifests",
        output_manifest=output_path,
        key_column="guid",
    )

    assert check_diff(file=output_path, expected=expected_rows)
38 |
39 |
def test_file_input_diff():
    """
    Run manifest_diff over an explicit list of manifest files and verify that
    the manifest it writes contains the expected rows.
    """
    input_files = [
        f"{cwd}/test_data/manifest1.csv",
        f"{cwd}/test_data/manifest2.csv",
    ]
    output_path = f"{cwd}/test_data/manifest_diff2.csv"
    expected_rows = {
        "255e396f-f1f8-11e9-9a07-0a80fada099c": [
            "473d83400bc1bc9dc635e334faddf33d",
            "363455714",
            "['Open']",
            "['s3://pdcdatastore/test1.raw']",
        ],
        "255e396f-f1f8-11e9-9a07-0a80fada098d": [
            "473d83400bc1bc9dc635e334faddd33c",
            "343434344",
            "['Open']",
            "['s3://pdcdatastore/test2.raw']",
        ],
        "255e396f-f1f8-11e9-9a07-0a80fada097c": [
            "473d83400bc1bc9dc635e334fadd433c",
            "543434443",
            "['phs0001']",
            "['s3://pdcdatastore/test3.raw']",
        ],
    }

    manifest_diff(
        files=input_files,
        output_manifest=output_path,
        key_column="guid",
    )

    assert check_diff(file=output_path, expected=expected_rows)
74 |
75 |
def test_file_input_mismatch():
    """
    manifest_diff is expected to raise ValueError when the input files have
    different types (here one .csv and one .tsv manifest).
    """
    # Anchor the inputs on this module's directory, like the other tests in
    # this file, so the outcome does not depend on the directory pytest is
    # launched from.
    mismatched_files = [
        f"{cwd}/test_data/manifest1.csv",
        f"{cwd}/test_data/diff_manifests/manifest3.tsv",
    ]

    with pytest.raises(ValueError):
        manifest_diff(files=mismatched_files)
88 |
89 |
def test_no_diff():
    """
    Diff a manifest against itself and verify the resulting manifest has no
    unexpected rows.
    """
    same_manifest = f"{cwd}/test_data/manifest1.csv"
    output_path = f"{cwd}/test_data/manifest_diff3.csv"

    manifest_diff(
        files=[same_manifest, same_manifest],
        output_manifest=output_path,
        key_column="guid",
    )

    assert check_diff(file=output_path, expected={})
105 |
106 |
def check_diff(
    file,
    expected,
    **kwargs,
):
    """
    Compare a diff manifest on disk with the expected changes, then delete it.

    Args:
        file (str): path of the manifest to check; a path containing ".tsv"
            (case-insensitive) is read as tab-delimited, anything else as
            comma-delimited
        expected (dict): maps each expected "guid" value to the list of values
            allowed in that row's remaining columns
        **kwargs: accepted for call compatibility and ignored

    Returns:
        bool: True only if every row's guid is a key of `expected`, every
            non-guid value in a row appears in that guid's expected list, and
            every expected guid occurs in the file.
    """
    file_delimiter = "\t" if ".tsv" in str(file).lower() else ","

    try:
        with open(file, "r", encoding="utf-8-sig") as csvfile:
            # DictReader consumes the header line itself; every row it yields
            # is data, so none of them may be skipped.
            rows = list(csv.DictReader(csvfile, delimiter=file_delimiter))
    finally:
        # Always clean up the generated manifest, even if reading failed.
        remove_manifest(file)

    equivalent = True
    seen_guids = set()
    for row in rows:
        diff_guid = row["guid"]
        seen_guids.add(diff_guid)

        expected_values = expected.get(diff_guid)
        if expected_values is None:
            # A row nobody expected is a mismatch, not a crash.
            equivalent = False
            continue

        for column, value in row.items():
            if column != "guid" and value not in expected_values:
                equivalent = False

    # Expected rows that never showed up are a mismatch too.
    if seen_guids != set(expected):
        equivalent = False

    return equivalent


def remove_manifest(file):
    """
    Delete `file` if it exists; otherwise print a notice and do nothing.
    """
    if not os.path.exists(file):
        print("The file does not exist")
        return
    os.remove(file)
142 |
--------------------------------------------------------------------------------
/tests/test_import.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 |
def test_import():
    """Smoke test: the top-level `gen3` package can be imported."""
    import gen3  # noqa: F401
7 |
8 |
def test_cdisutilstest():
    """Smoke test: the `cdisutilstest` package can be imported."""
    import cdisutilstest  # noqa: F401
12 |
13 |
def test_indexclient():
    """Smoke test: the `indexclient` package can be imported."""
    import indexclient  # noqa: F401
17 |
18 |
def test_auth_import():
    """Smoke test: `Gen3Auth` can be imported from `gen3.auth`."""
    from gen3.auth import Gen3Auth  # noqa: F401
22 |
23 |
def test_submission_import():
    """Smoke test: `Gen3Submission` can be imported from `gen3.submission`."""
    from gen3.submission import Gen3Submission  # noqa: F401
27 |
28 |
def test_file_import():
    """Smoke test: `Gen3File` can be imported from `gen3.file`."""
    from gen3.file import Gen3File  # noqa: F401
32 |
--------------------------------------------------------------------------------
/tests/test_jobs.py:
--------------------------------------------------------------------------------
1 | import os
2 | from unittest.mock import MagicMock, patch
3 | import requests
4 | from requests.exceptions import HTTPError
5 |
6 | from gen3.jobs import Gen3Jobs, DBGAP_METADATA_JOB
7 |
8 | CURRENT_DIR = os.path.dirname(os.path.realpath(__file__))
9 |
10 |
@patch("gen3.jobs.requests.get")
@patch("gen3.jobs.requests.post")
def test_full_job_flow(requests_post_mock, requests_get_mock, gen3_auth):
    """
    Walk through the whole job lifecycle against mocked HTTP calls:
    create a job, poll its status until it is no longer running, then
    fetch its output.
    """
    job_uid = "fcbdb87d-83fe-11ea-a95c-12dda9fc743b"
    job_name = "get-dbgap-metadata-zjiio"

    def _ok_response(payload):
        # 200 response whose .json() yields `payload` and whose
        # raise_for_status() is a no-op.
        response = MagicMock(requests.Response)
        response.status_code = 200
        response.json.return_value = payload
        response.raise_for_status.side_effect = lambda *args: None
        return response

    def _mock_create_request(url, **kwargs):
        assert "/dispatch" in url
        return _ok_response({"uid": job_uid, "name": job_name, "status": "Unknown"})

    def _mock_get_status_request(url, **kwargs):
        assert "/status" in url
        return _ok_response({"uid": job_uid, "name": job_name, "status": "Completed"})

    def _mock_get_output_request(url, **kwargs):
        assert "/output" in url
        return _ok_response({"output": "foobar"})

    jobs = Gen3Jobs(gen3_auth)
    job_input = {
        "phsid_list": "phs000956 phs000920",
        "indexing_manifest_url": "https://example.com/public_indexing_manifest.csv",
        "manifests_mapping_config": {
            "guid_column_name": "guid",
            "row_column_name": "submitted_sample_id",
            "smaller_file_column_name": "urls",
        },
        "partial_match_or_exact_match": "partial_match",
    }

    # 1. create the job
    requests_post_mock.side_effect = _mock_create_request
    create_job = jobs.create_job(job_name=DBGAP_METADATA_JOB, job_input=job_input)
    assert create_job.get("uid") == job_uid
    assert DBGAP_METADATA_JOB in create_job.get("name")
    assert create_job.get("status")

    # 2. poll until the job leaves the "Running" state
    requests_get_mock.side_effect = _mock_get_status_request
    status = "Running"
    while status == "Running":
        status = jobs.get_status(create_job.get("uid")).get("status")
    assert status == "Completed"

    # 3. retrieve the output
    requests_get_mock.side_effect = _mock_get_output_request
    get_output = jobs.get_output(create_job.get("uid"))
    assert get_output.get("output") == "foobar"
89 |
90 |
@patch("gen3.jobs.requests.get")
def test_is_healthy(requests_mock, gen3_auth):
    """
    is_healthy() should be truthy when the status endpoint answers 200.
    """

    def _mock_request(url, **kwargs):
        assert url.endswith("/_status")

        healthy = MagicMock(requests.Response)
        healthy.status_code = 200
        healthy.text = "Healthy"
        healthy.raise_for_status.side_effect = lambda *args: None
        return healthy

    requests_mock.side_effect = _mock_request

    assert Gen3Jobs(gen3_auth).is_healthy()
113 |
114 |
@patch("gen3.jobs.requests.get")
def test_is_not_healthy(requests_mock, gen3_auth):
    """
    is_healthy() should be falsy when the status endpoint answers 500 and
    raise_for_status() raises HTTPError.
    """

    def _mock_request(url, **kwargs):
        assert url.endswith("/_status")

        unhealthy = MagicMock(requests.Response)
        unhealthy.status_code = 500
        unhealthy.text = "Not Healthy"
        unhealthy.json.return_value = {}
        unhealthy.raise_for_status.side_effect = HTTPError("uh oh")
        return unhealthy

    requests_mock.side_effect = _mock_request

    assert not Gen3Jobs(gen3_auth).is_healthy()
138 |
139 |
@patch("gen3.jobs.requests.get")
def test_get_version(requests_mock, gen3_auth):
    """
    get_version() should return the "version" value of the mocked response.
    """
    version_payload = {
        "commit": "bf5df61e6cb031adb9914704f04b71c57d44747a",
        "version": "2020.02-1-gbf5df61",
    }

    def _mock_request(url, **kwargs):
        assert url.endswith("version")

        version_response = MagicMock(requests.Response)
        version_response.status_code = 200
        version_response.json.return_value = version_payload
        version_response.raise_for_status.side_effect = lambda *args: None
        return version_response

    requests_mock.side_effect = _mock_request

    assert Gen3Jobs(gen3_auth).get_version() == "2020.02-1-gbf5df61"
165 |
--------------------------------------------------------------------------------
/tests/test_object.py:
--------------------------------------------------------------------------------
1 | """
2 | Tests gen3.object.Gen3Object for calls
3 | """
4 | from unittest.mock import MagicMock, patch
5 | import requests
6 | from httpx import delete
7 | import pytest
8 | from requests import HTTPError
9 |
10 |
@patch("gen3.object.requests.post")
def test_create_object_error(requests_mock, gen3_object):
    """
    create_object() should propagate the HTTPError raised by the mocked
    500 response.
    """

    def _mock_request(url, **kwargs):
        assert url.endswith("/mds/objects")

        failure = MagicMock(requests.Response)
        failure.status_code = 500
        failure.json.return_value = {"error": "blah"}
        failure.raise_for_status.side_effect = HTTPError("uh oh")
        return failure

    requests_mock.side_effect = _mock_request

    with pytest.raises(HTTPError):
        gen3_object.create_object("abc.txt", authz=None)
24 |
25 |
@patch("gen3.object.requests.post")
def test_create_object_success(requests_mock, gen3_object):
    """
    create_object() should return the guid and upload URL found in the
    mocked 200 response.
    """
    mock_guid = "abcd"
    mock_url = "https://example.com"

    def _mock_request(url, **kwargs):
        assert url.endswith("/mds/objects")

        success = MagicMock(requests.Response)
        success.status_code = 200
        success.json.return_value = {"guid": mock_guid, "upload_url": mock_url}
        return success

    requests_mock.side_effect = _mock_request

    created = gen3_object.create_object("abc.txt", authz=None)
    response_guid, response_upload_url = created

    assert response_guid == mock_guid
    assert response_upload_url == mock_url
44 |
45 |
@patch("gen3.object.requests.delete")
def test_delete_object_error(requests_mock, gen3_object):
    """
    delete_object() should propagate the HTTPError raised by the mocked
    500 response.
    """
    mock_guid = "1234"

    def _mock_request(url, **kwargs):
        assert url.endswith(f"/mds/objects/{mock_guid}")

        failure = MagicMock(requests.Response)
        failure.status_code = 500
        failure.json.return_value = {"error": "blah"}
        failure.raise_for_status.side_effect = HTTPError("uh oh")
        return failure

    requests_mock.side_effect = _mock_request

    with pytest.raises(HTTPError):
        gen3_object.delete_object(mock_guid)
61 |
62 |
@pytest.mark.parametrize(
    "delete_file_locations",
    [True, False],
)
@patch("gen3.object.requests.delete")
def test_delete_object_success(requests_mock, gen3_object, delete_file_locations):
    """
    delete_object() should issue a DELETE whose URL ends with the object's
    path, followed by "?delete_file_locations" only when that flag is set.
    """
    mock_guid = "1234"

    def _mock_request(url, **kwargs):
        expected_suffix = f"/mds/objects/{mock_guid}"
        if delete_file_locations:
            expected_suffix = f"{expected_suffix}?delete_file_locations"
        assert url.endswith(expected_suffix)

        success = MagicMock(requests.Response)
        success.status_code = 200
        success.json.return_value = {}
        return success

    requests_mock.side_effect = _mock_request

    gen3_object.delete_object(mock_guid, delete_file_locations)

    assert requests_mock.called
87 |
--------------------------------------------------------------------------------
/tests/test_query.py:
--------------------------------------------------------------------------------
1 | import json
2 | import requests
3 | from unittest.mock import MagicMock, patch
4 |
5 |
# NOTE(review): the patch target is spelled through `gen3.jobs`; it replaces
# `post` on whatever object `gen3.jobs.requests` refers to. Presumably that is
# the shared `requests` module also used by the query code - confirm.
@patch("gen3.jobs.requests.post")
def test_query(requests_post_mock, gen3_query):
    """
    Exercise gen3_query.query() against mocked POST calls for three cases:
    a simple `filters` dict, an explicit `filter_object`, and a large
    first/offset combination whose paging is applied to the returned records.
    """
    data_type = "subject"
    records = [
        {"id": f"uuid{index}", "vital_status": "Alive"} for index in range(1, 5)
    ]
    filters = {"vital_status": "Alive"}
    filter_object = {"AND": [{"=": {"vital_status": "Alive"}}]}

    def _mock_request(url, **kwargs):
        mocked_response = MagicMock(requests.Response, status_code=200)
        sent_json = kwargs["json"] if "json" in kwargs else None
        if url.endswith("/guppy/graphql"):
            assert sent_json is not None, "expected a json payload"
            assert sent_json["variables"]["filter"] == filter_object
            mocked_response.json.return_value = {"data": {data_type: records}}
        elif url.endswith("/guppy/download"):
            assert sent_json is not None, "expected a json payload"
            assert sent_json["filter"] == filter_object
            mocked_response.json.return_value = records
        return mocked_response

    requests_post_mock.side_effect = _mock_request

    def _run_query(first, **filter_kwargs):
        # Every call shares the same data type, fields, offset and sort order.
        return gen3_query.query(
            data_type=data_type,
            fields=["id", "vital_status"],
            first=first,
            offset=2,
            sort_object={"id": "asc"},
            **filter_kwargs,
        )

    # hit "/guppy/graphql" endpoint. Use "filters" param, which should
    # be converted to a filter object
    # "first" and "offset" are handled on the _server_ side
    assert _run_query(4, filters=filters) == {"data": {data_type: records}}

    # hit "/guppy/graphql" endpoint. Use "filter_object" param
    # "first" and "offset" are handled on the _server_ side
    assert _run_query(4, filter_object=filter_object) == {
        "data": {data_type: records}
    }

    # first + offset > 10,000 triggers use of the "/guppy/download"
    # endpoint instead of the "/guppy/graphql" endpoint
    # "first" and "offset" are handled on the _client_ side
    assert _run_query(9999, filters=filters) == {"data": {data_type: records[2:]}}
67 |
--------------------------------------------------------------------------------
/tests/test_wrap.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from unittest.mock import MagicMock, patch
3 |
4 | from gen3.tools.wrap import Gen3Wrap, Gen3Auth, Gen3AuthError
5 |
6 |
@patch.object(Gen3Auth, "get_access_token", MagicMock(return_value="1.2.3"))
def test_gen3_wrap_valid_auth(mock_gen3_auth):
    """
    With authentication succeeding, run_command() should call the patched
    subprocess.run exactly once with the command arguments as a list.
    """
    test_command_args = ("echo", "Test1", "Test2")
    expected_argv = list(test_command_args)

    with patch("gen3.tools.wrap.subprocess.run") as mock_subprocess_run:
        Gen3Wrap(mock_gen3_auth, test_command_args).run_command()

    # -2 is the stderr value the wrapper is expected to pass through.
    mock_subprocess_run.assert_called_once_with(expected_argv, stderr=-2)
18 |
19 |
@patch.object(Gen3Auth, "get_access_token", MagicMock(side_effect=Gen3AuthError()))
def test_gen3_wrap_inavalid_auth(mock_gen3_auth):
    """
    Break the authentication process to verify the following:
    1. Ensure a Gen3AuthError is raised.
    2. Confirm the subprocess is not executed when authentication fails.
    """
    test_command_args = ("echo", "Test1", "Test2")

    with patch("gen3.tools.wrap.subprocess.run") as mock_subprocess_run:
        # Only the code expected to raise lives inside pytest.raises; anything
        # placed after the raising call in that block would never execute.
        with pytest.raises(Gen3AuthError):
            wrapper_obj = Gen3Wrap(mock_gen3_auth, test_command_args)
            wrapper_obj.run_command()

    # Checked outside pytest.raises so this assertion is always reached.
    mock_subprocess_run.assert_not_called()
34 |
--------------------------------------------------------------------------------
/tests/test_wss.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from gen3.wss import wsurl_to_tokens
4 |
5 |
def test_wsurl_to_tokens():
    """
    wsurl_to_tokens() should split each workspace URL into the expected
    pair of tokens.
    """
    cases = [
        ("ws:///@user/abc/def", ("@user", "abc/def")),
        ("ws:///@whatever/abc", ("@whatever", "abc")),
    ]
    for wsurl, expected_tokens in cases:
        assert expected_tokens == wsurl_to_tokens(wsurl)
12 |
--------------------------------------------------------------------------------
/tests/validate_manifest_format/manifests/manifest_with_custom_column_names.tsv:
--------------------------------------------------------------------------------
1 | authz with special chars!@*& acl file size with spaces md5_with_underscores Urls With Caps
2 | invalid_authz DEV 123 1596f493ba9ec53023fca640fb69bd3b gs://test/test.txt
3 | /programs/DEV/projects/test2 DEV invalid_int d9a68f3d5d9ce03f8a08f50924247223 gs://test/test3.txt
4 | /programs/DEV/projects/test3 DEV 334 invalid_md5 gs://test/test.txt
5 | /programs/DEV/projects/test3 DEV 334 51bf75c48761b2e755adc1340e5a925a invalid_url
6 |
--------------------------------------------------------------------------------
/tests/validate_manifest_format/manifests/manifest_with_custom_url_protocols.tsv:
--------------------------------------------------------------------------------
1 | authz acl file_size md5 urls
2 | /programs/DEV/projects/test DEV test 123 1596f493ba9ec53023fca640fb69bd3b gs://test/test.txt s3://testaws/aws/test.txt
3 | /programs/DEV/projects/test2 DEV 235 d9a68f3d5d9ce03f8a08f50924247223 https://www.uchicago.edu/about
4 | /programs/DEV/projects/test3 /programs/DEV/projects/test3bak DEV test3 334 51bf75c48761b2e755adc1340e5a9259 http://en.wikipedia.org/wiki/University_of_Chicago
5 | /programs/DEV/projects/test3 /programs/DEV/projects/test3bak DEV test3 334 51bf75c48761b2e755adc1340e5a925a s3://bucket_without_path
6 | /programs/DEV/projects/test3 /programs/DEV/projects/test3bak DEV test3 334 51bf75c48761b2e755adc1340e5a925b wrong_protocol://test_bucket/test.txt
7 |
--------------------------------------------------------------------------------
/tests/validate_manifest_format/manifests/manifest_with_empty_url.tsv:
--------------------------------------------------------------------------------
1 | authz acl file_size md5 urls
2 | /programs/DEV/projects/test DEV 123 1596f493ba9ec53023fca640fb69bd3b
3 | /programs/DEV/projects/test2 DEV 235 d9a68f3d5d9ce03f8a08f50924247223 gs://test/test3.txt
4 | /programs/DEV/projects/test3 DEV 334 51bf75c48761b2e755adc1340e5a9259 gs://test/test.txt
5 |
--------------------------------------------------------------------------------
/tests/validate_manifest_format/manifests/manifest_with_invalid_authz_resources.tsv:
--------------------------------------------------------------------------------
1 | authz acl file_size md5 urls
2 | invalid_authz DEV 123 1596f493ba9ec53023fca640fb69bd3b gs://test/test.txt
3 | / DEV 123 1596f493ba9ec53023fca640fb69bd3c gs://test/test.txt
4 | // DEV 123 1596f493ba9ec53023fca640fb69bd3d gs://test/test.txt
5 | /// DEV 123 1596f493ba9ec53023fca640fb69bd3e gs://test/test.txt
6 | /programs invalid_authz2 DEV 123 1596f493ba9ec53023fca640fb69bd3e gs://test/test.txt
7 |
--------------------------------------------------------------------------------
/tests/validate_manifest_format/manifests/manifest_with_invalid_md5_values.tsv:
--------------------------------------------------------------------------------
1 | authz acl file_size md5 urls
2 | /programs/DEV/projects/test DEV 123 1596f493ba9ec53023fca640fb69bd3 s3://testaws/aws/test.txt
3 | /programs/DEV/projects/test2 DEV 235 d9a68f3d5d9ce03f8a08f509242472234 gs://test/test3.txt
4 | /programs/DEV/projects/test3 DEV 334 5J1bf75c48761b2e755adc1340e5a9259 gs://test/test.txt
5 | /programs/DEV/projects/test2 DEV 235 jd2L5LF5pSmvpfL/rkuYWA== gs://test/test3.txt
6 | /programs/DEV/projects/test2 DEV 235 aGVsbG8= gs://test/test3.txt
7 |
--------------------------------------------------------------------------------
/tests/validate_manifest_format/manifests/manifest_with_invalid_sizes.tsv:
--------------------------------------------------------------------------------
1 | authz acl file_size md5 urls
2 | /programs/DEV/projects/test DEV test -1 1596f493ba9ec53023fca640fb69bd3b gs://test/test.txt s3://testaws/aws/test.txt
3 | /programs/DEV/projects/test2 DEV not_an_int d9a68f3d5d9ce03f8a08f50924247223 gs://test/test3.txt
4 | /programs/DEV/projects/test3 /programs/DEV/projects/test3bak DEV test3 3.34 51bf75c48761b2e755adc1340e5a9259 gs://test/test.txt
5 | /programs/DEV/projects/test3 /programs/DEV/projects/test3bak DEV test3 string_with_42 51bf75c48761b2e755adc1340e5a9259 gs://test/test.txt
6 |
--------------------------------------------------------------------------------
/tests/validate_manifest_format/manifests/manifest_with_invalid_urls.tsv:
--------------------------------------------------------------------------------
1 | authz acl file_size md5 urls
2 | /programs/DEV/projects/test DEV test 123 1596f493ba9ec53023fca640fb69bd3b gs://test/test.txt s3://testaws/aws/test.txt wrong_protocol://test_bucket/test.txt
3 | /programs/DEV/projects/test DEV test 123 1596f493ba9ec53023fca640fb69bd3c test/test.txt testaws/aws/test.txt ://test_bucket/test.txt
4 | /programs/DEV/projects/test2 DEV 235 d9a68f3d5d9ce03f8a08f50924247223 s3://
5 | /programs/DEV/projects/test3 /programs/DEV/projects/test3bak DEV test3 334 51bf75c48761b2e755adc1340e5a9259 gs://
6 | /programs/DEV/projects/test2 DEV 235 d9a68f3d5d9ce03f8a08f50924247224 s3://bucket_without_object
7 | /programs/DEV/projects/test2 DEV 235 d9a68f3d5d9ce03f8a08f50924247225 s3://bucket_without_object/
8 | /programs/DEV/projects/test3 /programs/DEV/projects/test3bak DEV test3 334 51bf75c48761b2e755adc1340e5a9250 test_bucket/aws/test.txt
9 | /programs/DEV/projects/test3 /programs/DEV/projects/test3bak DEV test3 334 51bf75c48761b2e755adc1340e5a9251 s3:/test_bucket/aws/test.txt
10 | /programs/DEV/projects/test3 /programs/DEV/projects/test3bak DEV test3 334 51bf75c48761b2e755adc1340e5a9252 s3:test_bucket/aws/test.txt
11 | /programs/DEV/projects/test3 /programs/DEV/projects/test3bak DEV test3 334 51bf75c48761b2e755adc1340e5a9253 ://test_bucket/aws/test.txt
12 | /programs/DEV/projects/test3 /programs/DEV/projects/test3bak DEV test3 334 51bf75c48761b2e755adc1340e5a9254 s3test_bucket/aws/test.txt
13 | /programs/DEV/projects/test3 /programs/DEV/projects/test3bak DEV test3 334 51bf75c48761b2e755adc1340e5a9255 https://www.uchicago.edu
14 | /programs/DEV/projects/test3 /programs/DEV/projects/test3bak DEV test3 334 51bf75c48761b2e755adc1340e5a9256 https://www.uchicago.edu/about
15 | /programs/DEV/projects/test3 /programs/DEV/projects/test3bak DEV test3 334 51bf75c48761b2e755adc1340e5a9257 google.com/path
16 | /programs/DEV/projects/test3 DEV test3 334 51bf75c48761b2e755adc1340e5a9258
17 | /programs/DEV/projects/test3 DEV test3 334 51bf75c48761b2e755adc1340e5a9259 ''
18 | /programs/DEV/projects/test3 DEV test3 334 51bf75c48761b2e755adc1340e5a925a []
19 | /programs/DEV/projects/test3 DEV test3 334 51bf75c48761b2e755adc1340e5a925b ['']
20 | /programs/DEV/projects/test3 DEV test3 334 51bf75c48761b2e755adc1340e5a925c "[""""]"
21 | /programs/DEV/projects/test3 DEV test3 334 51bf75c48761b2e755adc1340e5a925d "["""" """"]"
22 | /programs/DEV/projects/test3 DEV test3 334 51bf75c48761b2e755adc1340e5a925e "["""" '']"
23 |
--------------------------------------------------------------------------------
/tests/validate_manifest_format/manifests/manifest_with_many_types_of_errors.tsv:
--------------------------------------------------------------------------------
1 | authz acl file_size md5 urls
2 | invalid_authz DEV 123 1596f493ba9ec53023fca640fb69bd3b gs://test/test.txt
3 | /programs/DEV/projects/test2 DEV invalid_int d9a68f3d5d9ce03f8a08f50924247223 gs://test/test3.txt
4 | /programs/DEV/projects/test3 DEV 334 invalid_md5 gs://test/test.txt
5 | /programs/DEV/projects/test3 DEV 334 51bf75c48761b2e755adc1340e5a925a invalid_url
6 |
--------------------------------------------------------------------------------
/tests/validate_manifest_format/manifests/manifest_with_missing_md5_column.tsv:
--------------------------------------------------------------------------------
1 | authz acl file_size urls
2 | /programs/DEV/projects/test DEV test 123 gs://test/test.txt s3://testaws/aws/test.txt
3 | /programs/DEV/projects/test2 DEV 235 gs://test/test3.txt
4 | /programs/DEV/projects/test3 /programs/DEV/projects/test3bak DEV test3 334 gs://test/test.txt
5 |
--------------------------------------------------------------------------------
/tests/validate_manifest_format/manifests/manifest_with_missing_size_column.tsv:
--------------------------------------------------------------------------------
1 | authz acl md5 urls
2 | /programs/DEV/projects/test DEV 1596f493ba9ec53023fca640fb69bd3b gs://test/test.txt
3 | /programs/DEV/projects/test2 DEV d9a68f3d5d9ce03f8a08f50924247223 gs://test/test3.txt
4 | /programs/DEV/projects/test3/ DEV 51bf75c48761b2e755adc1340e5a9259 gs://test/test.txt
5 |
--------------------------------------------------------------------------------
/tests/validate_manifest_format/manifests/manifest_with_missing_url_column.tsv:
--------------------------------------------------------------------------------
1 | authz acl file_size md5
2 | /programs/DEV/projects/test DEV 123 1596f493ba9ec53023fca640fb69bd3b
3 | /programs/DEV/projects/test2 DEV 235 d9a68f3d5d9ce03f8a08f50924247223
4 | /programs/DEV/projects/test3 DEV 334 51bf75c48761b2e755adc1340e5a9259
5 |
--------------------------------------------------------------------------------
/tests/validate_manifest_format/manifests/manifest_with_no_errors.tsv:
--------------------------------------------------------------------------------
1 | authz acl file_size md5 urls
2 | /programs/DEV/projects/test DEV test 123 1596f493ba9ec53023fca640fb69bd3b gs://test/test.txt s3://testaws/aws/test.txt
3 | /programs DEV 235 d9a68f3d5d9ce03f8a08f50924247223 gs://test/test3.txt
4 | /a /programs/DEV/projects/test3bak DEV test3 334 51bf75c48761b2e755adc1340e5a9259 gs://test/test.txt
5 | [/programs/DEV/projects/test3, /programs/DEV/projects/test3bak] DEV test3 334 51bf75c48761b2e755adc1340e5a9259 gs://test/test.txt
6 | /programs/DEV/ DEV 235 d9a68f3d5d9ce03f8a08f50924247224 [gs://test/test3.txt]
7 | /programs/DEV/projects/test2 DEV 235 d9a68f3d5d9ce03f8a08f50924247225 "[""gs://test/test3.txt""]"
8 | '/programs/DEV/projects/test2' DEV 235 d9a68f3d5d9ce03f8a08f50924247226 ['gs://test/test3.txt']
9 | /programs/DEV/projects/test2 DEV 235 d9a68f3d5d9ce03f8a08f50924247227 [gs://test/test.txt s3://testaws/aws/test.txt]
10 | /programs/DEV/projects/test2 DEV 235 d9a68f3d5d9ce03f8a08f50924247228 ['gs://test/test3.txt' 's3://testaws/aws/test.txt']
11 | /programs/DEV/projects/test2 DEV 235 d9a68f3d5d9ce03f8a08f50924247229 "[""gs://test/test3.txt"" ""s3://testaws/aws/test.txt""]"
12 | /programs/DEV/projects/test2 DEV 235 d9a68f3d5d9ce03f8a08f5092424722a "[""gs://test/test3.txt"" 's3://testaws/aws/test.txt']"
13 | /programs/DEV/projects/test2 DEV 235 d9a68f3d5d9ce03f8a08f5092424722b gs://test/test3.txt
14 | /programs/DEV/projects/test2 DEV 235 d9a68f3d5d9ce03f8a08f5092424722c 'gs://test/test3.txt'
15 | /programs/DEV/projects/test2 DEV 235 d9a68f3d5d9ce03f8a08f5092424722c gs://test/test%203.txt s3://testaws/file%20space.txt s3://testaws/aws/file,with,comma.txt
16 |
--------------------------------------------------------------------------------
/tests/validate_manifest_format/manifests/manifest_with_wide_row.tsv:
--------------------------------------------------------------------------------
1 | authz acl file_size md5 urls
2 | /programs/DEV/projects/test2 DEV 235 d9a68f3d5d9ce03f8a08f50924247223 gs://test/test3.txt
3 | /programs/DEV/projects/test DEV 123 1596f493ba9ec53023fca640fb69bd3b gs://test/test.txt extra_value
4 | /programs/DEV/projects/test3 DEV 334 51bf75c48761b2e755adc1340e5a9259 gs://test/test.txt
5 |
--------------------------------------------------------------------------------