├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.yml │ ├── config.yml │ ├── enhancement.yml │ └── usage_question.yml └── workflows │ ├── build.yml │ └── doc-changes.yml ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── docs ├── api.md ├── arrays.ipynb ├── bio-registries.ipynb ├── bionty.md ├── changelog.md ├── clinicore.md ├── curate.ipynb ├── faq.md ├── faq │ ├── acid.ipynb │ ├── curate-any.ipynb │ ├── delete.ipynb │ ├── idempotency.ipynb │ ├── import-modules.ipynb │ ├── keep-artifacts-local.ipynb │ ├── pydantic-pandera.ipynb │ ├── reference-field.ipynb │ ├── search.ipynb │ ├── setup.ipynb │ ├── symbol-mapping.ipynb │ ├── test_notebooks.py │ ├── track-run-inputs.ipynb │ ├── validate-fields.ipynb │ └── visibility.ipynb ├── guide.md ├── includes │ └── installation.md ├── index.md ├── lamindb.md ├── query-search.md ├── registries.ipynb ├── scripts │ ├── curate_anndata_flexible.py │ ├── curate_dataframe_flexible.py │ ├── curate_dataframe_minimal_errors.py │ ├── curate_mudata.py │ ├── curate_soma_experiment.py │ ├── curate_spatialdata.py │ ├── define_mini_immuno_features_labels.py │ ├── define_mini_immuno_schema_flexible.py │ ├── define_schema_anndata_ensembl_gene_ids_and_valid_features_in_obs.py │ ├── define_schema_spatialdata.py │ ├── define_valid_features.py │ ├── ingest_mini_immuno_datasets.py │ ├── run_track_and_finish.py │ ├── run_track_with_params.py │ ├── run_workflow.py │ └── synced_with_git.py ├── storage.md ├── storage │ ├── add-replace-cache.ipynb │ ├── anndata-accessor.ipynb │ ├── prepare-transfer-local-to-cloud.ipynb │ ├── test-files │ │ ├── iris.csv │ │ ├── iris.data │ │ └── new_iris.csv │ ├── test_notebooks.py │ ├── transfer-local-to-cloud.ipynb │ ├── upload.ipynb │ └── vitessce.ipynb ├── test_notebooks.py ├── track.ipynb ├── transfer.ipynb └── wetlab.md ├── lamindb ├── __init__.py ├── _finish.py ├── _tracked.py ├── _view.py ├── base │ ├── __init__.py │ ├── fields.py │ ├── ids.py │ ├── types.py │ 
├── uids.py │ └── users.py ├── core │ ├── __init__.py │ ├── _compat.py │ ├── _context.py │ ├── _mapped_collection.py │ ├── _settings.py │ ├── _sync_git.py │ ├── _track_environment.py │ ├── datasets │ │ ├── __init__.py │ │ ├── _core.py │ │ ├── _fake.py │ │ ├── _small.py │ │ └── mini_immuno.py │ ├── exceptions.py │ ├── loaders.py │ ├── storage │ │ ├── __init__.py │ │ ├── _anndata_accessor.py │ │ ├── _backed_access.py │ │ ├── _polars_lazy_df.py │ │ ├── _pyarrow_dataset.py │ │ ├── _tiledbsoma.py │ │ ├── _valid_suffixes.py │ │ ├── _zarr.py │ │ ├── objects.py │ │ └── paths.py │ ├── subsettings │ │ ├── __init__.py │ │ ├── _annotation_settings.py │ │ └── _creation_settings.py │ └── types.py ├── curators │ ├── __init__.py │ ├── _cellxgene_schemas │ │ ├── __init__.py │ │ └── schema_versions.csv │ ├── _legacy.py │ └── core.py ├── errors.py ├── examples │ ├── __init__.py │ └── schemas │ │ ├── __init__.py │ │ ├── _anndata.py │ │ └── _simple.py ├── integrations │ ├── __init__.py │ └── _vitessce.py ├── migrations │ ├── 0069_squashed.py │ ├── 0070_lamindbv1_migrate_data.py │ ├── 0071_lamindbv1_migrate_schema.py │ ├── 0072_remove_user__branch_code_remove_user_aux_and_more.py │ ├── 0073_merge_ourprojects.py │ ├── 0074_lamindbv1_part4.py │ ├── 0075_lamindbv1_part5.py │ ├── 0076_lamindbv1_part6.py │ ├── 0077_lamindbv1_part6b.py │ ├── 0078_lamindbv1_part6c.py │ ├── 0079_alter_rundata_value_json_and_more.py │ ├── 0080_polish_lamindbv1.py │ ├── 0081_revert_textfield_collection.py │ ├── 0082_alter_feature_dtype.py │ ├── 0083_alter_feature_is_type_alter_flextable_is_type_and_more.py │ ├── 0084_alter_schemafeature_feature_and_more.py │ ├── 0085_alter_feature_is_type_alter_flextable_is_type_and_more.py │ ├── 0086_various.py │ ├── 0087_rename__schemas_m2m_artifact_feature_sets_and_more.py │ ├── 0088_schema_components.py │ ├── 0089_subsequent_runs.py │ ├── 0090_runproject_project_runs.py │ ├── 0091_alter_featurevalue_options_alter_space_options_and_more.py │ ├── 
0092_alter_artifactfeaturevalue_artifact_and_more.py │ ├── 0093_alter_schemacomponent_unique_together.py │ ├── 0094_writeloglock_writelogmigrationstate_and_more.py │ ├── 0095_remove_rundata_flextable.py │ ├── 0096_remove_artifact__param_values_and_more.py │ ├── 0097_remove_schemaparam_param_remove_paramvalue_param_and_more.py │ ├── 0098_alter_feature_type_alter_project_type_and_more.py │ ├── 0099_alter_writelog_seqno.py │ ├── 0100_branch_alter_artifact__branch_code_and_more.py │ ├── 0101_alter_artifact_hash_alter_feature_name_and_more.py │ ├── 0102_remove_writelog_branch_code_and_more.py │ ├── 0103_remove_writelog_migration_state_and_more.py │ ├── 0104_alter_branch_uid.py │ ├── 0104_squashed.py │ ├── 0105_record_unique_name.py │ └── __init__.py ├── models │ ├── __init__.py │ ├── _describe.py │ ├── _django.py │ ├── _feature_manager.py │ ├── _from_values.py │ ├── _is_versioned.py │ ├── _label_manager.py │ ├── _relations.py │ ├── artifact.py │ ├── artifact_set.py │ ├── can_curate.py │ ├── collection.py │ ├── core.py │ ├── feature.py │ ├── has_parents.py │ ├── project.py │ ├── query_manager.py │ ├── query_set.py │ ├── record.py │ ├── run.py │ ├── save.py │ ├── schema.py │ ├── sqlrecord.py │ ├── transform.py │ └── ulabel.py ├── py.typed └── setup │ ├── __init__.py │ └── core │ └── __init__.py ├── noxfile.py ├── pyproject.toml └── tests ├── conftest.py ├── core ├── _dataset_fixtures.py ├── conftest.py ├── notebooks │ ├── basic-r-notebook.Rmd.cleaned.html │ ├── basic-r-notebook.Rmd.html │ ├── duplicate │ │ └── with-title-initialized-consecutive-finish.ipynb │ ├── no-title.ipynb │ ├── with-title-initialized-consecutive-finish-not-last-cell.ipynb │ └── with-title-initialized-consecutive-finish.ipynb ├── scripts │ ├── duplicate1 │ │ └── script-to-test-versioning.py │ ├── duplicate2 │ │ └── script-to-test-versioning.py │ ├── duplicate3 │ │ └── script-to-test-versioning.py │ ├── duplicate4 │ │ └── script-to-test-versioning.py │ ├── script-to-test-filename-change.py │ └── 
script-to-test-versioning.py ├── test_artifact.py ├── test_artifact_folders.py ├── test_can_curate.py ├── test_collection.py ├── test_data.py ├── test_db.py ├── test_delete.py ├── test_describe_and_df_calls.py ├── test_dtype.py ├── test_feature.py ├── test_feature_label_manager.py ├── test_from_values.py ├── test_has_parents.py ├── test_integrity.py ├── test_load.py ├── test_manager.py ├── test_models.py ├── test_notebooks.py ├── test_queryset.py ├── test_record.py ├── test_run.py ├── test_save.py ├── test_schema.py ├── test_search.py ├── test_track.py ├── test_tracked.py ├── test_transform.py ├── test_ulabel.py ├── test_versioning.py ├── test_view.py └── test_visibility.py ├── curators ├── conftest.py ├── test_cat_managers.py ├── test_curators_examples.py ├── test_curators_general.py ├── test_curators_multivalue.py ├── test_cxg_curator.py ├── test_dataframe_curators_accounting_example.py ├── test_pert_curator.py └── test_records.py ├── permissions ├── conftest.py ├── jwt_utils.py ├── scripts │ ├── check_lamin_dev.py │ ├── clean_lamin_dev.py │ ├── setup_access.py │ └── setup_instance.py └── test_permissions.py └── storage ├── conftest.py ├── test_artifact_storage.py ├── test_artifact_zarr.py ├── test_cache.py ├── test_storage.py ├── test_switch_storage.py └── test_transfer.py /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | name: Report a bug 2 | description: Report a bug. 3 | labels: 4 | - ":bug: bug" 5 | body: 6 | - type: textarea 7 | id: report 8 | attributes: 9 | label: Add a description 10 | placeholder: | 11 | Describe and consider providing version information. Please ensure you're on the latest version of lamindb. 12 | This is a public repository! 13 | Do not reveal any internal information. 
14 | validations: 15 | required: true 16 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | contact_links: 3 | - name: LaminHub issues 4 | url: https://github.com/laminlabs/laminhub-public 5 | about: If you have issues with the GUI/web app at lamin.ai, please report them here. 6 | - name: Enterprise support 7 | url: https://lamin.ai/contact 8 | about: If you have other questions, contact us directly. 9 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/enhancement.yml: -------------------------------------------------------------------------------- 1 | name: Propose an enhancement 2 | description: Propose an enhancement. 3 | body: 4 | - type: textarea 5 | id: description 6 | attributes: 7 | label: Add a description 8 | placeholder: | 9 | This is a public repository! 10 | Do not reveal any internal information. 11 | validations: 12 | required: true 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/usage_question.yml: -------------------------------------------------------------------------------- 1 | name: Ask a usage question 2 | description: Ask a usage question. 3 | labels: 4 | - "usage question" 5 | body: 6 | - type: textarea 7 | id: description 8 | attributes: 9 | label: Add a description 10 | placeholder: | 11 | This is a public repository! 12 | Do not reveal any internal information. 
13 | validations: 14 | required: true 15 | -------------------------------------------------------------------------------- /.github/workflows/doc-changes.yml: -------------------------------------------------------------------------------- 1 | name: doc-changes 2 | 3 | on: 4 | pull_request_target: 5 | branches: 6 | - main 7 | - release 8 | types: 9 | - closed 10 | 11 | jobs: 12 | doc-changes: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v4 16 | - uses: actions/setup-python@v5 17 | with: 18 | python-version: "3.11" 19 | - run: pip install "laminci[doc-changes]@git+https://x-access-token:${{ secrets.LAMIN_BUILD_DOCS }}@github.com/laminlabs/laminci" 20 | - run: laminci doc-changes 21 | env: 22 | repo_token: ${{ secrets.GITHUB_TOKEN }} 23 | docs_token: ${{ secrets.LAMIN_BUILD_DOCS }} 24 | changelog_file: lamin-docs/docs/changelog/soon/lamindb.md 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __MACOSX/ 2 | 3 | # LaminDB 4 | .coveragerc 5 | *.db 6 | *.lndb 7 | *.jpg 8 | *.zarr/ 9 | docsbuild/ 10 | docs/lamin.md 11 | docs/guide/data-validation.ipynb 12 | docs/guide/bionty.ipynb 13 | docs/guide/lnschema-core.ipynb 14 | docs/paradisi05_laminopathic_nuclei.jpg 15 | bionty_docs/ 16 | lamindb_docs/ 17 | _build 18 | mydata/ 19 | lamin-intro/ 20 | lamin-tutorial/ 21 | mytest/ 22 | rds/ 23 | mydb/ 24 | docs/test-registries/ 25 | docs/test-annotate-flexible/ 26 | docs/lamindb.* 27 | lamin_sphinx 28 | docs/conf.py 29 | lamindb/setup/.env 30 | _secrets.py 31 | _configuration.py 32 | lamin.db 33 | docs/generated/* 34 | _docs_tmp* 35 | docs/guide/Laminopathic_nuclei.jpg 36 | docs/guide/paradisi05_laminopathic_nuclei.jpg 37 | nocodb 38 | docs/guide/SRR4238351_subsamp.fastq.gz 39 | docs/faq/paradisi05_laminopathic_nuclei.jpg 40 | docs/faq/tostore/ 41 | docs/faq/mydata_postgres/ 42 | docs/guide/myobjects/ 43 | 
docs/faq/test-run-inputs/ 44 | docs/intro/paradisi05_laminopathic_nuclei.jpg 45 | docs/guide/figures/ 46 | docs/test-annotate/ 47 | docs/test-track/ 48 | suo22/ 49 | docs/biology/test-flow/ 50 | docs/biology/test-scrna/ 51 | docs/biology/test-registries/ 52 | docs/biology/test-multimodal/ 53 | test-inherit1 54 | test-inherit2 55 | test-search0 56 | test-search1 57 | test-search5 58 | default_storage 59 | default_storage_unit_core 60 | default_storage_unit_storage 61 | test.ipynb 62 | test2.ipynb 63 | run-tests 64 | test-django-validation/ 65 | curate.tiledbsoma 66 | small_dataset.tiledbsoma 67 | 68 | # General 69 | .DS_Store 70 | 71 | # Byte-compiled / optimized / DLL files 72 | __pycache__/ 73 | *.py[cod] 74 | *$py.class 75 | 76 | # C extensions 77 | *.so 78 | 79 | # Distribution / packaging 80 | .Python 81 | build/ 82 | develop-eggs/ 83 | dist/ 84 | downloads/ 85 | eggs/ 86 | .eggs/ 87 | lib/ 88 | lib64/ 89 | parts/ 90 | sdist/ 91 | var/ 92 | wheels/ 93 | pip-wheel-metadata/ 94 | share/python-wheels/ 95 | *.egg-info/ 96 | .installed.cfg 97 | *.egg 98 | MANIFEST 99 | 100 | # PyInstaller 101 | # Usually these files are written by a python script from a template 102 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
103 | *.manifest 104 | *.spec 105 | 106 | # Installer logs 107 | pip-log.txt 108 | pip-delete-this-directory.txt 109 | 110 | # Unit test / coverage reports 111 | htmlcov/ 112 | .tox/ 113 | .nox/ 114 | .coverage 115 | .coverage.* 116 | .cache 117 | nosetests.xml 118 | coverage.xml 119 | *.cover 120 | *.py,cover 121 | .hypothesis/ 122 | .pytest_cache/ 123 | 124 | # Translations 125 | *.mo 126 | *.pot 127 | 128 | # Django stuff: 129 | *.log 130 | local_settings.py 131 | db.sqlite3 132 | db.sqlite3-journal 133 | 134 | # Flask stuff: 135 | instance/ 136 | .webassets-cache 137 | 138 | # Scrapy stuff: 139 | .scrapy 140 | 141 | # Sphinx documentation 142 | docs/_build/ 143 | 144 | # PyBuilder 145 | target/ 146 | 147 | # Jupyter Notebook 148 | .ipynb_checkpoints 149 | 150 | # IPython 151 | profile_default/ 152 | ipython_config.py 153 | 154 | # pyenv 155 | .python-version 156 | 157 | # pipenv 158 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 159 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 160 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 161 | # install all needed dependencies. 162 | #Pipfile.lock 163 | 164 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 165 | __pypackages__/ 166 | 167 | # Celery stuff 168 | celerybeat-schedule 169 | celerybeat.pid 170 | 171 | # SageMath parsed files 172 | *.sage.py 173 | 174 | # Environments 175 | .env 176 | .venv 177 | env/ 178 | venv/ 179 | ENV/ 180 | env.bak/ 181 | venv.bak/ 182 | 183 | # Spyder project settings 184 | .spyderproject 185 | .spyproject 186 | 187 | # Rope project settings 188 | .ropeproject 189 | 190 | # mkdocs documentation 191 | /site 192 | 193 | # mypy 194 | .mypy_cache/ 195 | .dmypy.json 196 | dmypy.json 197 | 198 | # Pyre type checker 199 | .pyre/ 200 | 201 | # data files 202 | data/ 203 | _build 204 | *.csv 205 | *.fcs 206 | *.zip 207 | *.feather 208 | *.h5ad 209 | *.h5mu 210 | *.parquet 211 | *.bam 212 | *.fastq.gz 213 | *.pt 214 | 215 | # Pycharm 216 | .idea 217 | 218 | # VSCode 219 | .vscode 220 | 221 | # cxg 222 | !lamindb/curators/_cellxgene_schemas/schema_versions.csv 223 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "sub/lamindb-setup"] 2 | path = sub/lamindb-setup 3 | url = https://github.com/laminlabs/lamindb-setup 4 | [submodule "sub/lamin-cli"] 5 | path = sub/lamin-cli 6 | url = https://github.com/laminlabs/lamin-cli 7 | [submodule "sub/bionty"] 8 | path = sub/bionty 9 | url = https://github.com/laminlabs/bionty 10 | [submodule "sub/wetlab"] 11 | path = sub/wetlab 12 | url = https://github.com/laminlabs/wetlab 13 | [submodule "sub/clinicore"] 14 | path = sub/clinicore 15 | url = https://github.com/laminlabs/clinicore 16 | [submodule "sub/cellxgene-lamin"] 17 | path = sub/cellxgene-lamin 18 | url = https://github.com/laminlabs/cellxgene-lamin.git 19 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | fail_fast: false 2 | 
default_language_version: 3 | python: python3 4 | default_stages: 5 | - pre-commit 6 | - pre-push 7 | minimum_pre_commit_version: 2.16.0 8 | repos: 9 | - repo: https://github.com/rbubley/mirrors-prettier 10 | rev: v3.5.1 11 | hooks: 12 | - id: prettier 13 | exclude: | 14 | (?x)( 15 | docs/changelog.md|.github/ISSUE_TEMPLATE/config.yml|tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html 16 | ) 17 | - repo: https://github.com/kynan/nbstripout 18 | rev: 0.8.1 19 | hooks: 20 | - id: nbstripout 21 | exclude: | 22 | (?x)( 23 | docs/examples/| 24 | docs/notes/ 25 | ) 26 | - repo: https://github.com/astral-sh/ruff-pre-commit 27 | rev: v0.9.10 28 | hooks: 29 | - id: ruff 30 | args: [--fix, --exit-non-zero-on-fix, --unsafe-fixes] 31 | - id: ruff-format 32 | - repo: https://github.com/pre-commit/pre-commit-hooks 33 | rev: v4.5.0 34 | hooks: 35 | - id: detect-private-key 36 | - id: check-ast 37 | - id: end-of-file-fixer 38 | exclude: | 39 | (?x)( 40 | .github/workflows/latest-changes.jinja2 41 | ) 42 | - id: mixed-line-ending 43 | args: [--fix=lf] 44 | - id: trailing-whitespace 45 | exclude: | 46 | (?x)( 47 | tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html 48 | ) 49 | - id: check-case-conflict 50 | - repo: https://github.com/pre-commit/mirrors-mypy 51 | rev: v1.14.1 52 | hooks: 53 | - id: mypy 54 | args: [--no-strict-optional, --ignore-missing-imports] 55 | additional_dependencies: ["types-requests", "types-attrs"] 56 | exclude: | 57 | (?x)( 58 | test_notebooks.py| 59 | script-to-test-versioning.py| 60 | tests/storage/conftest.py| 61 | tests/curators/conftest.py| 62 | tests/permissions/conftest.py| 63 | tests/writelog/conftest.py| 64 | tests/writelog_sqlite/conftest.py| 65 | tests/curators/test_curators_examples.py| 66 | tests/core/conftest.py 67 | ) 68 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | 
Contributions are generally welcome. Please make an issue to discuss proposals. 4 | 5 | ## Installation 6 | 7 | ### PyPI 8 | 9 | For installation from PyPI, see [docs.lamin.ai/setup](https://docs.lamin.ai/setup). 10 | 11 | ### Github 12 | 13 | For installation from GitHub, call: 14 | 15 | ```bash 16 | git clone --recursive https://github.com/laminlabs/lamindb 17 | pip install laminci 18 | python -m venv .venv 19 | source .venv/bin/activate 20 | nox -s install 21 | ``` 22 | 23 | This will install a few dependencies from the git submodules linked [here](https://github.com/laminlabs/lamindb/tree/main/sub), as well as packages 24 | like `pytest` and `pre-commit` that you'll need when developing. 25 | 26 | lamindb depends on several other packages that may require modifications for pull requests to successfully pass the continuous integration build. 27 | We suggest the following workflow if commits to any of the submodules are essential for the current modifications in lamindb: 28 | 29 | 1. Change directory into the submodule that you want to modify: `cd sub/SUBMODULE`. 30 | 2. Switch to a new feature branch: `git switch -c feature/NEWFEATURE`. 31 | 3. Make a pull request with your changes to the `SUBMODULE` and ensure that the CI passes. 32 | 4. In the repository root of lamindb, create a new commit and push: 33 | 34 | ```bash 35 | cd .. 36 | git add -u 37 | git commit -m "Upgraded SUBMODULE" 38 | git push 39 | ``` 40 | 41 | Any pull request of yours should now also have the changes of the submodule included allowing you to test that changes in the submodule and lamindb are compatible. 42 | 43 | ## Running and writing tests 44 | 45 | This package uses the [pytest][] for automated testing. 46 | Please add a test for every function added to the package. 
47 | 48 | Running tests requires the [Docker daemon][] up, then run at the root of the repository: 49 | 50 | ```bash 51 | pytest --ignore=tests/storage --ignore=tests/permission 52 | ``` 53 | 54 | in the root of the repository. 55 | We exclude specific directories in local `pytest` runs because they directly access external resources such as AWS, which require specific access keys. 56 | Continuous integration will automatically run **all** tests on pull requests. 57 | 58 | ## Code-style 59 | 60 | This project uses [pre-commit][] to enforce consistent code-styles. On every commit, pre-commit checks will either 61 | automatically fix issues with the code, or raise an error message. 62 | 63 | To enable pre-commit locally, simply run 64 | 65 | ```bash 66 | pre-commit install 67 | ``` 68 | 69 | in the root of the repository. Pre-commit will automatically download all dependencies when it is run for the first time. 70 | 71 | We further use [gitmoji][] to add emoticons to commits. 72 | These allow us to more easily categorize them allowing for faster visual filtering. 73 | 74 | It can be installed by running: 75 | 76 | ```bash 77 | npm i -g gitmoji-cli 78 | ``` 79 | 80 | and enabled for the repository via: 81 | 82 | ```bash 83 | gitmoji -i 84 | ``` 85 | 86 | If you don't have `sudo` in your working environment, follow [these instructions](https://github.com/sindresorhus/guides/blob/main/npm-global-without-sudo.md). 87 | 88 | ## Documentation 89 | 90 | We build our documentation with an internal tool called `lndocs`. 91 | We have not made it public yet and therefore external contributors need to rely on the Github Actions `docs` job to build the documentation. 92 | If the `docs` job succeeds, a preview URL will be posted automatically as a comment to your pull request. 93 | 94 | ## Releases 95 | 96 | Currently only lamin employees have release rights. 
97 | 98 | [Docker daemon]: https://docs.docker.com/engine/install/ 99 | [gitmoji]: https://gitmoji.dev/ 100 | [pre-commit]: https://pre-commit.com/ 101 | [pytest]: https://docs.pytest.org/ 102 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Stars](https://img.shields.io/github/stars/laminlabs/lamindb?logo=GitHub&color=yellow)](https://github.com/laminlabs/lamindb) 2 | [![codecov](https://codecov.io/gh/laminlabs/lamindb/branch/main/graph/badge.svg?token=VKMRJ7OWR3)](https://codecov.io/gh/laminlabs/lamindb) 3 | [![pypi](https://img.shields.io/pypi/v/lamindb?color=blue&label=pypi%20package)](https://pypi.org/project/lamindb) 4 | 5 | # LaminDB - A data framework for biology 6 | 7 | Read the [docs](https://docs.lamin.ai). 8 | -------------------------------------------------------------------------------- /docs/api.md: -------------------------------------------------------------------------------- 1 | # API 2 | 3 | 4 | 5 | ```{toctree} 6 | :maxdepth: 1 7 | :caption: CLI & lamindb 8 | :hidden: 9 | 10 | cli 11 | lamindb 12 | ``` 13 | 14 | ```{toctree} 15 | :maxdepth: 1 16 | :caption: Modules 17 | :hidden: 18 | 19 | bionty 20 | wetlab 21 | clinicore 22 | ``` 23 | -------------------------------------------------------------------------------- /docs/bionty.md: -------------------------------------------------------------------------------- 1 | # `bionty` 2 | 3 | ```{eval-rst} 4 | .. automodule:: bionty 5 | ``` 6 | -------------------------------------------------------------------------------- /docs/changelog.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | Actual content in lamin-docs. 
4 | -------------------------------------------------------------------------------- /docs/clinicore.md: -------------------------------------------------------------------------------- 1 | # `clinicore` 2 | 3 | ```{eval-rst} 4 | .. automodule:: clinicore 5 | ``` 6 | -------------------------------------------------------------------------------- /docs/faq.md: -------------------------------------------------------------------------------- 1 | # FAQ 2 | 3 | ```{toctree} 4 | :maxdepth: 1 5 | 6 | faq/pydantic-pandera 7 | faq/idempotency 8 | faq/acid 9 | faq/track-run-inputs 10 | faq/setup 11 | faq/curate-any 12 | faq/import-modules 13 | faq/reference-field 14 | faq/visibility 15 | faq/delete 16 | faq/keep-artifacts-local 17 | faq/validate-fields 18 | faq/symbol-mapping 19 | faq/search 20 | ``` 21 | -------------------------------------------------------------------------------- /docs/faq/import-modules.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# What happens if I import a schema module without lamindb?" 
9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "# !pip install 'lamindb[bionty]'\n", 18 | "!lamin init --storage testmodule --modules bionty" 19 | ] 20 | }, 21 | { 22 | "attachments": {}, 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "Upon `import`, nothing yet happens:" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import bionty as bt" 36 | ] 37 | }, 38 | { 39 | "attachments": {}, 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "If you try to access an attribute (other than `model`), you'll load the instance in the same way as calling `import lamindb`.\n", 44 | "\n", 45 | "Under the hood, `lamindb` is imported!" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "assert bt.Organism(name=\"human\") is not None" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "!lamin delete --force testmodule" 64 | ] 65 | } 66 | ], 67 | "metadata": { 68 | "kernelspec": { 69 | "display_name": "py39", 70 | "language": "python", 71 | "name": "python3" 72 | }, 73 | "language_info": { 74 | "artifact_extension": ".py", 75 | "codemirror_mode": { 76 | "name": "ipython", 77 | "version": 3 78 | }, 79 | "mimetype": "text/x-python", 80 | "name": "python", 81 | "nbconvert_exporter": "python", 82 | "pygments_lexer": "ipython3", 83 | "version": "3.9.16" 84 | } 85 | }, 86 | "nbformat": 4, 87 | "nbformat_minor": 2 88 | } 89 | -------------------------------------------------------------------------------- /docs/faq/reference-field.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | 
"metadata": {}, 6 | "source": [ 7 | "# Where to store external links and IDs?" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "When registering data in LaminDB, you might want to store a reference link or ID to indicate the source of the collection.\n", 15 | "\n", 16 | "We have `reference` and `reference_type` fields for this purpose, they are available for {class}`~lamindb.Collection`, {class}`~lamindb.Transform`, {class}`~lamindb.Run` and {class}`~lamindb.ULabel`." 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "# !pip install lamindb\n", 26 | "!lamin init --storage testreference" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import lamindb as ln" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "Let's say we have a few donor samples that came form Vendor X, in order to chase back the orders, I'd like to keep track the donor ids provided by the vendor:" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "ln.ULabel(\n", 52 | " name=\"donor 001\", reference=\"VX984545\", reference_type=\"Donor ID from Vendor X\"\n", 53 | ")" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "!lamin delete --force testreference" 63 | ] 64 | } 65 | ], 66 | "metadata": { 67 | "kernelspec": { 68 | "display_name": "py39", 69 | "language": "python", 70 | "name": "python3" 71 | }, 72 | "language_info": { 73 | "artifact_extension": ".py", 74 | "codemirror_mode": { 75 | "name": "ipython", 76 | "version": 3 77 | }, 78 | "mimetype": "text/x-python", 79 | "name": "python", 80 | "nbconvert_exporter": "python", 81 | "pygments_lexer": 
"ipython3", 82 | "version": "3.9.16" 83 | } 84 | }, 85 | "nbformat": 4, 86 | "nbformat_minor": 2 87 | } 88 | -------------------------------------------------------------------------------- /docs/faq/setup.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "0", 6 | "metadata": {}, 7 | "source": [ 8 | "# What happens when importing lamindb and the instance is not yet setup?" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "id": "1", 15 | "metadata": { 16 | "tags": [ 17 | "hide-cell" 18 | ] 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "!lamin disconnect" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "id": "2", 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "# !pip install 'lamindb[jupyter]'\n", 33 | "import lamindb as ln\n", 34 | "import pytest" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "id": "3", 40 | "metadata": {}, 41 | "source": [ 42 | "If you try to use lamindb, it will raise an `InstanceNotSetupError` and ask you to `init` or `load` an instance via the python API." 
43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "id": "4", 49 | "metadata": { 50 | "tags": [ 51 | "hide-cell" 52 | ] 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "with pytest.raises(ln._InstanceNotSetupError):\n", 57 | " ln.track()" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "id": "5", 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "ln.setup.init(storage=\"./testsetup\")" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "id": "6", 73 | "metadata": {}, 74 | "source": [ 75 | "Now we can access functionality:" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "id": "7", 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "ln.track(\"2lhqA4uTKSFP0000\")" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "id": "8", 91 | "metadata": {}, 92 | "source": [ 93 | "Let us try to init another instance in the same Python session: It doesn't work." 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "id": "9", 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "from lamindb_setup._init_instance import CannotSwitchDefaultInstance\n", 104 | "\n", 105 | "with pytest.raises(CannotSwitchDefaultInstance):\n", 106 | " ln.setup.init(storage=\"./testsetup2\")" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "id": "10", 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "!lamin delete --force testsetup" 117 | ] 118 | } 119 | ], 120 | "metadata": { 121 | "kernelspec": { 122 | "display_name": "Python 3 (ipykernel)", 123 | "language": "python", 124 | "name": "python3" 125 | }, 126 | "language_info": { 127 | "codemirror_mode": { 128 | "name": "ipython", 129 | "version": 3 130 | }, 131 | "file_extension": ".py", 132 | "mimetype": "text/x-python", 133 | "name": "python", 134 | "nbconvert_exporter": "python", 135 | "pygments_lexer": 
"ipython3", 136 | "version": "3.9.17" 137 | }, 138 | "vscode": { 139 | "interpreter": { 140 | "hash": "61b4062b24dfb1010f420dad5aa3bd73a4d2af47d0ec44eafec465a35a9d7239" 141 | } 142 | } 143 | }, 144 | "nbformat": 4, 145 | "nbformat_minor": 5 146 | } 147 | -------------------------------------------------------------------------------- /docs/faq/test_notebooks.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import nbproject_test as test 4 | 5 | import lamindb as ln 6 | 7 | 8 | def test_notebooks(): 9 | nbdir = Path(__file__).parent 10 | ln.setup.login("testuser1") 11 | ln.setup.init(storage=nbdir / "mydata") 12 | test.execute_notebooks(nbdir, write=True) 13 | -------------------------------------------------------------------------------- /docs/faq/validate-fields.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Django field validation\n", 8 | "\n", 9 | "[Django field validation](https://docs.djangoproject.com/en/5.1/ref/validators/) are enabled for models that inherit the `ValidateFields` class." 
10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "# pip install lamindb\n", 19 | "!lamin init --storage ./test-django-validation" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import lamindb as ln\n", 29 | "from lamindb.core.exceptions import FieldValidationError" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "try:\n", 39 | " ln.Reference(name=\"my ref\", doi=\"abc.ef\", url=\"myurl.com\")\n", 40 | "except FieldValidationError as e:\n", 41 | " print(e)" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "!lamin delete --force test-django-validation" 51 | ] 52 | } 53 | ], 54 | "metadata": { 55 | "kernelspec": { 56 | "display_name": "py310", 57 | "language": "python", 58 | "name": "python3" 59 | }, 60 | "language_info": { 61 | "codemirror_mode": { 62 | "name": "ipython", 63 | "version": 3 64 | }, 65 | "file_extension": ".py", 66 | "mimetype": "text/x-python", 67 | "name": "python", 68 | "nbconvert_exporter": "python", 69 | "pygments_lexer": "ipython3", 70 | "version": "3.10.13" 71 | } 72 | }, 73 | "nbformat": 4, 74 | "nbformat_minor": 2 75 | } 76 | -------------------------------------------------------------------------------- /docs/guide.md: -------------------------------------------------------------------------------- 1 | # Guide 2 | 3 | ```{toctree} 4 | :hidden: 5 | :caption: "How to" 6 | 7 | query-search 8 | track 9 | curate 10 | bio-registries 11 | transfer 12 | ``` 13 | 14 | ```{toctree} 15 | :hidden: 16 | :caption: Other topics 17 | 18 | faq 19 | storage 20 | ``` 21 | -------------------------------------------------------------------------------- /docs/includes/installation.md: 
-------------------------------------------------------------------------------- 1 | ![pyversions](https://img.shields.io/pypi/pyversions/lamindb) 2 | 3 | ```shell 4 | pip install lamindb 5 | ``` 6 | 7 | You can configure the installation using `extras`, e.g., 8 | 9 | ```shell 10 | pip install 'lamindb[jupyter,bionty]' 11 | ``` 12 | 13 | Supported `extras` are: 14 | 15 | ```yaml 16 | # commonly used 17 | jupyter # parse Jupyter notebook metadata 18 | bionty # basic biological ontologies 19 | # cloud backends (AWS is assumed) 20 | gcp # Google Cloud (gcfs, etc.) 21 | # biological artifact formats 22 | fcs # FCS artifacts (flow cytometry) 23 | # storage backends 24 | zarr # store & stream arrays with zarr 25 | ``` 26 | 27 | If you'd like to install from GitHub, see [here](https://github.com/laminlabs/lamindb/blob/main/README.md). 28 | 29 | If you'd like a docker container, here is a way: [github.com/laminlabs/lamindb-docker](https://github.com/laminlabs/lamindb-docker). 30 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | ```{include} ../README.md 2 | :start-line: 0 3 | :end-line: 5 4 | ``` 5 | 6 | 7 | 8 | ```{toctree} 9 | :maxdepth: 1 10 | :hidden: 11 | 12 | guide 13 | api 14 | changelog 15 | ``` 16 | -------------------------------------------------------------------------------- /docs/lamindb.md: -------------------------------------------------------------------------------- 1 | # `lamindb` 2 | 3 | ```{eval-rst} 4 | .. 
automodule:: lamindb 5 | ``` 6 | -------------------------------------------------------------------------------- /docs/query-search.md: -------------------------------------------------------------------------------- 1 | # Query & search 2 | 3 | ```{toctree} 4 | :maxdepth: 1 5 | 6 | registries 7 | arrays 8 | ``` 9 | -------------------------------------------------------------------------------- /docs/scripts/curate_anndata_flexible.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | ln.core.datasets.mini_immuno.define_features_labels() 4 | adata = ln.core.datasets.mini_immuno.get_dataset1(otype="AnnData") 5 | schema = ln.examples.schemas.anndata_ensembl_gene_ids_and_valid_features_in_obs() 6 | artifact = ln.Artifact.from_anndata( 7 | adata, key="examples/mini_immuno.h5ad", schema=schema 8 | ).save() 9 | artifact.describe() 10 | -------------------------------------------------------------------------------- /docs/scripts/curate_dataframe_flexible.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | ln.core.datasets.mini_immuno.define_features_labels() 4 | schema = ln.examples.schemas.valid_features() 5 | df = ln.core.datasets.small_dataset1(otype="DataFrame") 6 | artifact = ln.Artifact.from_df( 7 | df, key="examples/dataset1.parquet", schema=schema 8 | ).save() 9 | artifact.describe() 10 | -------------------------------------------------------------------------------- /docs/scripts/curate_dataframe_minimal_errors.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | schema = ln.core.datasets.mini_immuno.define_mini_immuno_schema_flexible() 4 | df = ln.core.datasets.small_dataset1(otype="DataFrame") 5 | df.pop("donor") # remove donor column to trigger validation error 6 | try: 7 | artifact = ln.Artifact.from_df( 8 | df, key="examples/dataset1.parquet", schema=schema 9 | 
).save() 10 | except ln.errors.ValidationError as error: 11 | print(error) 12 | -------------------------------------------------------------------------------- /docs/scripts/curate_mudata.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | import bionty as bt 3 | 4 | 5 | # define the global obs schema 6 | obs_schema = ln.Schema( 7 | name="mudata_papalexi21_subset_obs_schema", 8 | features=[ 9 | ln.Feature(name="perturbation", dtype="cat[ULabel[Perturbation]]").save(), 10 | ln.Feature(name="replicate", dtype="cat[ULabel[Replicate]]").save(), 11 | ], 12 | ).save() 13 | 14 | # define the ['rna'].obs schema 15 | obs_schema_rna = ln.Schema( 16 | name="mudata_papalexi21_subset_rna_obs_schema", 17 | features=[ 18 | ln.Feature(name="nCount_RNA", dtype=int).save(), 19 | ln.Feature(name="nFeature_RNA", dtype=int).save(), 20 | ln.Feature(name="percent.mito", dtype=float).save(), 21 | ], 22 | ).save() 23 | 24 | # define the ['hto'].obs schema 25 | obs_schema_hto = ln.Schema( 26 | name="mudata_papalexi21_subset_hto_obs_schema", 27 | features=[ 28 | ln.Feature(name="nCount_HTO", dtype=int).save(), 29 | ln.Feature(name="nFeature_HTO", dtype=int).save(), 30 | ln.Feature(name="technique", dtype=bt.ExperimentalFactor).save(), 31 | ], 32 | ).save() 33 | 34 | # define ['rna'].var schema 35 | var_schema_rna = ln.Schema( 36 | name="mudata_papalexi21_subset_rna_var_schema", 37 | itype=bt.Gene.symbol, 38 | dtype=float, 39 | ).save() 40 | 41 | # define composite schema 42 | mudata_schema = ln.Schema( 43 | name="mudata_papalexi21_subset_mudata_schema", 44 | otype="MuData", 45 | slots={ 46 | "obs": obs_schema, 47 | "rna:obs": obs_schema_rna, 48 | "hto:obs": obs_schema_hto, 49 | "rna:var": var_schema_rna, 50 | }, 51 | ).save() 52 | 53 | # curate a MuData 54 | mdata = ln.core.datasets.mudata_papalexi21_subset() 55 | bt.settings.organism = "human" # set the organism to map gene symbols 56 | curator = ln.curators.MuDataCurator(mdata, 
mudata_schema) 57 | artifact = curator.save_artifact(key="examples/mudata_papalexi21_subset.h5mu") 58 | assert artifact.schema == mudata_schema 59 | -------------------------------------------------------------------------------- /docs/scripts/curate_soma_experiment.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | import bionty as bt 3 | import tiledbsoma as soma 4 | import tiledbsoma.io 5 | 6 | adata = ln.core.datasets.mini_immuno.get_dataset1(otype="AnnData") 7 | tiledbsoma.io.from_anndata("small_dataset.tiledbsoma", adata, measurement_name="RNA") 8 | 9 | obs_schema = ln.Schema( 10 | name="soma_obs_schema", 11 | features=[ 12 | ln.Feature(name="cell_type_by_expert", dtype=bt.CellType).save(), 13 | ln.Feature(name="cell_type_by_model", dtype=bt.CellType).save(), 14 | ], 15 | ).save() 16 | 17 | var_schema = ln.Schema( 18 | name="soma_var_schema", 19 | features=[ 20 | ln.Feature(name="var_id", dtype=bt.Gene.ensembl_gene_id).save(), 21 | ], 22 | coerce_dtype=True, 23 | ).save() 24 | 25 | soma_schema = ln.Schema( 26 | name="soma_experiment_schema", 27 | otype="tiledbsoma", 28 | slots={ 29 | "obs": obs_schema, 30 | "ms:RNA.T": var_schema, 31 | }, 32 | ).save() 33 | 34 | with soma.Experiment.open("small_dataset.tiledbsoma") as experiment: 35 | curator = ln.curators.TiledbsomaExperimentCurator(experiment, soma_schema) 36 | curator.validate() 37 | artifact = curator.save_artifact( 38 | key="examples/soma_experiment.tiledbsoma", 39 | description="SOMA experiment with schema validation", 40 | ) 41 | assert artifact.schema == soma_schema 42 | artifact.describe() 43 | -------------------------------------------------------------------------------- /docs/scripts/curate_spatialdata.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | spatialdata = ln.core.datasets.spatialdata_blobs() 4 | sdata_schema = ln.Schema.get(name="spatialdata_blobs_schema") 5 
| curator = ln.curators.SpatialDataCurator(spatialdata, sdata_schema) 6 | try: 7 | curator.validate() 8 | except ln.errors.ValidationError: 9 | pass 10 | 11 | spatialdata.tables["table"].var.drop(index="ENSG00000999999", inplace=True) 12 | 13 | # validate again (must pass now) and save artifact 14 | artifact = ln.Artifact.from_spatialdata( 15 | spatialdata, key="examples/spatialdata1.zarr", schema=sdata_schema 16 | ).save() 17 | artifact.describe() 18 | -------------------------------------------------------------------------------- /docs/scripts/define_mini_immuno_features_labels.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | import bionty as bt 3 | 4 | # define valid labels 5 | perturbation_type = ln.ULabel(name="Perturbation", is_type=True).save() 6 | ln.ULabel(name="DMSO", type=perturbation_type).save() 7 | ln.ULabel(name="IFNG", type=perturbation_type).save() 8 | bt.CellType.from_source(name="B cell").save() 9 | bt.CellType.from_source(name="T cell").save() 10 | 11 | # define valid features 12 | ln.Feature(name="perturbation", dtype=perturbation_type).save() 13 | ln.Feature(name="cell_type_by_expert", dtype=bt.CellType).save() 14 | ln.Feature(name="cell_type_by_model", dtype=bt.CellType).save() 15 | ln.Feature(name="assay_oid", dtype=bt.ExperimentalFactor.ontology_id).save() 16 | ln.Feature(name="concentration", dtype=str).save() 17 | ln.Feature(name="treatment_time_h", dtype="num", coerce_dtype=True).save() 18 | ln.Feature(name="donor", dtype=str, nullable=True).save() 19 | ln.Feature(name="donor_ethnicity", dtype=list[bt.Ethnicity]).save() 20 | -------------------------------------------------------------------------------- /docs/scripts/define_mini_immuno_schema_flexible.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | schema = ln.Schema( 4 | name="Mini immuno schema", 5 | features=[ 6 | ln.Feature.get(name="perturbation"), 
7 | ln.Feature.get(name="cell_type_by_model"), 8 | ln.Feature.get(name="assay_oid"), 9 | ln.Feature.get(name="donor"), 10 | ln.Feature.get(name="concentration"), 11 | ln.Feature.get(name="treatment_time_h"), 12 | ], 13 | flexible=True, # _additional_ columns in a dataframe are validated & annotated 14 | ).save() 15 | -------------------------------------------------------------------------------- /docs/scripts/define_schema_anndata_ensembl_gene_ids_and_valid_features_in_obs.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | import bionty as bt 3 | 4 | obs_schema = ln.examples.schemas.valid_features() 5 | varT_schema = ln.Schema( 6 | name="valid_ensembl_gene_ids", itype=bt.Gene.ensembl_gene_id 7 | ).save() 8 | schema = ln.Schema( 9 | name="anndata_ensembl_gene_ids_and_valid_features_in_obs", 10 | otype="AnnData", 11 | slots={"obs": obs_schema, "var.T": varT_schema}, 12 | ).save() 13 | -------------------------------------------------------------------------------- /docs/scripts/define_schema_spatialdata.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | import bionty as bt 3 | 4 | 5 | attrs_schema = ln.Schema( 6 | features=[ 7 | ln.Feature(name="bio", dtype=dict).save(), 8 | ln.Feature(name="tech", dtype=dict).save(), 9 | ], 10 | ).save() 11 | 12 | sample_schema = ln.Schema( 13 | features=[ 14 | ln.Feature(name="disease", dtype=bt.Disease, coerce_dtype=True).save(), 15 | ln.Feature( 16 | name="developmental_stage", 17 | dtype=bt.DevelopmentalStage, 18 | coerce_dtype=True, 19 | ).save(), 20 | ], 21 | ).save() 22 | 23 | tech_schema = ln.Schema( 24 | features=[ 25 | ln.Feature(name="assay", dtype=bt.ExperimentalFactor, coerce_dtype=True).save(), 26 | ], 27 | ).save() 28 | 29 | obs_schema = ln.Schema( 30 | features=[ 31 | ln.Feature(name="sample_region", dtype="str").save(), 32 | ], 33 | ).save() 34 | 35 | # Schema enforces only registered 
Ensembl Gene IDs are valid (maximal_set=True) 36 | varT_schema = ln.Schema(itype=bt.Gene.ensembl_gene_id, maximal_set=True).save() 37 | 38 | sdata_schema = ln.Schema( 39 | name="spatialdata_blobs_schema", 40 | otype="SpatialData", 41 | slots={ 42 | "attrs:bio": sample_schema, 43 | "attrs:tech": tech_schema, 44 | "attrs": attrs_schema, 45 | "tables:table:obs": obs_schema, 46 | "tables:table:var.T": varT_schema, 47 | }, 48 | ).save() 49 | -------------------------------------------------------------------------------- /docs/scripts/define_valid_features.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | schema = ln.Schema(name="valid_features", itype=ln.Feature).save() 4 | -------------------------------------------------------------------------------- /docs/scripts/ingest_mini_immuno_datasets.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | import bionty as bt 3 | 4 | # observation-level metadata 5 | ln.Feature(name="perturbation", dtype="cat[ULabel]").save() 6 | ln.Feature(name="sample_note", dtype="str").save() 7 | ln.Feature(name="cell_type_by_expert", dtype="cat[bionty.CellType]").save() 8 | ln.Feature(name="cell_type_by_model", dtype="cat[bionty.CellType]").save() 9 | # dataset-level metadata 10 | ln.Feature(name="temperature", dtype="float").save() 11 | ln.Feature(name="experiment", dtype="cat[ULabel]").save() 12 | ln.Feature(name="date_of_study", dtype="date").save() 13 | ln.Feature(name="study_note", dtype="str").save() 14 | ln.Feature(name="study_metadata", dtype=dict).save() 15 | 16 | ## Permissible values for categoricals 17 | ln.ULabel.from_values(["DMSO", "IFNG"], create=True).save() 18 | ln.ULabel.from_values(["Experiment 1", "Experiment 2"], create=True).save() 19 | bt.CellType.from_values(["B cell", "T cell"], create=True).save() 20 | 21 | schema = ln.examples.schemas.anndata_ensembl_gene_ids_and_valid_features_in_obs() 
22 | 23 | ## Ingest dataset1 24 | adata = ln.core.datasets.mini_immuno.get_dataset1(otype="AnnData") 25 | artifact = ln.Artifact.from_anndata( 26 | adata, 27 | key="examples/dataset1.h5ad", 28 | schema=schema, 29 | ).save() 30 | adhoc = {"study_metadata": {"detail1": "123", "detail2": 1}} 31 | dataset_metadata = adata.uns 32 | dataset_metadata.update(adhoc) 33 | artifact.features.add_values(dataset_metadata) # type: ignore 34 | 35 | # Ingest dataset2 36 | adata2 = ln.core.datasets.mini_immuno.get_dataset2(otype="AnnData") 37 | artifact2 = ln.Artifact.from_anndata( 38 | adata2, 39 | key="examples/dataset2.h5ad", 40 | schema=schema, 41 | ).save() 42 | adhoc2 = {"study_metadata": {"detail1": "456", "detail2": 2}} 43 | dataset_metadata2 = adata2.uns 44 | dataset_metadata2.update(adhoc2) 45 | artifact2.features.add_values(dataset_metadata2) # type: ignore 46 | -------------------------------------------------------------------------------- /docs/scripts/run_track_and_finish.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | ln.track() # initiate a tracked notebook/script run 4 | 5 | # your code automatically tracks inputs & outputs 6 | 7 | ln.finish() # mark run as finished, save execution report, source code & environment 8 | -------------------------------------------------------------------------------- /docs/scripts/run_track_with_params.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import lamindb as ln 3 | 4 | if __name__ == "__main__": 5 | p = argparse.ArgumentParser() 6 | p.add_argument("--input-dir", type=str) 7 | p.add_argument("--downsample", action="store_true") 8 | p.add_argument("--learning-rate", type=float) 9 | args = p.parse_args() 10 | params = { 11 | "input_dir": args.input_dir, 12 | "learning_rate": args.learning_rate, 13 | "preprocess_params": { 14 | "downsample": args.downsample, # nested parameter names & values in 
dictionaries are not validated 15 | "normalization": "the_good_one", 16 | }, 17 | } 18 | ln.track(params=params) 19 | 20 | # your code 21 | 22 | ln.finish() 23 | -------------------------------------------------------------------------------- /docs/scripts/run_workflow.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import lamindb as ln 3 | 4 | ln.Param(name="run_workflow_subset", dtype=bool).save() 5 | 6 | 7 | @ln.tracked() 8 | def subset_dataframe( 9 | artifact: ln.Artifact, 10 | subset_rows: int = 2, 11 | subset_cols: int = 2, 12 | run: ln.Run | None = None, 13 | ) -> ln.Artifact: 14 | dataset = artifact.load(is_run_input=run) 15 | new_data = dataset.iloc[:subset_rows, :subset_cols] 16 | new_key = artifact.key.replace(".parquet", "_subsetted.parquet") 17 | return ln.Artifact.from_df(new_data, key=new_key, run=run).save() 18 | 19 | 20 | if __name__ == "__main__": 21 | p = argparse.ArgumentParser() 22 | p.add_argument("--subset", action="store_true") 23 | args = p.parse_args() 24 | 25 | params = {"run_workflow_subset": args.subset} 26 | 27 | ln.track(params=params) 28 | 29 | if args.subset: 30 | df = ln.core.datasets.small_dataset1(otype="DataFrame") 31 | artifact = ln.Artifact.from_df(df, key="my_analysis/dataset.parquet").save() 32 | subsetted_artifact = subset_dataframe(artifact) 33 | 34 | ln.finish() 35 | -------------------------------------------------------------------------------- /docs/scripts/synced_with_git.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | ln.settings.sync_git_repo = "https://github.com/..." 
4 | ln.track() 5 | # your code 6 | ln.finish() 7 | -------------------------------------------------------------------------------- /docs/storage.md: -------------------------------------------------------------------------------- 1 | # Storage 2 | 3 | ```{toctree} 4 | :maxdepth: 1 5 | 6 | storage/upload 7 | storage/add-replace-cache 8 | storage/anndata-accessor 9 | storage/prepare-transfer-local-to-cloud 10 | storage/transfer-local-to-cloud 11 | storage/vitessce 12 | ``` 13 | -------------------------------------------------------------------------------- /docs/storage/prepare-transfer-local-to-cloud.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Prepare transfer artifacts from a local instance to a cloud instance" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "!lamin settings set auto-connect false" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "import lamindb as ln\n", 26 | "import bionty as bt\n", 27 | "import wetlab as wl\n", 28 | "import pandas as pd" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "ln.setup.init(storage=\"./test-transfer-to-cloud\", modules=\"bionty,wetlab\")\n", 38 | "ln.setup.settings.auto_connect = False" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "artifact = ln.Artifact.from_df(\n", 48 | " pd.DataFrame({\"a\": [1, 2, 3]}), description=\"test-transfer-to-cloud\"\n", 49 | ").save()\n", 50 | "features = bt.CellMarker.from_values(\n", 51 | " [\"PD1\", \"CD21\"], field=bt.CellMarker.name, organism=\"human\"\n", 52 | ")\n", 53 | 
"ln.save(features)\n", 54 | "artifact.features._add_schema(ln.FeatureSet(features), slot=\"var\")\n", 55 | "\n", 56 | "organism = bt.Organism.from_source(name=\"human\").save()\n", 57 | "artifact.labels.add(organism)\n", 58 | "\n", 59 | "experiment = wl.Experiment(name=\"experiment-test-transfer-to-cloud\").save()\n", 60 | "artifact.experiments.add(experiment)\n", 61 | "\n", 62 | "artifact.describe()" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "assert artifact.features[\"var\"].count() == 2" 72 | ] 73 | } 74 | ], 75 | "metadata": { 76 | "kernelspec": { 77 | "display_name": "Python 3 (ipykernel)", 78 | "language": "python", 79 | "name": "python3" 80 | }, 81 | "language_info": { 82 | "codemirror_mode": { 83 | "name": "ipython", 84 | "version": 3 85 | }, 86 | "file_extension": ".py", 87 | "mimetype": "text/x-python", 88 | "name": "python", 89 | "nbconvert_exporter": "python", 90 | "pygments_lexer": "ipython3", 91 | "version": "3.9.17" 92 | } 93 | }, 94 | "nbformat": 4, 95 | "nbformat_minor": 2 96 | } 97 | -------------------------------------------------------------------------------- /docs/storage/test-files/iris.csv: -------------------------------------------------------------------------------- 1 | "sepal.length","sepal.width","petal.length","petal.width","variety" 2 | 5.1,3.5,1.4,.2,"Setosa" 3 | 4.9,3,1.4,.2,"Setosa" 4 | 7,3.2,4.7,1.4,"Versicolor" 5 | 6.4,3.2,4.5,1.5,"Versicolor" 6 | 6.3,3.3,6,2.5,"Virginica" 7 | 5.8,2.7,5.1,1.9,"Virginica" 8 | -------------------------------------------------------------------------------- /docs/storage/test-files/iris.data: -------------------------------------------------------------------------------- 1 | 5.1,3.5,1.4,0.2,Iris-setosa 2 | 4.9,3.0,1.4,0.2,Iris-setosa 3 | 7.0,3.2,4.7,1.4,Iris-versicolor 4 | 6.4,3.2,4.5,1.5,Iris-versicolor 5 | 6.2,3.4,5.4,2.3,Iris-virginica 6 | 5.9,3.0,5.1,1.8,Iris-virginica 7 | 
-------------------------------------------------------------------------------- /docs/storage/test-files/new_iris.csv: -------------------------------------------------------------------------------- 1 | ;sepal.length;sepal.width;petal.length;petal.width;variety 2 | 0;5.1;3.5;1.4;0.2;Setosa 3 | 1;4.9;3.0;1.4;0.2;Setosa 4 | 50;7.0;3.2;4.7;1.4;Versicolor 5 | 51;6.4;3.2;4.5;1.5;Versicolor 6 | 100;6.3;3.3;6.0;2.5;Virginica 7 | 101;5.8;2.7;5.1;1.9;Virginica 8 | -------------------------------------------------------------------------------- /docs/storage/test_notebooks.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import nbproject_test as test 4 | 5 | import lamindb as ln 6 | 7 | 8 | def test_notebooks(): 9 | nbdir = Path(__file__).parent 10 | ln.setup.login("testuser1") 11 | test.execute_notebooks(nbdir, write=True) 12 | -------------------------------------------------------------------------------- /docs/storage/transfer-local-to-cloud.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Transfer artifacts from a local instance to a cloud instance" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import lamindb as ln\n", 17 | "import bionty as bt\n", 18 | "\n", 19 | "ln.connect(\"laminlabs/lamin-dev\")\n", 20 | "bt.settings.organism = \"human\"" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": { 27 | "tags": [ 28 | "hide-cell" 29 | ] 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "def cleanup(artifact: ln.Artifact):\n", 34 | " features_sets = artifact.feature_sets.all()\n", 35 | " experiments = artifact.experiments.all()\n", 36 | " artifact.delete(permanent=True, storage=False)\n", 37 | " features_sets.delete()\n", 38 | " 
experiments.delete()\n", 39 | "\n", 40 | "\n", 41 | "artifacts = ln.Artifact.filter(description=\"test-transfer-to-cloud\").all()\n", 42 | "for artifact in artifacts:\n", 43 | " cleanup(artifact)" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "artifact = ln.Artifact.using(\"testuser1/test-transfer-to-cloud\").get(\n", 53 | " description=\"test-transfer-to-cloud\"\n", 54 | ")\n", 55 | "artifact.describe()" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "artifact.save()" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "artifact.describe()" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "assert artifact._state.db == \"default\"\n", 83 | "assert artifact.organisms.get().name == \"human\"\n", 84 | "assert artifact.experiments.get().name == \"experiment-test-transfer-to-cloud\"\n", 85 | "assert artifact.features[\"var\"].count() == 2" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "!lamin delete --force test-transfer-to-cloud\n", 95 | "!rm -r ./test-transfer-to-cloud" 96 | ] 97 | } 98 | ], 99 | "metadata": { 100 | "kernelspec": { 101 | "display_name": "Python 3 (ipykernel)", 102 | "language": "python", 103 | "name": "python3" 104 | }, 105 | "language_info": { 106 | "codemirror_mode": { 107 | "name": "ipython", 108 | "version": 3 109 | }, 110 | "file_extension": ".py", 111 | "mimetype": "text/x-python", 112 | "name": "python", 113 | "nbconvert_exporter": "python", 114 | "pygments_lexer": "ipython3", 115 | "version": "3.9.17" 116 | } 117 | }, 118 | "nbformat": 4, 119 | "nbformat_minor": 2 120 | } 121 | 
-------------------------------------------------------------------------------- /docs/test_notebooks.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | 4 | import nbproject_test as test 5 | 6 | sys.path[:0] = [str(Path(__file__).parent.parent)] 7 | 8 | from noxfile import GROUPS 9 | 10 | DOCS = Path(__file__).parents[1] / "docs/" 11 | 12 | 13 | def test_tutorial(): 14 | for artifactname in GROUPS["tutorial"]: 15 | test.execute_notebooks(DOCS / artifactname, write=True) 16 | 17 | 18 | def test_guide(): 19 | for artifactname in GROUPS["guide"]: 20 | test.execute_notebooks(DOCS / artifactname, write=True) 21 | 22 | 23 | def test_biology(): 24 | for artifactname in GROUPS["biology"]: 25 | test.execute_notebooks(DOCS / artifactname, write=True) 26 | -------------------------------------------------------------------------------- /docs/wetlab.md: -------------------------------------------------------------------------------- 1 | # `wetlab` 2 | 3 | ```{eval-rst} 4 | .. automodule:: wetlab 5 | ``` 6 | -------------------------------------------------------------------------------- /lamindb/__init__.py: -------------------------------------------------------------------------------- 1 | """A data framework for biology. 2 | 3 | Data lineage 4 | ============ 5 | 6 | Track inputs, outputs & environment of a notebook or script run. 7 | 8 | .. autosummary:: 9 | :toctree: . 10 | 11 | track 12 | finish 13 | 14 | Decorate a function with `@tracked()` to track inputs, outputs & environment of function executions. 15 | 16 | .. autosummary:: 17 | :toctree: . 18 | 19 | tracked 20 | 21 | Registries 22 | ========== 23 | 24 | Manage artifacts and transforms. 25 | 26 | .. autosummary:: 27 | :toctree: . 28 | 29 | Artifact 30 | Storage 31 | Transform 32 | Run 33 | 34 | Validate and annotate artifacts. 35 | 36 | .. autosummary:: 37 | :toctree: . 
38 | 39 | Feature 40 | ULabel 41 | Schema 42 | 43 | Manage flexible records to track, e.g., samples or donors. 44 | 45 | .. autosummary:: 46 | :toctree: . 47 | 48 | Record 49 | Sheet 50 | 51 | Manage projects. 52 | 53 | .. autosummary:: 54 | :toctree: . 55 | 56 | User 57 | Collection 58 | Project 59 | Space 60 | Branch 61 | Reference 62 | Person 63 | 64 | Other 65 | ===== 66 | 67 | Functions and classes. 68 | 69 | .. autosummary:: 70 | :toctree: . 71 | 72 | connect 73 | view 74 | save 75 | UPath 76 | settings 77 | context 78 | 79 | Curators and integrations. 80 | 81 | .. autosummary:: 82 | :toctree: . 83 | 84 | curators 85 | integrations 86 | 87 | Low-level functionality. 88 | 89 | .. autosummary:: 90 | :toctree: . 91 | 92 | examples 93 | errors 94 | setup 95 | base 96 | core 97 | models 98 | 99 | Backwards compatibility. 100 | 101 | .. autosummary:: 102 | :toctree: . 103 | 104 | Param 105 | FeatureSet 106 | Curator 107 | 108 | """ 109 | 110 | # ruff: noqa: I001 111 | # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc. 112 | __version__ = "1.6.2" 113 | 114 | import warnings 115 | 116 | # through SpatialData 117 | warnings.filterwarnings( 118 | "ignore", message="The legacy Dask DataFrame implementation is deprecated" 119 | ) 120 | 121 | from lamindb_setup._check_setup import InstanceNotSetupError as _InstanceNotSetupError 122 | from lamindb_setup._check_setup import _check_instance_setup 123 | from lamindb_setup._connect_instance import connect 124 | from lamindb_setup.core.upath import UPath 125 | 126 | from . import base, errors, setup 127 | 128 | 129 | def __getattr__(name): 130 | raise _InstanceNotSetupError() 131 | 132 | 133 | if _check_instance_setup(from_module="lamindb"): 134 | del __getattr__ # so that imports work out 135 | from . 
import base 136 | from ._tracked import tracked 137 | from ._view import view 138 | from .core._context import context 139 | from .core._settings import settings 140 | from .curators._legacy import CatManager as Curator 141 | from .models import ( 142 | Artifact, 143 | Collection, 144 | Feature, 145 | FeatureSet, # backward compat 146 | Person, 147 | Project, 148 | Reference, 149 | Run, 150 | Schema, 151 | Storage, 152 | Transform, 153 | ULabel, 154 | User, 155 | Space, 156 | Branch, 157 | Record, 158 | Sheet, 159 | ) 160 | from .models.save import save 161 | from . import core 162 | from . import integrations 163 | from . import curators 164 | from . import examples 165 | 166 | track = context._track 167 | finish = context._finish 168 | settings.__doc__ = """Global live settings (:class:`~lamindb.core.Settings`).""" 169 | context.__doc__ = """Global run context (:class:`~lamindb.core.Context`).""" 170 | from django.db.models import Q 171 | 172 | Param = Feature # backward compat 173 | -------------------------------------------------------------------------------- /lamindb/base/__init__.py: -------------------------------------------------------------------------------- 1 | """Base library. 2 | 3 | Is available also when no instance is setup. 4 | 5 | Modules: 6 | 7 | .. autosummary:: 8 | :toctree: . 9 | 10 | uids 11 | types 12 | fields 13 | 14 | Utils: 15 | 16 | .. autosummary:: 17 | :toctree: . 18 | 19 | doc_args 20 | deprecated 21 | 22 | """ 23 | 24 | from lamindb_setup.core import deprecated, doc_args 25 | 26 | from . 
import fields, types, uids 27 | -------------------------------------------------------------------------------- /lamindb/base/ids.py: -------------------------------------------------------------------------------- 1 | from .uids import * # noqa: F403 2 | -------------------------------------------------------------------------------- /lamindb/base/types.py: -------------------------------------------------------------------------------- 1 | """Types. 2 | 3 | Central object types. 4 | 5 | .. autosummary:: 6 | :toctree: . 7 | 8 | ArtifactKind 9 | TransformType 10 | Dtype 11 | 12 | Basic types. 13 | 14 | .. autosummary:: 15 | :toctree: . 16 | 17 | UPathStr 18 | StrField 19 | ListLike 20 | FieldAttr 21 | """ 22 | 23 | from __future__ import annotations 24 | 25 | from typing import Literal, Union 26 | 27 | import numpy as np 28 | import pandas as pd 29 | from django.db.models.query_utils import DeferredAttribute as FieldAttr 30 | from lamindb_setup.core.types import UPathStr # noqa: F401 31 | 32 | # need to use Union because __future__.annotations doesn't do the job here <3.10 33 | # typing.TypeAlias, >3.10 on but already deprecated 34 | ListLike = Union[list[str], pd.Series, np.array] 35 | StrField = Union[str, FieldAttr] # typing.TypeAlias 36 | 37 | TransformType = Literal[ 38 | "pipeline", "notebook", "upload", "script", "function", "linker" 39 | ] 40 | ArtifactKind = Literal["dataset", "model"] 41 | 42 | # below is used for Feature.dtype and Param.dtype 43 | Dtype = Literal[ 44 | "cat", # categoricals 45 | "num", # numericals 46 | "str", # string 47 | "int", # integer / numpy.integer 48 | "float", # float 49 | "bool", # boolean 50 | "date", # date 51 | "datetime", # datetime 52 | "dict", # dictionary 53 | "object", # this is a pandas input dtype, we're only using it for complicated types, not for strings 54 | ] 55 | """Data type. 56 | 57 | Data types in lamindb are a string-serialized abstraction of common data types. 
58 | 59 | Overview 60 | ======== 61 | 62 | ============ ============ ================================================= 63 | description lamindb pandas 64 | ============ ============ ================================================= 65 | categorical `"cat"` `category` 66 | numerical `"num"` `int | float` 67 | integer `"int"` `int64 | int32 | int16 | int8 | uint | ...` 68 | float `"float"` `float64 | float32 | float16 | float8 | ...` 69 | string `"str"` `object` 70 | datetime `"datetime"` `datetime` 71 | date `"date"` `object` (pandera requires an ISO-format string, convert with `df["date"] = df["date"].dt.date`) 72 | dictionary `"dict"` `object` 73 | ============ ============ ================================================= 74 | 75 | Categoricals 76 | ============ 77 | 78 | Beyond indicating that a feature is a categorical, `lamindb` allows you to define the registry to which values are restricted. 79 | 80 | For example, `'cat[ULabel]'` or `'cat[bionty.CellType]'` indicate that permissible values are from the `ULabel` or `CellType` registry, respectively. 81 | 82 | You can also reference multiple registries, e.g., `'cat[ULabel|bionty.CellType]'` indicates that values can be from either registry. 83 | 84 | You can also restrict to sub-types defined in registries via the `type` column, e.g., `'cat[ULabel[CellMedium]]'` indicates that values must be of type `CellMedium` within the `ULabel` registry. 85 | 86 | Literal 87 | ======= 88 | 89 | A `Dtype` object in `lamindb` is a `Literal` up to further specification of `"cat"`. 90 | 91 | """ 92 | FeatureDtype = Dtype # backward compat 93 | -------------------------------------------------------------------------------- /lamindb/base/uids.py: -------------------------------------------------------------------------------- 1 | """Universal IDs. 2 | 3 | Base generators: 4 | 5 | .. autosummary:: 6 | :toctree: . 7 | 8 | base26 9 | base62 10 | base64 11 | 12 | `uid` generators: 13 | 14 | .. autosummary:: 15 | :toctree: . 
16 | 17 | base62_8 18 | base62_12 19 | base62_16 20 | base62_20 21 | 22 | 23 | Collision probabilities 24 | ======================= 25 | 26 | 8 base62 characters (`62**8=2e+14`): 27 | 28 | ======= =========== 29 | n p_collision 30 | ======= =========== 31 | 100k 2e-05 32 | 1M 2e-03 33 | ======= =========== 34 | 35 | 12 base62 characters (`62**12=3e+21`): 36 | 37 | ======= =========== 38 | n p_collision 39 | ======= =========== 40 | 100M 2e-06 41 | 1B 2e-04 42 | ======= =========== 43 | 44 | 16 base62 characters (`62**16=5e+28`): 45 | 46 | ======= =========== 47 | n p_collision 48 | ======= =========== 49 | 1e12 7e-05 50 | 1e13 7e-03 51 | ======= =========== 52 | 53 | 20 base62 characters (`62**20=7e+35`) roughly matches UUID (`2**122=5e+36`): 54 | 55 | ======= =========== 56 | n p_collision 57 | ======= =========== 58 | 1e16 7e-05 59 | 1e17 7e-03 60 | ======= =========== 61 | 62 | See `source `__. 63 | 64 | """ 65 | 66 | import secrets 67 | import string 68 | 69 | 70 | def base64(n_char: int) -> str: 71 | """Random Base64 string.""" 72 | alphabet = string.digits + string.ascii_letters.swapcase() + "_" + "-" 73 | uid = "".join(secrets.choice(alphabet) for i in range(n_char)) 74 | return uid 75 | 76 | 77 | def base62(n_char: int) -> str: 78 | """Random Base62 string.""" 79 | alphabet = string.digits + string.ascii_letters.swapcase() 80 | uid = "".join(secrets.choice(alphabet) for i in range(n_char)) 81 | return uid 82 | 83 | 84 | def base26(n_char: int): 85 | """ASCII lowercase.""" 86 | alphabet = string.ascii_lowercase 87 | uid = "".join(secrets.choice(alphabet) for i in range(n_char)) 88 | return uid 89 | 90 | 91 | def base62_4() -> str: 92 | return base62(4) 93 | 94 | 95 | def base62_8() -> str: 96 | """Random Base62 string of length 8.""" 97 | return base62(8) 98 | 99 | 100 | def base62_12() -> str: 101 | """Random Base62 string of length 12.""" 102 | return base62(12) 103 | 104 | 105 | def base62_16() -> str: 106 | """Random Base62 string of length 16.""" 107 | 
return base62(16) 108 | 109 | 110 | def base62_20() -> str: 111 | """Random Base62 string of length 20.""" 112 | return base62(20) 113 | 114 | 115 | def base62_24() -> str: 116 | """Random Base62 string of length 24.""" 117 | return base62(24) 118 | -------------------------------------------------------------------------------- /lamindb/base/users.py: -------------------------------------------------------------------------------- 1 | user_id_cache = {} 2 | 3 | 4 | def current_user_id() -> int: 5 | import lamindb_setup as ln_setup 6 | from lamindb_setup import settings 7 | from lamindb_setup._init_instance import register_user 8 | 9 | from lamindb.models import User 10 | 11 | def query_user_id(): 12 | if ln_setup.core.django.IS_MIGRATING: 13 | return 1 14 | else: 15 | try: 16 | user_id = User.objects.get(uid=settings.user.uid).id 17 | except User.DoesNotExist: 18 | register_user(settings.user) 19 | user_id = User.objects.get(uid=settings.user.uid).id 20 | return user_id 21 | 22 | if settings._instance_exists: 23 | if settings.instance.slug not in user_id_cache: 24 | user_id_cache[settings.instance.slug] = query_user_id() 25 | return user_id_cache[settings.instance.slug] 26 | else: 27 | return query_user_id() 28 | -------------------------------------------------------------------------------- /lamindb/core/__init__.py: -------------------------------------------------------------------------------- 1 | """Core library. 2 | 3 | Settings & context: 4 | 5 | .. autosummary:: 6 | :toctree: . 7 | 8 | Settings 9 | subsettings 10 | Context 11 | 12 | Artifact loaders: 13 | 14 | .. autosummary:: 15 | :toctree: . 16 | 17 | loaders 18 | 19 | Data loaders: 20 | 21 | .. autosummary:: 22 | :toctree: . 23 | 24 | MappedCollection 25 | 26 | Modules: 27 | 28 | .. autosummary:: 29 | :toctree: . 30 | 31 | datasets 32 | storage 33 | logger 34 | 35 | """ 36 | 37 | from lamin_utils import logger 38 | from lamin_utils._inspect import InspectResult 39 | 40 | from .. 
import errors as exceptions 41 | from . import datasets, loaders, subsettings, types 42 | from ._context import Context 43 | from ._mapped_collection import MappedCollection 44 | from ._settings import Settings 45 | -------------------------------------------------------------------------------- /lamindb/core/_compat.py: -------------------------------------------------------------------------------- 1 | import importlib.util 2 | from typing import Any, Callable, TypeVar 3 | 4 | T = TypeVar("T") 5 | 6 | 7 | def is_package_installed(package_name: str) -> bool: 8 | spec = importlib.util.find_spec(package_name) 9 | return spec is not None 10 | 11 | 12 | def with_package(package_name: str, operation: Callable[[Any], T]) -> T: 13 | """Execute an operation that requires a specific package. 14 | 15 | Args: 16 | package_name: Package name (e.g., "mudata") 17 | operation: Function that takes the imported module and returns a result 18 | 19 | Examples: 20 | # For direct package functions 21 | result = with_package("mudata", lambda mod: mod.read_zarr(path)) 22 | """ 23 | try: 24 | module = importlib.import_module(package_name) 25 | return operation(module) 26 | except ImportError: 27 | raise ImportError( 28 | f"Package '{package_name}' is required but not installed. " 29 | f"Please install with: pip install {package_name}" 30 | ) from None 31 | 32 | 33 | def with_package_obj( 34 | obj: Any, class_name: str, package_name: str, operation: Callable[[Any], T] 35 | ) -> tuple[bool, T | None]: 36 | """Handle operations on objects that require specific packages. 37 | 38 | Args: 39 | obj: The object to operate on 40 | class_name: Expected class name (e.g., "MuData") 41 | package_name: Package that provides the class (e.g., "mudata") 42 | operation: Function to call with the object if package is available. 
43 | 44 | Examples: 45 | # For instance methods 46 | handled, res = apply_class_func(dmem, "MuData", "mudata", 47 | lambda obj: obj.write(filepath)) 48 | """ 49 | if obj.__class__.__name__ == class_name: 50 | try: 51 | importlib.import_module(package_name) 52 | result = operation(obj) 53 | return True, result 54 | except ImportError: 55 | raise ImportError( 56 | f"Object appears to be {class_name} but '{package_name}' package is not installed. " 57 | f"Please install with: pip install {package_name}" 58 | ) from None 59 | 60 | return False, None 61 | -------------------------------------------------------------------------------- /lamindb/core/_track_environment.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import subprocess 4 | import sys 5 | from typing import TYPE_CHECKING 6 | 7 | import lamindb_setup as ln_setup 8 | from lamin_utils import logger 9 | 10 | if TYPE_CHECKING: 11 | from lamindb.models import Run 12 | 13 | 14 | def track_environment(run: Run) -> None: 15 | filepath = ln_setup.settings.cache_dir / f"run_env_pip_{run.uid}.txt" 16 | # create a requirements.txt 17 | # we don't create a conda environment.yml mostly for its slowness 18 | try: 19 | with open(filepath, "w") as f: 20 | result = subprocess.run( 21 | [sys.executable, "-m", "pip", "freeze"], 22 | stdout=f, 23 | ) 24 | except OSError as e: 25 | result = None 26 | logger.warning(f"could not run pip freeze with error {e}") 27 | if result is not None and result.returncode == 0: 28 | logger.info(f"tracked pip freeze > {str(filepath)}") 29 | -------------------------------------------------------------------------------- /lamindb/core/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | """Test datasets. 2 | 3 | The mini immuno dataset. 4 | 5 | .. autosummary:: 6 | :toctree: . 7 | 8 | mini_immuno 9 | 10 | Small in-memory datasets. 11 | 12 | .. 
autosummary:: 13 | :toctree: . 14 | 15 | anndata_with_obs 16 | 17 | Files. 18 | 19 | .. autosummary:: 20 | :toctree: . 21 | 22 | file_fcs 23 | file_fcs_alpert19 24 | file_tsv_rnaseq_nfcore_salmon_merged_gene_counts 25 | file_jpg_paradisi05 26 | file_tiff_suo22 27 | file_fastq 28 | file_bam 29 | file_mini_csv 30 | 31 | Directories. 32 | 33 | .. autosummary:: 34 | :toctree: . 35 | 36 | dir_scrnaseq_cellranger 37 | dir_iris_images 38 | 39 | Dataframe, AnnData, MuData. 40 | 41 | .. autosummary:: 42 | :toctree: . 43 | 44 | df_iris 45 | df_iris_in_meter 46 | df_iris_in_meter_study1 47 | df_iris_in_meter_study2 48 | anndata_mouse_sc_lymph_node 49 | anndata_human_immune_cells 50 | anndata_pbmc68k_reduced 51 | anndata_file_pbmc68k_test 52 | anndata_pbmc3k_processed 53 | anndata_with_obs 54 | anndata_suo22_Visium10X 55 | mudata_papalexi21_subset 56 | schmidt22_crispra_gws_IFNG 57 | schmidt22_perturbseq 58 | 59 | Other. 60 | 61 | .. autosummary:: 62 | :toctree: . 63 | 64 | fake_bio_notebook_titles 65 | """ 66 | 67 | from . 
import mini_immuno 68 | from ._core import ( 69 | anndata_file_pbmc68k_test, 70 | anndata_human_immune_cells, 71 | anndata_mouse_sc_lymph_node, 72 | anndata_pbmc3k_processed, 73 | anndata_pbmc68k_reduced, 74 | anndata_suo22_Visium10X, 75 | df_iris, 76 | df_iris_in_meter, 77 | df_iris_in_meter_study1, 78 | df_iris_in_meter_study2, 79 | dir_iris_images, 80 | dir_scrnaseq_cellranger, 81 | file_bam, 82 | file_fastq, 83 | file_fcs, 84 | file_fcs_alpert19, 85 | file_jpg_paradisi05, 86 | file_mini_csv, 87 | file_tiff_suo22, 88 | file_tsv_rnaseq_nfcore_salmon_merged_gene_counts, 89 | mudata_papalexi21_subset, 90 | schmidt22_crispra_gws_IFNG, 91 | schmidt22_perturbseq, 92 | spatialdata_blobs, 93 | ) 94 | from ._fake import fake_bio_notebook_titles 95 | from ._small import ( 96 | anndata_with_obs, 97 | small_dataset3_cellxgene, 98 | ) 99 | 100 | small_dataset1 = mini_immuno.get_dataset1 # backward compat 101 | small_dataset2 = mini_immuno.get_dataset2 # backward compat 102 | -------------------------------------------------------------------------------- /lamindb/core/datasets/_fake.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | 4 | def fake_bio_notebook_titles(n=100) -> list[str]: 5 | """A fake collection of study titles.""" 6 | from faker import Faker 7 | 8 | fake = Faker() 9 | 10 | from faker_biology.mol_biol import Antibody 11 | from faker_biology.physiology import CellType, Organ, Organelle 12 | 13 | fake.add_provider(CellType) 14 | fake.add_provider(Organ) 15 | fake.add_provider(Organelle) 16 | fake.add_provider(Antibody) 17 | 18 | my_words = [ 19 | "study", 20 | "investigate", 21 | "research", 22 | "result", 23 | "cluster", 24 | "rank", 25 | "candidate", 26 | "visualize", 27 | "efficiency", 28 | "classify", 29 | ] 30 | my_words += [fake.organ() for i in range(5)] + ["intestine", "intestinal"] 31 | my_words += [fake.celltype() for i in range(10)] 32 | my_words += 
[fake.antibody_isotype() for i in range(20)] 33 | 34 | my_notebook_titles = [fake.sentence(ext_word_list=my_words) for i in range(n)] 35 | 36 | return my_notebook_titles 37 | -------------------------------------------------------------------------------- /lamindb/core/datasets/_small.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import Any, Literal 4 | 5 | import anndata as ad 6 | import numpy as np 7 | import pandas as pd 8 | 9 | 10 | def small_dataset3_cellxgene( 11 | otype: Literal["DataFrame", "AnnData"] = "AnnData", 12 | ) -> tuple[pd.DataFrame, dict[str, Any]] | ad.AnnData: 13 | # TODO: consider other ids for other organisms 14 | # "ENSMUSG00002076988" 15 | var_ids = ["invalid_ensembl_id", "ENSG00000000419", "ENSG00000139618"] 16 | dataset_dict = { 17 | var_ids[0]: [2, 3, 3], 18 | var_ids[1]: [3, 4, 5], 19 | var_ids[2]: [4, 2, 3], 20 | "disease_ontology_term_id": ["MONDO:0004975", "MONDO:0004980", "MONDO:0004980"], 21 | "organism": ["human", "human", "human"], 22 | "sex": ["female", "male", "unknown"], 23 | "sex_ontology_term_id": ["PATO:0000383", "PATO:0000384", "unknown"], 24 | "tissue": ["lungg", "lungg", "heart"], 25 | "donor": ["-1", "1", "2"], 26 | } 27 | dataset_df = pd.DataFrame( 28 | dataset_dict, 29 | index=["barcode1", "barcode2", "barcode3"], 30 | ) 31 | dataset_df["tissue"] = dataset_df["tissue"].astype("category") 32 | ad.AnnData( 33 | dataset_df[var_ids], 34 | obs=dataset_df[[key for key in dataset_dict if key not in var_ids]], 35 | ) 36 | if otype == "DataFrame": 37 | return dataset_df 38 | else: 39 | dataset_ad = ad.AnnData(dataset_df.iloc[:, :3], obs=dataset_df.iloc[:, 3:]) 40 | return dataset_ad 41 | 42 | 43 | def anndata_with_obs() -> ad.AnnData: 44 | """Create a mini anndata with cell_type, disease and tissue.""" 45 | import anndata as ad 46 | import bionty.base as bionty_base 47 | 48 | celltypes = ["T cell", "hematopoietic stem cell", 
"hepatocyte", "my new cell type"] 49 | celltype_ids = ["CL:0000084", "CL:0000037", "CL:0000182", ""] 50 | diseases = [ 51 | "chronic kidney disease", 52 | "liver lymphoma", 53 | "cardiac ventricle disorder", 54 | "Alzheimer disease", 55 | ] 56 | tissues = ["kidney", "liver", "heart", "brain"] 57 | df = pd.DataFrame() 58 | df["cell_type"] = celltypes * 10 59 | df["cell_type_id"] = celltype_ids * 10 60 | df["tissue"] = tissues * 10 61 | df["disease"] = diseases * 10 62 | df.index = "obs" + df.index.astype(str) 63 | 64 | adata = ad.AnnData(X=np.zeros(shape=(40, 100), dtype=np.float32), obs=df) 65 | adata.var.index = bionty_base.Gene().df().head(100)["ensembl_gene_id"].values 66 | 67 | return adata 68 | -------------------------------------------------------------------------------- /lamindb/core/exceptions.py: -------------------------------------------------------------------------------- 1 | from ..errors import * # noqa: F403 backward compat 2 | -------------------------------------------------------------------------------- /lamindb/core/storage/__init__.py: -------------------------------------------------------------------------------- 1 | """Storage API. 2 | 3 | Valid suffixes. 4 | 5 | .. autosummary:: 6 | :toctree: . 7 | 8 | VALID_SUFFIXES 9 | 10 | Array accessors. 11 | 12 | .. autosummary:: 13 | :toctree: . 

   AnnDataAccessor
   BackedAccessor
"""

from lamindb_setup.core.upath import LocalPathClasses, UPath, infer_filesystem

from ._backed_access import AnnDataAccessor, BackedAccessor
from ._tiledbsoma import save_tiledbsoma_experiment
from ._valid_suffixes import VALID_SUFFIXES
from .objects import infer_suffix, write_to_disk
from .paths import delete_storage
-------------------------------------------------------------------------------- /lamindb/core/storage/_polars_lazy_df.py: --------------------------------------------------------------------------------
from __future__ import annotations

from contextlib import contextmanager
from pathlib import Path
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from collections.abc import Iterator

    from polars import LazyFrame as PolarsLazyFrame
    from upath import UPath

# suffixes that have a polars scan_* loader (see `scans` below)
POLARS_SUFFIXES = (".parquet", ".csv", ".ndjson", ".ipc")


@contextmanager
def _open_polars_lazy_df(
    paths: UPath | list[UPath], **kwargs
) -> Iterator[PolarsLazyFrame]:
    """Yield a polars `LazyFrame` scanning `paths`; close all files on exit.

    Directories are expanded recursively to the files they contain. The scan
    function is chosen from the suffix of the *first* file, so all files are
    assumed to share one suffix.

    NOTE(review): an empty `paths` raises IndexError and an unsupported suffix
    raises KeyError at the `yield` line — presumably callers pre-validate
    against `POLARS_SUFFIXES`; confirm.
    """
    try:
        import polars as pl
    except ImportError as ie:
        raise ImportError("Please install polars: pip install polars") from ie

    scans = {
        ".parquet": pl.scan_parquet,
        ".csv": pl.scan_csv,
        ".ndjson": pl.scan_ndjson,
        ".ipc": pl.scan_ipc,
    }

    path_list = []
    if isinstance(paths, Path):
        paths = [paths]
    for path in paths:
        # assume http is always a file
        if getattr(path, "protocol", None) not in {"http", "https"} and path.is_dir():
            # keep only entries that have a suffix, i.e. files
            path_list += [p for p in path.rglob("*") if p.suffix != ""]
        else:
            path_list.append(path)

    open_files = []

    try:
        for path in path_list:
            open_files.append(path.open(mode="rb"))

        yield scans[path_list[0].suffix](open_files, **kwargs)
    finally:
        # runs when the with-block exits, even on error: close every handle
        for open_file in open_files:
            open_file.close()
-------------------------------------------------------------------------------- /lamindb/core/storage/_pyarrow_dataset.py: --------------------------------------------------------------------------------
from __future__ import annotations

from typing import TYPE_CHECKING

import pyarrow.dataset
from lamindb_setup.core.upath import LocalPathClasses

if TYPE_CHECKING:
    from pyarrow.dataset import Dataset as PyArrowDataset
    from upath import UPath


# suffixes accepted for opening as a pyarrow dataset
PYARROW_SUFFIXES = (".parquet", ".csv", ".json", ".orc", ".arrow", ".feather", ".ipc")


def _open_pyarrow_dataset(paths: UPath | list[UPath], **kwargs) -> PyArrowDataset:
    """Open `paths` as a pyarrow dataset.

    Local paths are passed as posix strings with no filesystem; remote paths
    are passed as their plain path string together with their fsspec
    filesystem. Directories inside a list are expanded to their files.
    """
    if isinstance(paths, list):
        # a single path can be a directory, but a list of paths
        # has to be a flat list of files
        paths_str = []
        path0 = paths[0]
        if isinstance(path0, LocalPathClasses):
            path_to_str = lambda p: p.as_posix()
            filesystem = None
        else:
            path_to_str = lambda p: p.path
            filesystem = path0.fs
        for path in paths:
            if (
                getattr(path, "protocol", None) not in {"http", "https"}
                and path.is_dir()
            ):
                paths_str += [path_to_str(p) for p in path.rglob("*") if p.suffix != ""]
            else:
                paths_str.append(path_to_str(path))
    elif isinstance(paths, LocalPathClasses):
        paths_str, filesystem = paths.as_posix(), None
    else:
        paths_str, filesystem = paths.path, paths.fs

    return pyarrow.dataset.dataset(paths_str, filesystem=filesystem, **kwargs)
-------------------------------------------------------------------------------- /lamindb/core/storage/_valid_suffixes.py: --------------------------------------------------------------------------------
from __future__ import annotations

from lamindb_setup.core.upath import VALID_COMPOSITE_SUFFIXES, VALID_SIMPLE_SUFFIXES

# add new composite suffixes like so
VALID_COMPOSITE_SUFFIXES.update(
    {
".vitessce.json", 9 | ".ome.zarr", 10 | } 11 | ) 12 | # can do the same for simple valid suffixes 13 | 14 | 15 | class VALID_SUFFIXES: 16 | """Valid suffixes.""" 17 | 18 | SIMPLE: set[str] = VALID_SIMPLE_SUFFIXES 19 | """Simple suffixes.""" 20 | COMPOSITE: set[str] = VALID_COMPOSITE_SUFFIXES 21 | """Composite suffixes.""" 22 | -------------------------------------------------------------------------------- /lamindb/core/storage/objects.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from pathlib import PurePosixPath 4 | from typing import TYPE_CHECKING, TypeAlias 5 | 6 | from anndata import AnnData 7 | from pandas import DataFrame 8 | 9 | from lamindb.core._compat import ( 10 | with_package_obj, 11 | ) 12 | from lamindb.core.types import ScverseDataStructures 13 | 14 | if TYPE_CHECKING: 15 | from lamindb_setup.core.types import UPathStr 16 | 17 | SupportedDataTypes: TypeAlias = DataFrame | ScverseDataStructures 18 | 19 | 20 | def infer_suffix(dmem: SupportedDataTypes, format: str | None = None): 21 | """Infer LaminDB storage file suffix from a data object.""" 22 | if isinstance(dmem, AnnData): 23 | if format is not None: 24 | # should be `.h5ad`, `.`zarr`, or `.anndata.zarr` 25 | if format not in {"h5ad", "zarr", "anndata.zarr"}: 26 | raise ValueError( 27 | "Error when specifying AnnData storage format, it should be" 28 | f" 'h5ad', 'zarr', not '{format}'. Check 'format'" 29 | " or the suffix of 'key'." 30 | ) 31 | return "." 
+ format 32 | return ".h5ad" 33 | 34 | if isinstance(dmem, DataFrame): 35 | if format == ".csv": 36 | return ".csv" 37 | return ".parquet" 38 | 39 | if with_package_obj( 40 | dmem, 41 | "MuData", 42 | "mudata", 43 | lambda obj: True, # Just checking type, not calling any method 44 | )[0]: 45 | return ".h5mu" 46 | 47 | has_spatialdata, spatialdata_suffix = with_package_obj( 48 | dmem, 49 | "SpatialData", 50 | "spatialdata", 51 | lambda obj: ( 52 | format 53 | if format is not None and format in {"spatialdata.zarr", "zarr"} 54 | else ".zarr" 55 | if format is None 56 | else (_ for _ in ()).throw( 57 | ValueError( 58 | "Error when specifying SpatialData storage format, it should be" 59 | f" 'zarr', 'spatialdata.zarr', not '{format}'. Check 'format'" 60 | " or the suffix of 'key'." 61 | ) 62 | ) 63 | ), 64 | ) 65 | if has_spatialdata: 66 | return spatialdata_suffix 67 | else: 68 | raise NotImplementedError 69 | 70 | 71 | def write_to_disk(dmem: SupportedDataTypes, filepath: UPathStr) -> None: 72 | """Writes the passed in memory data to disk to a specified path.""" 73 | if isinstance(dmem, AnnData): 74 | suffix = PurePosixPath(filepath).suffix 75 | if suffix == ".h5ad": 76 | dmem.write_h5ad(filepath) 77 | return 78 | elif suffix == ".zarr": 79 | dmem.write_zarr(filepath) 80 | return 81 | else: 82 | raise NotImplementedError 83 | 84 | if isinstance(dmem, DataFrame): 85 | if filepath.suffix == ".csv": 86 | dmem.to_csv(filepath) 87 | return 88 | dmem.to_parquet(filepath) 89 | return 90 | 91 | if with_package_obj(dmem, "MuData", "mudata", lambda obj: obj.write(filepath))[0]: 92 | return 93 | 94 | if with_package_obj( 95 | dmem, 96 | "SpatialData", 97 | "spatialdata", 98 | lambda obj: obj.write(filepath, overwrite=True), 99 | )[0]: 100 | return 101 | 102 | raise NotImplementedError 103 | -------------------------------------------------------------------------------- /lamindb/core/subsettings/__init__.py: 
--------------------------------------------------------------------------------
"""Sub settings.

.. autosummary::
   :toctree: .

   CreationSettings
   AnnotationSettings

"""

from ._annotation_settings import AnnotationSettings
from ._creation_settings import CreationSettings
-------------------------------------------------------------------------------- /lamindb/core/subsettings/_annotation_settings.py: --------------------------------------------------------------------------------
class AnnotationSettings:
    """Settings for automated annotation."""

    n_max_records: int = 1000
    """Maximal number of records to annotate with during automated annotation.

    If the number of records to annotate exceeds this limit, print a warning and do not annotate.

    The number is calculated per feature for labels, and per schema for features.
    """


# module-level singleton consumed by the rest of the package
annotation_settings = AnnotationSettings()
-------------------------------------------------------------------------------- /lamindb/core/subsettings/_creation_settings.py: --------------------------------------------------------------------------------
class CreationSettings:
    """Settings for record creation."""

    search_names: bool = True
    """Switch off to speed up creating records (default `True`).

    If `True`, search for alternative names and avoids duplicates.

    FAQ: :doc:`/faq/idempotency`
    """
    artifact_skip_size_hash: bool = False
    """To speed up registering high numbers of files (default `False`).

    This bypasses queries for size and hash to AWS & GCP.

    It speeds up file creation by about a factor 100.
    """
    artifact_silence_missing_run_warning: bool = False
    """Silence warning about missing run & transform during artifact creation (default `False`)."""
    _artifact_use_virtual_keys: bool = True
    """Treat `key` parameter in :class:`~lamindb.Artifact` as virtual.

    If `True`, the `key` is **not** used to construct file paths, but file paths are
    based on the `uid` of artifact.
    """


# module-level singleton consumed by the rest of the package
creation_settings = CreationSettings()
-------------------------------------------------------------------------------- /lamindb/core/types.py: --------------------------------------------------------------------------------
from __future__ import annotations

from typing import TYPE_CHECKING, TypeVar

from anndata import AnnData
from lamindb_setup.core.types import UPathStr

from lamindb.base.types import (
    Dtype,
    FieldAttr,
    ListLike,
    StrField,
    TransformType,
)

# presumably TypeVar placeholders so the union below does not require the
# optional mudata/spatialdata packages at import time — TODO confirm
MuData = TypeVar("MuData")
SpatialData = TypeVar("SpatialData")

ScverseDataStructures = AnnData | MuData | SpatialData
-------------------------------------------------------------------------------- /lamindb/curators/__init__.py: --------------------------------------------------------------------------------
"""Curators.

.. autosummary::
   :toctree: .

   DataFrameCurator
   AnnDataCurator
   MuDataCurator
   SpatialDataCurator
   TiledbsomaExperimentCurator

Modules.

.. autosummary::
   :toctree: .
16 | 17 | core 18 | 19 | """ 20 | 21 | from ._legacy import ( # backward compat 22 | CellxGeneAnnDataCatManager, 23 | PertAnnDataCatManager, 24 | ) 25 | from .core import ( 26 | AnnDataCurator, 27 | DataFrameCurator, 28 | MuDataCurator, 29 | SpatialDataCurator, 30 | TiledbsomaExperimentCurator, 31 | ) 32 | 33 | __all__ = [ 34 | "CellxGeneAnnDataCatManager", 35 | "PertAnnDataCatManager", 36 | "AnnDataCurator", 37 | "DataFrameCurator", 38 | "MuDataCurator", 39 | "SpatialDataCurator", 40 | "TiledbsomaExperimentCurator", 41 | ] 42 | -------------------------------------------------------------------------------- /lamindb/curators/_cellxgene_schemas/schema_versions.csv: -------------------------------------------------------------------------------- 1 | schema_version,entity,organism,source,version 2 | 4.0.0,CellType,all,cl,2023-08-24 3 | 4.0.0,ExperimentalFactor,all,efo,3.57.0 4 | 4.0.0,Ethnicity,human,hancestro,3.0 5 | 4.0.0,DevelopmentalStage,human,hsapdv,2020-03-10 6 | 4.0.0,DevelopmentalStage,mouse,mmusdv,2020-03-10 7 | 4.0.0,Disease,all,mondo,2023-08-02 8 | 4.0.0,Organism,all,ncbitaxon,2023-06-20 9 | 4.0.0,Phenotype,all,pato,2023-05-18 10 | 4.0.0,Tissue,all,uberon,2023-09-05 11 | 5.0.0,CellType,all,cl,2024-01-04 12 | 5.0.0,ExperimentalFactor,all,efo,3.62.0 13 | 5.0.0,Ethnicity,human,hancestro,3.0 14 | 5.0.0,DevelopmentalStage,human,hsapdv,2020-03-10 15 | 5.0.0,DevelopmentalStage,mouse,mmusdv,2020-03-10 16 | 5.0.0,Disease,all,mondo,2024-01-03 17 | 5.0.0,Organism,all,ncbitaxon,2023-06-20 18 | 5.0.0,Phenotype,all,pato,2023-05-18 19 | 5.0.0,Tissue,all,uberon,2024-01-18 20 | 5.0.0,Gene,human,ensembl,release-110 21 | 5.0.0,Gene,mouse,ensembl,release-110 22 | 5.1.0,CellType,all,cl,2024-04-05 23 | 5.1.0,ExperimentalFactor,all,efo,3.65.0 24 | 5.1.0,Ethnicity,human,hancestro,3.0 25 | 5.1.0,DevelopmentalStage,human,hsapdv,2020-03-10 26 | 5.1.0,DevelopmentalStage,mouse,mmusdv,2020-03-10 27 | 5.1.0,Disease,all,mondo,2024-05-08 28 | 5.1.0,Organism,all,ncbitaxon,2023-06-20 29 | 
5.1.0,Phenotype,all,pato,2023-05-18
5.1.0,Tissue,all,uberon,2024-03-22
5.1.0,Gene,human,ensembl,release-110
5.1.0,Gene,mouse,ensembl,release-110
5.2.0,CellType,all,cl,2024-08-16
5.2.0,ExperimentalFactor,all,efo,3.69.0
5.2.0,Ethnicity,human,hancestro,3.0
5.2.0,DevelopmentalStage,human,hsapdv,2024-05-28
5.2.0,DevelopmentalStage,mouse,mmusdv,2024-05-28
5.2.0,Disease,all,mondo,2024-08-06
5.2.0,Organism,all,ncbitaxon,2023-06-20
5.2.0,Phenotype,all,pato,2023-05-18
5.2.0,Tissue,all,uberon,2024-08-07
5.2.0,Gene,human,ensembl,release-110
5.2.0,Gene,mouse,ensembl,release-110
-------------------------------------------------------------------------------- /lamindb/errors.py: --------------------------------------------------------------------------------
"""Errors.

.. autosummary::
   :toctree: .

   ValidationError
   InvalidArgument
   DoesNotExist
   NotebookNotSaved
   TrackNotCalled
   MissingContextUID
   UpdateContext
   IntegrityError
   InconsistentKey
   FieldValidationError
   SQLRecordNameChangeIntegrityError
   NoWriteAccess

"""

# inheriting from SystemExit has the sole purpose of suppressing
# the traceback - this isn't optimal but the current best solution
# https://laminlabs.slack.com/archives/C04A0RMA0SC/p1726856875597489


class ValidationError(Exception):
    """Validation error."""

    pass


class InvalidArgument(Exception):
    """Invalid method or function argument."""

    pass


class TrackNotCalled(Exception):
    """`ln.track()` wasn't called."""

    pass


class NotebookNotSaved(Exception):
    """Notebook wasn't saved."""

    pass


# equivalent to Django's DoesNotExist
# and SQLAlchemy's NoResultFound
class DoesNotExist(Exception):
    """No record found."""

    pass


class InconsistentKey(Exception):
    """Inconsistent transform or artifact `key`."""

    pass


class SQLRecordNameChangeIntegrityError(Exception):
    """Custom exception for name change errors."""

    pass


class FieldValidationError(Exception):
    """Field validation error."""

    pass


# -------------------------------------------------------------------------------------
# run context
# -------------------------------------------------------------------------------------


class IntegrityError(Exception):
    """Integrity error.

    For instance, it's not allowed to delete artifacts outside managed storage
    locations.
    """

    pass


class MissingContextUID(SystemExit):
    """User didn't define transform settings."""

    pass


class UpdateContext(SystemExit):
    """Transform settings require update."""

    pass


# -------------------------------------------------------------------------------------
# record
# -------------------------------------------------------------------------------------


class NoWriteAccess(Exception):
    """No write access to a space."""

    pass
-------------------------------------------------------------------------------- /lamindb/examples/__init__.py: --------------------------------------------------------------------------------
"""Examples.

.. autosummary::
   :toctree: .

   ingest_mini_immuno_datasets
   schemas

"""

from . import schemas


def ingest_mini_immuno_datasets():
    """Ingest mini immuno datasets.

    ..
from ... import Schema


def anndata_ensembl_gene_ids_and_valid_features_in_obs() -> Schema:
    """Return a schema for an AnnData with Ensembl gene IDs and valid features in obs.

    .. literalinclude:: scripts/define_schema_anndata_ensembl_gene_ids_and_valid_features_in_obs.py
        :language: python
    """
    import subprocess
    import sys
    from pathlib import Path

    # the script that defines & saves the schema lives in the docs folder
    docs_path = Path(__file__).parent.parent.parent.parent / "docs" / "scripts"
    # use sys.executable rather than a bare "python" so the script runs under
    # the same interpreter/virtualenv as the caller
    subprocess.run(
        [
            sys.executable,
            str(
                docs_path
                / "define_schema_anndata_ensembl_gene_ids_and_valid_features_in_obs.py"
            ),
        ],
        check=True,
    )

    # the script above created/updated this schema record; fetch and return it
    return Schema.get(name="anndata_ensembl_gene_ids_and_valid_features_in_obs")
from ... import Schema


def valid_features() -> Schema:
    """Return a schema that requires valid features.

    .. literalinclude:: scripts/define_valid_features.py
        :language: python
    """
    import subprocess
    import sys
    from pathlib import Path

    # the script that defines & saves the schema lives in the docs folder
    docs_path = Path(__file__).parent.parent.parent.parent / "docs" / "scripts"
    # use sys.executable rather than a bare "python" so the script runs under
    # the same interpreter/virtualenv as the caller
    subprocess.run(
        [sys.executable, str(docs_path / "define_valid_features.py")],
        check=True,
    )

    # the script above created/updated this schema record; fetch and return it
    return Schema.get(name="valid_features")
class Migration(migrations.Migration):
    # Data migration only: copies each transform's source code out of its
    # `_source_code_artifact` into `Transform.source_code` via
    # `transfer_source_code` defined above; no schema operations here.
    dependencies = [
        ("lamindb", "0069_squashed"),
    ]

    operations = [
        # NOTE(review): no reverse function is supplied, so this migration is
        # irreversible once the source-code artifacts are deleted.
        migrations.RunPython(transfer_source_code),
    ]
class Migration(migrations.Migration):
    # Restores `Collection.description` as a nullable, indexed TextField —
    # presumably undoing an earlier field-type change (see the "revert" in the
    # migration filename); TODO confirm against migration 0080.
    dependencies = [
        ("lamindb", "0080_polish_lamindbv1"),
    ]

    operations = [
        migrations.AlterField(
            model_name="collection",
            name="description",
            field=lamindb.base.fields.TextField(
                blank=True, db_index=True, default=None, null=True
            ),
        ),
    ]
db_index=True, default=None, max_length=255, null=True 19 | ), 20 | ), 21 | ] 22 | -------------------------------------------------------------------------------- /lamindb/migrations/0083_alter_feature_is_type_alter_flextable_is_type_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-01-25 13:29 2 | 3 | from django.db import migrations 4 | 5 | import lamindb.base.fields 6 | 7 | 8 | class Migration(migrations.Migration): 9 | dependencies = [ 10 | ("lamindb", "0082_alter_feature_dtype"), 11 | ] 12 | 13 | operations = [ 14 | migrations.RunSQL( 15 | sql=""" 16 | UPDATE lamindb_feature 17 | SET is_type = FALSE 18 | WHERE is_type IS NULL; 19 | 20 | UPDATE lamindb_flextable 21 | SET is_type = FALSE 22 | WHERE is_type IS NULL; 23 | 24 | UPDATE lamindb_param 25 | SET is_type = FALSE 26 | WHERE is_type IS NULL; 27 | 28 | UPDATE lamindb_project 29 | SET is_type = FALSE 30 | WHERE is_type IS NULL; 31 | 32 | UPDATE lamindb_reference 33 | SET is_type = FALSE 34 | WHERE is_type IS NULL; 35 | 36 | UPDATE lamindb_schema 37 | SET is_type = FALSE 38 | WHERE is_type IS NULL; 39 | 40 | UPDATE lamindb_ulabel 41 | SET is_type = FALSE 42 | WHERE is_type IS NULL; 43 | """ 44 | ), 45 | migrations.AlterField( 46 | model_name="feature", 47 | name="is_type", 48 | field=lamindb.base.fields.BooleanField( 49 | blank=True, db_index=True, default=False 50 | ), 51 | ), 52 | migrations.AlterField( 53 | model_name="flextable", 54 | name="is_type", 55 | field=lamindb.base.fields.BooleanField( 56 | blank=True, db_index=True, default=False 57 | ), 58 | ), 59 | migrations.AlterField( 60 | model_name="param", 61 | name="is_type", 62 | field=lamindb.base.fields.BooleanField( 63 | blank=True, db_index=True, default=False 64 | ), 65 | ), 66 | migrations.AlterField( 67 | model_name="project", 68 | name="is_type", 69 | field=lamindb.base.fields.BooleanField( 70 | blank=True, db_index=True, default=False 71 | ), 72 | ), 73 | 
migrations.AlterField( 74 | model_name="reference", 75 | name="is_type", 76 | field=lamindb.base.fields.BooleanField( 77 | blank=True, db_index=True, default=False 78 | ), 79 | ), 80 | migrations.AlterField( 81 | model_name="schema", 82 | name="is_type", 83 | field=lamindb.base.fields.BooleanField( 84 | blank=True, db_index=True, default=False 85 | ), 86 | ), 87 | migrations.AlterField( 88 | model_name="ulabel", 89 | name="is_type", 90 | field=lamindb.base.fields.BooleanField( 91 | blank=True, db_index=True, default=False 92 | ), 93 | ), 94 | ] 95 | -------------------------------------------------------------------------------- /lamindb/migrations/0084_alter_schemafeature_feature_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-01-27 07:22 2 | 3 | import django.db.models.deletion 4 | from django.db import migrations 5 | 6 | import lamindb.base.fields 7 | 8 | 9 | class Migration(migrations.Migration): 10 | dependencies = [ 11 | ("lamindb", "0083_alter_feature_is_type_alter_flextable_is_type_and_more"), 12 | ] 13 | 14 | operations = [ 15 | migrations.AlterField( 16 | model_name="schemafeature", 17 | name="feature", 18 | field=lamindb.base.fields.ForeignKey( 19 | blank=True, 20 | on_delete=django.db.models.deletion.PROTECT, 21 | related_name="links_schema", 22 | to="lamindb.feature", 23 | ), 24 | ), 25 | migrations.AlterField( 26 | model_name="schemafeature", 27 | name="schema", 28 | field=lamindb.base.fields.ForeignKey( 29 | blank=True, 30 | on_delete=django.db.models.deletion.CASCADE, 31 | related_name="links_feature", 32 | to="lamindb.schema", 33 | ), 34 | ), 35 | ] 36 | -------------------------------------------------------------------------------- /lamindb/migrations/0085_alter_feature_is_type_alter_flextable_is_type_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-01-27 13:48 2 | 3 | from 
django.db import migrations 4 | 5 | import lamindb.base.fields 6 | 7 | 8 | class Migration(migrations.Migration): 9 | dependencies = [ 10 | ("lamindb", "0084_alter_schemafeature_feature_and_more"), 11 | ] 12 | 13 | operations = [ 14 | migrations.AlterField( 15 | model_name="feature", 16 | name="is_type", 17 | field=lamindb.base.fields.BooleanField( 18 | blank=True, db_index=True, default=False, null=True 19 | ), 20 | ), 21 | migrations.AlterField( 22 | model_name="flextable", 23 | name="is_type", 24 | field=lamindb.base.fields.BooleanField( 25 | blank=True, db_index=True, default=False, null=True 26 | ), 27 | ), 28 | migrations.AlterField( 29 | model_name="param", 30 | name="is_type", 31 | field=lamindb.base.fields.BooleanField( 32 | blank=True, db_index=True, default=False, null=True 33 | ), 34 | ), 35 | migrations.AlterField( 36 | model_name="project", 37 | name="is_type", 38 | field=lamindb.base.fields.BooleanField( 39 | blank=True, db_index=True, default=False, null=True 40 | ), 41 | ), 42 | migrations.AlterField( 43 | model_name="reference", 44 | name="is_type", 45 | field=lamindb.base.fields.BooleanField( 46 | blank=True, db_index=True, default=False, null=True 47 | ), 48 | ), 49 | migrations.AlterField( 50 | model_name="schema", 51 | name="is_type", 52 | field=lamindb.base.fields.BooleanField( 53 | blank=True, db_index=True, default=False, null=True 54 | ), 55 | ), 56 | migrations.AlterField( 57 | model_name="ulabel", 58 | name="is_type", 59 | field=lamindb.base.fields.BooleanField( 60 | blank=True, db_index=True, default=False, null=True 61 | ), 62 | ), 63 | ] 64 | -------------------------------------------------------------------------------- /lamindb/migrations/0086_various.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-02-06 07:10 2 | 3 | from django.db import migrations, models 4 | 5 | import lamindb.base.fields 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 
dependencies = [ 10 | ("lamindb", "0085_alter_feature_is_type_alter_flextable_is_type_and_more"), 11 | ] 12 | 13 | operations = [ 14 | migrations.AlterField( 15 | model_name="transform", 16 | name="hash", 17 | field=lamindb.base.fields.CharField( 18 | blank=True, 19 | db_index=True, 20 | default=None, 21 | max_length=22, 22 | null=True, 23 | unique=True, 24 | ), 25 | ), 26 | migrations.AlterField( 27 | model_name="artifact", 28 | name="hash", 29 | field=lamindb.base.fields.CharField( 30 | blank=True, 31 | db_index=True, 32 | default=None, 33 | max_length=22, 34 | null=True, 35 | unique=True, 36 | ), 37 | ), 38 | migrations.AlterField( 39 | model_name="collection", 40 | name="hash", 41 | field=lamindb.base.fields.CharField( 42 | blank=True, 43 | db_index=True, 44 | default=None, 45 | max_length=22, 46 | null=True, 47 | unique=True, 48 | ), 49 | ), 50 | migrations.CreateModel( 51 | name="Migration", 52 | fields=[ 53 | ( 54 | "id", 55 | models.BigAutoField( 56 | auto_created=True, 57 | primary_key=True, 58 | serialize=False, 59 | verbose_name="ID", 60 | ), 61 | ), 62 | ( 63 | "app", 64 | lamindb.base.fields.CharField( 65 | blank=True, default=None, max_length=255 66 | ), 67 | ), 68 | ( 69 | "name", 70 | lamindb.base.fields.CharField( 71 | blank=True, default=None, max_length=255 72 | ), 73 | ), 74 | ("applied", lamindb.base.fields.DateTimeField(blank=True)), 75 | ], 76 | options={ 77 | "db_table": "django_migrations", 78 | "managed": False, 79 | }, 80 | ), 81 | migrations.AlterField( 82 | model_name="param", 83 | name="dtype", 84 | field=lamindb.base.fields.CharField( 85 | blank=True, db_index=True, default=None, max_length=64, null=True 86 | ), 87 | ), 88 | migrations.AlterField( 89 | model_name="param", 90 | name="dtype", 91 | field=lamindb.base.fields.CharField( 92 | blank=True, db_index=True, default=None, max_length=255, null=True 93 | ), 94 | ), 95 | ] 96 | -------------------------------------------------------------------------------- 
/lamindb/migrations/0087_rename__schemas_m2m_artifact_feature_sets_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-02-13 12:00 2 | 3 | import django.db.models.deletion 4 | from django.db import migrations, models 5 | 6 | import lamindb.base.fields 7 | 8 | 9 | class Migration(migrations.Migration): 10 | dependencies = [ 11 | ("lamindb", "0086_various"), 12 | ] 13 | 14 | operations = [ 15 | migrations.RenameField( 16 | model_name="artifact", 17 | old_name="_schemas_m2m", 18 | new_name="feature_sets", 19 | ), 20 | migrations.AlterField( 21 | model_name="artifact", 22 | name="schema", 23 | field=lamindb.base.fields.ForeignKey( 24 | blank=True, 25 | default=None, 26 | null=True, 27 | on_delete=django.db.models.deletion.PROTECT, 28 | related_name="validated_artifacts", 29 | to="lamindb.schema", 30 | ), 31 | ), 32 | migrations.AlterField( 33 | model_name="artifact", 34 | name="feature_sets", 35 | field=models.ManyToManyField( 36 | related_name="artifacts", 37 | through="lamindb.ArtifactSchema", 38 | to="lamindb.schema", 39 | ), 40 | ), 41 | ] 42 | -------------------------------------------------------------------------------- /lamindb/migrations/0090_runproject_project_runs.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-03-05 10:20 2 | 3 | import django.db.models.deletion 4 | import django.db.models.functions.datetime 5 | from django.db import migrations, models 6 | 7 | import lamindb.base.fields 8 | import lamindb.base.users 9 | import lamindb.models.sqlrecord 10 | 11 | 12 | class Migration(migrations.Migration): 13 | dependencies = [ 14 | ("lamindb", "0089_subsequent_runs"), 15 | ] 16 | 17 | operations = [ 18 | migrations.CreateModel( 19 | name="RunProject", 20 | fields=[ 21 | ("id", models.BigAutoField(primary_key=True, serialize=False)), 22 | ( 23 | "created_at", 24 | lamindb.base.fields.DateTimeField( 25 | 
blank=True, 26 | db_default=django.db.models.functions.datetime.Now(), 27 | db_index=True, 28 | editable=False, 29 | ), 30 | ), 31 | ( 32 | "created_by", 33 | lamindb.base.fields.ForeignKey( 34 | blank=True, 35 | default=lamindb.base.users.current_user_id, 36 | editable=False, 37 | on_delete=django.db.models.deletion.PROTECT, 38 | related_name="+", 39 | to="lamindb.user", 40 | ), 41 | ), 42 | ( 43 | "project", 44 | lamindb.base.fields.ForeignKey( 45 | blank=True, 46 | on_delete=django.db.models.deletion.PROTECT, 47 | related_name="links_run", 48 | to="lamindb.project", 49 | ), 50 | ), 51 | ( 52 | "run", 53 | lamindb.base.fields.ForeignKey( 54 | blank=True, 55 | on_delete=django.db.models.deletion.CASCADE, 56 | related_name="links_project", 57 | to="lamindb.run", 58 | ), 59 | ), 60 | ], 61 | options={ 62 | "unique_together": {("run", "project")}, 63 | }, 64 | bases=(models.Model, lamindb.models.sqlrecord.IsLink), 65 | ), 66 | migrations.AddField( 67 | model_name="project", 68 | name="runs", 69 | field=models.ManyToManyField( 70 | related_name="projects", through="lamindb.RunProject", to="lamindb.run" 71 | ), 72 | ), 73 | ] 74 | -------------------------------------------------------------------------------- /lamindb/migrations/0091_alter_featurevalue_options_alter_space_options_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.1.4 on 2025-04-30 09:11 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("lamindb", "0090_runproject_project_runs"), 9 | ] 10 | 11 | operations = [ 12 | migrations.AlterModelOptions( 13 | name="featurevalue", 14 | options={"base_manager_name": "objects"}, 15 | ), 16 | migrations.AlterModelOptions( 17 | name="space", 18 | options={"base_manager_name": "objects"}, 19 | ), 20 | migrations.AlterModelOptions( 21 | name="user", 22 | options={"base_manager_name": "objects"}, 23 | ), 24 | ] 25 | 
-------------------------------------------------------------------------------- /lamindb/migrations/0092_alter_artifactfeaturevalue_artifact_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-05-06 20:34 2 | 3 | import django.db.models.deletion 4 | from django.db import migrations 5 | 6 | import lamindb.base.fields 7 | 8 | 9 | class Migration(migrations.Migration): 10 | dependencies = [ 11 | ("lamindb", "0091_alter_featurevalue_options_alter_space_options_and_more"), 12 | ] 13 | 14 | operations = [ 15 | migrations.AlterField( 16 | model_name="artifactfeaturevalue", 17 | name="artifact", 18 | field=lamindb.base.fields.ForeignKey( 19 | blank=True, 20 | on_delete=django.db.models.deletion.CASCADE, 21 | related_name="links_featurevalue", 22 | to="lamindb.artifact", 23 | ), 24 | ), 25 | migrations.AlterField( 26 | model_name="artifactfeaturevalue", 27 | name="featurevalue", 28 | field=lamindb.base.fields.ForeignKey( 29 | blank=True, 30 | on_delete=django.db.models.deletion.PROTECT, 31 | related_name="links_artifact", 32 | to="lamindb.featurevalue", 33 | ), 34 | ), 35 | migrations.AlterField( 36 | model_name="artifactparamvalue", 37 | name="artifact", 38 | field=lamindb.base.fields.ForeignKey( 39 | blank=True, 40 | on_delete=django.db.models.deletion.CASCADE, 41 | related_name="links_paramvalue", 42 | to="lamindb.artifact", 43 | ), 44 | ), 45 | migrations.AlterField( 46 | model_name="artifactparamvalue", 47 | name="paramvalue", 48 | field=lamindb.base.fields.ForeignKey( 49 | blank=True, 50 | on_delete=django.db.models.deletion.PROTECT, 51 | related_name="links_artifact", 52 | to="lamindb.paramvalue", 53 | ), 54 | ), 55 | migrations.AlterField( 56 | model_name="runparamvalue", 57 | name="paramvalue", 58 | field=lamindb.base.fields.ForeignKey( 59 | blank=True, 60 | on_delete=django.db.models.deletion.PROTECT, 61 | related_name="links_run", 62 | to="lamindb.paramvalue", 63 | ), 64 | ), 65 | 
migrations.AlterField( 66 | model_name="runparamvalue", 67 | name="run", 68 | field=lamindb.base.fields.ForeignKey( 69 | blank=True, 70 | on_delete=django.db.models.deletion.CASCADE, 71 | related_name="links_paramvalue", 72 | to="lamindb.run", 73 | ), 74 | ), 75 | ] 76 | -------------------------------------------------------------------------------- /lamindb/migrations/0093_alter_schemacomponent_unique_together.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-05-07 12:16 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("lamindb", "0092_alter_artifactfeaturevalue_artifact_and_more"), 9 | ] 10 | 11 | operations = [ 12 | migrations.AlterUniqueTogether( 13 | name="schemacomponent", 14 | unique_together={("composite", "slot"), ("composite", "slot", "component")}, 15 | ), 16 | ] 17 | -------------------------------------------------------------------------------- /lamindb/migrations/0094_writeloglock_writelogmigrationstate_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.1.7 on 2025-05-10 00:32 2 | 3 | import django.db.models.deletion 4 | from django.db import migrations, models 5 | 6 | 7 | class Migration(migrations.Migration): 8 | dependencies = [ 9 | ("lamindb", "0093_alter_schemacomponent_unique_together"), 10 | ] 11 | 12 | operations = [ 13 | migrations.CreateModel( 14 | name="WriteLogLock", 15 | fields=[ 16 | ( 17 | "id", 18 | models.BigAutoField( 19 | auto_created=True, 20 | primary_key=True, 21 | serialize=False, 22 | verbose_name="ID", 23 | ), 24 | ), 25 | ("locked", models.BooleanField()), 26 | ], 27 | ), 28 | migrations.CreateModel( 29 | name="MigrationState", 30 | fields=[ 31 | ("id", models.SmallAutoField(primary_key=True, serialize=False)), 32 | ("migration_state_id", models.JSONField()), 33 | ], 34 | ), 35 | migrations.CreateModel( 36 | 
name="TableState", 37 | fields=[ 38 | ("id", models.SmallAutoField(primary_key=True, serialize=False)), 39 | ("table_name", models.CharField(max_length=255)), 40 | ("backfilled", models.BooleanField()), 41 | ], 42 | ), 43 | migrations.CreateModel( 44 | name="WriteLog", 45 | fields=[ 46 | ("seqno", models.AutoField(primary_key=True, serialize=False)), 47 | ( 48 | "uid", 49 | models.CharField( 50 | db_index=True, editable=False, max_length=18, unique=True 51 | ), 52 | ), 53 | ("space_uid", models.CharField(max_length=12, null=True)), 54 | ("created_by_uid", models.CharField(default="00000000", max_length=8)), 55 | ("branch_code", models.IntegerField(default=1)), 56 | ( 57 | "run_uid", 58 | models.CharField(default="0000000000000000", max_length=16), 59 | ), 60 | ("record_uid", models.JSONField(null=True)), 61 | ("record_data", models.JSONField(null=True)), 62 | ("event_type", models.PositiveSmallIntegerField()), 63 | ("created_at", models.DateTimeField()), 64 | ( 65 | "migration_state", 66 | models.ForeignKey( 67 | on_delete=django.db.models.deletion.PROTECT, 68 | to="lamindb.migrationstate", 69 | ), 70 | ), 71 | ( 72 | "table", 73 | models.ForeignKey( 74 | on_delete=django.db.models.deletion.PROTECT, 75 | to="lamindb.tablestate", 76 | ), 77 | ), 78 | ], 79 | options={ 80 | "verbose_name": "Write Log", 81 | "verbose_name_plural": "Write Logs", 82 | }, 83 | ), 84 | ] 85 | -------------------------------------------------------------------------------- /lamindb/migrations/0097_remove_schemaparam_param_remove_paramvalue_param_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-05-11 18:54 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("lamindb", "0096_remove_artifact__param_values_and_more"), 9 | ] 10 | 11 | operations = [ 12 | migrations.DeleteModel( 13 | name="ArtifactParamValue", 14 | ), 15 | migrations.DeleteModel( 16 
| name="SchemaParam", 17 | ), 18 | migrations.DeleteModel( 19 | name="Param", 20 | ), 21 | migrations.DeleteModel( 22 | name="ParamValue", 23 | ), 24 | migrations.DeleteModel( 25 | name="RunParamValue", 26 | ), 27 | ] 28 | -------------------------------------------------------------------------------- /lamindb/migrations/0099_alter_writelog_seqno.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.1.7 on 2025-05-23 23:20 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("lamindb", "0098_alter_feature_type_alter_project_type_and_more"), 9 | ] 10 | 11 | operations = [ 12 | migrations.AlterField( 13 | model_name="writelog", 14 | name="seqno", 15 | field=models.BigAutoField(primary_key=True, serialize=False), 16 | ), 17 | migrations.RenameField( 18 | model_name="writelog", 19 | old_name="seqno", 20 | new_name="id", 21 | ), 22 | ] 23 | -------------------------------------------------------------------------------- /lamindb/migrations/0100_branch_alter_artifact__branch_code_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-05-25 11:59 2 | 3 | import django.db.models.deletion 4 | import django.db.models.functions.datetime 5 | from django.db import migrations, models 6 | 7 | import lamindb.base.fields 8 | 9 | 10 | def update_space_uids_and_create_branches(apps, schema_editor): 11 | Space = apps.get_model("lamindb", "Space") 12 | Space.objects.filter(uid="00000000").update(uid="A") 13 | Branch = apps.get_model("lamindb", "Branch") 14 | Branch.objects.get_or_create( 15 | id=-1, 16 | uid="T", 17 | name="Trash", 18 | description="The trash.", 19 | ) 20 | Branch.objects.get_or_create( 21 | id=0, 22 | uid="A", 23 | name="Archive", 24 | description="The archive.", 25 | ) 26 | Branch.objects.get_or_create( 27 | uid="M", 28 | name="Main", 29 | description="The main 
& default branch of the instance.", 30 | ) 31 | 32 | 33 | class Migration(migrations.Migration): 34 | dependencies = [ 35 | ("lamindb", "0099_alter_writelog_seqno"), 36 | ] 37 | 38 | operations = [ 39 | migrations.CreateModel( 40 | name="Branch", 41 | fields=[ 42 | ("id", models.AutoField(primary_key=True, serialize=False)), 43 | ("name", models.CharField(db_index=True, max_length=100)), 44 | ( 45 | "uid", 46 | lamindb.base.fields.CharField( 47 | blank=True, 48 | db_default="M", 49 | db_index=True, 50 | default="M", 51 | editable=False, 52 | max_length=12, 53 | unique=True, 54 | ), 55 | ), 56 | ( 57 | "description", 58 | lamindb.base.fields.CharField( 59 | blank=True, default=None, max_length=255, null=True 60 | ), 61 | ), 62 | ( 63 | "created_at", 64 | lamindb.base.fields.DateTimeField( 65 | blank=True, 66 | db_default=django.db.models.functions.datetime.Now(), 67 | db_index=True, 68 | editable=False, 69 | ), 70 | ), 71 | ( 72 | "created_by", 73 | lamindb.base.fields.ForeignKey( 74 | blank=True, 75 | default=None, 76 | null=True, 77 | on_delete=django.db.models.deletion.CASCADE, 78 | related_name="+", 79 | to="lamindb.user", 80 | ), 81 | ), 82 | ], 83 | options={ 84 | "abstract": False, 85 | "base_manager_name": "objects", 86 | }, 87 | ), 88 | migrations.AlterField( 89 | model_name="space", 90 | name="uid", 91 | field=lamindb.base.fields.CharField( 92 | blank=True, 93 | db_default="A", 94 | db_index=True, 95 | default="A", 96 | editable=False, 97 | max_length=12, 98 | unique=True, 99 | ), 100 | ), 101 | migrations.RunPython(update_space_uids_and_create_branches), 102 | ] 103 | -------------------------------------------------------------------------------- /lamindb/migrations/0102_remove_writelog_branch_code_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-05-27 11:29 2 | 3 | import django.db.models.deletion 4 | from django.db import migrations, models 5 | 6 | 7 | class 
Migration(migrations.Migration): 8 | dependencies = [ 9 | ("lamindb", "0101_alter_artifact_hash_alter_feature_name_and_more"), 10 | ] 11 | 12 | operations = [ 13 | migrations.RemoveField( 14 | model_name="writelog", 15 | name="branch_code", 16 | ), 17 | migrations.RemoveField( 18 | model_name="writelog", 19 | name="space_uid", 20 | ), 21 | migrations.AddField( 22 | model_name="writelog", 23 | name="branch", 24 | field=models.ForeignKey( 25 | default=1, 26 | on_delete=django.db.models.deletion.PROTECT, 27 | to="lamindb.branch", 28 | ), 29 | ), 30 | migrations.AddField( 31 | model_name="writelog", 32 | name="space", 33 | field=models.ForeignKey( 34 | default=1, 35 | on_delete=django.db.models.deletion.PROTECT, 36 | to="lamindb.space", 37 | ), 38 | ), 39 | migrations.AlterField( 40 | model_name="writelog", 41 | name="run_uid", 42 | field=models.CharField(default="0000000000000000", max_length=20), 43 | ), 44 | migrations.AlterField( 45 | model_name="writelog", 46 | name="record_uid", 47 | field=models.JSONField(db_index=True, default=0), 48 | preserve_default=False, 49 | ), 50 | migrations.AlterModelOptions( 51 | name="migrationstate", 52 | options={"base_manager_name": "objects"}, 53 | ), 54 | migrations.AlterModelOptions( 55 | name="tablestate", 56 | options={"base_manager_name": "objects"}, 57 | ), 58 | migrations.AlterField( 59 | model_name="writelog", 60 | name="migration_state", 61 | field=models.ForeignKey( 62 | on_delete=django.db.models.deletion.PROTECT, to="lamindb.migrationstate" 63 | ), 64 | ), 65 | migrations.AlterField( 66 | model_name="writelog", 67 | name="table", 68 | field=models.ForeignKey( 69 | on_delete=django.db.models.deletion.PROTECT, to="lamindb.tablestate" 70 | ), 71 | ), 72 | ] 73 | -------------------------------------------------------------------------------- /lamindb/migrations/0103_remove_writelog_migration_state_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 
on 2025-05-29 12:02 2 | 3 | from django.db import migrations 4 | 5 | 6 | def fix_artifact_kind(apps, schema_editor): 7 | Artifact = apps.get_model("lamindb", "Artifact") 8 | Artifact.objects.filter(kind="__lamindb__").update(kind="__lamindb_run__") 9 | 10 | 11 | class Migration(migrations.Migration): 12 | dependencies = [ 13 | ("lamindb", "0102_remove_writelog_branch_code_and_more"), 14 | ] 15 | 16 | operations = [ 17 | migrations.RunPython(fix_artifact_kind), 18 | migrations.RemoveField( 19 | model_name="writelog", 20 | name="migration_state", 21 | ), 22 | migrations.RemoveField( 23 | model_name="writelog", 24 | name="table", 25 | ), 26 | migrations.RemoveField( 27 | model_name="writelog", 28 | name="branch", 29 | ), 30 | migrations.RemoveField( 31 | model_name="writelog", 32 | name="space", 33 | ), 34 | migrations.DeleteModel( 35 | name="WriteLogLock", 36 | ), 37 | migrations.DeleteModel( 38 | name="MigrationState", 39 | ), 40 | migrations.DeleteModel( 41 | name="TableState", 42 | ), 43 | migrations.DeleteModel( 44 | name="WriteLog", 45 | ), 46 | ] 47 | -------------------------------------------------------------------------------- /lamindb/migrations/0105_record_unique_name.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-06-03 19:37 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("lamindb", "0104_squashed"), 9 | ] 10 | 11 | operations = [ 12 | migrations.AddConstraint( 13 | model_name="record", 14 | constraint=models.UniqueConstraint( 15 | condition=models.Q(("is_type", True)), 16 | fields=("name",), 17 | name="unique_name", 18 | ), 19 | ), 20 | ] 21 | -------------------------------------------------------------------------------- /lamindb/migrations/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/laminlabs/lamindb/0615476ba7f3680f4ff961851e6522d11e7f0a5c/lamindb/migrations/__init__.py -------------------------------------------------------------------------------- /lamindb/models/__init__.py: -------------------------------------------------------------------------------- 1 | """Models library. 2 | 3 | .. autosummary:: 4 | :toctree: . 5 | 6 | BaseSQLRecord 7 | SQLRecord 8 | Registry 9 | BasicQuerySet 10 | QuerySet 11 | ArtifactSet 12 | QueryManager 13 | SQLRecordList 14 | FeatureManager 15 | LabelManager 16 | IsVersioned 17 | CanCurate 18 | HasParents 19 | TracksRun 20 | TracksUpdates 21 | FeatureValue 22 | InspectResult 23 | ValidateFields 24 | SchemaOptionals 25 | 26 | """ 27 | 28 | # ruff: noqa: I001 29 | from lamin_utils._inspect import InspectResult 30 | from ._is_versioned import IsVersioned 31 | from .can_curate import CanCurate 32 | from .sqlrecord import ( 33 | BaseSQLRecord, 34 | SQLRecord, 35 | Registry, 36 | Space, 37 | Branch, 38 | Migration, 39 | ValidateFields, 40 | format_field_value, 41 | record_repr, 42 | IsLink, 43 | ) 44 | from .core import Storage 45 | from .transform import Transform 46 | from .run import Run, TracksRun, TracksUpdates, current_run, User 47 | from .feature import Feature, FeatureValue 48 | from .schema import Schema 49 | from .ulabel import ULabel 50 | 51 | # should come last as it needs everything else 52 | from .artifact import Artifact 53 | from ._feature_manager import FeatureManager 54 | from ._label_manager import LabelManager 55 | from .collection import Collection, CollectionArtifact 56 | from .project import Person, Project, Reference 57 | from .query_manager import QueryManager 58 | from .query_set import BasicQuerySet, QuerySet, SQLRecordList 59 | from .artifact_set import ArtifactSet 60 | from .has_parents import HasParents 61 | from datetime import datetime as _datetime 62 | 63 | FeatureSet = Schema # backward compat 64 | 65 | # link models 66 | from .artifact import 
ArtifactFeatureValue 67 | from .project import ( 68 | ArtifactProject, 69 | TransformProject, 70 | CollectionProject, 71 | ULabelProject, 72 | FeatureProject, 73 | SchemaProject, 74 | ArtifactReference, 75 | CollectionReference, 76 | SheetProject, 77 | RunProject, 78 | RecordProject, 79 | PersonProject, 80 | ) 81 | from .run import RunFeatureValue 82 | from .schema import ( 83 | SchemaFeature, 84 | ArtifactSchema, 85 | SchemaComponent, 86 | SchemaOptionals, 87 | ) 88 | from .ulabel import ArtifactULabel, TransformULabel, RunULabel, CollectionULabel 89 | 90 | from .record import ( 91 | Record, 92 | Sheet, 93 | RecordJson, 94 | RecordRecord, 95 | RecordULabel, 96 | RecordRun, 97 | RecordArtifact, 98 | ) 99 | 100 | 101 | LinkORM = IsLink # backward compat 102 | ParamValue = FeatureValue # backward compat 103 | ArtifactParamValue = ArtifactFeatureValue # backward compat 104 | RunParamValue = RunFeatureValue # backward compat 105 | Param = Feature # backward compat 106 | BasicRecord = BaseSQLRecord # backward compat 107 | -------------------------------------------------------------------------------- /lamindb/models/_relations.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | import lamindb_setup as ln_setup 6 | from django.db.models import ManyToManyField 7 | from lamindb_setup._connect_instance import ( 8 | get_owner_name_from_identifier, 9 | load_instance_settings, 10 | ) 11 | from lamindb_setup.core._settings_store import instance_settings_file 12 | 13 | from lamindb.models.sqlrecord import IsLink 14 | 15 | if TYPE_CHECKING: 16 | from lamindb.models.sqlrecord import Registry, SQLRecord 17 | 18 | 19 | def get_schema_modules(instance: str | None) -> set[str]: 20 | if instance is None or instance == "default": 21 | schema_modules = set(ln_setup.settings.instance.modules) 22 | schema_modules.add("core") 23 | return schema_modules 24 | owner, name = 
get_owner_name_from_identifier(instance) 25 | settings_file = instance_settings_file(name, owner) 26 | if settings_file.exists(): 27 | modules = set(load_instance_settings(settings_file).modules) 28 | else: 29 | cache_filepath = ( 30 | ln_setup.settings.cache_dir / f"instance--{owner}--{name}--uid.txt" 31 | ) 32 | if cache_filepath.exists(): 33 | modules = set(cache_filepath.read_text().split("\n")[1].split(",")) 34 | else: 35 | raise ValueError(f"Instance {instance} not found") 36 | shared_schema_modules = set(ln_setup.settings.instance.modules).intersection( 37 | modules 38 | ) 39 | shared_schema_modules.add("core") 40 | return shared_schema_modules 41 | 42 | 43 | # this function here should likely be renamed 44 | # it maps the __get_name_with_module__() onto the actual model 45 | def dict_module_name_to_model_name( 46 | registry: Registry, instance: str | None = None 47 | ) -> dict[str, Registry]: 48 | schema_modules = get_schema_modules(instance) 49 | d: dict = { 50 | i.related_model.__get_name_with_module__(): i.related_model 51 | for i in registry._meta.related_objects 52 | if i.related_name is not None 53 | and i.related_model.__get_module_name__() in schema_modules 54 | } 55 | d.update( 56 | { 57 | i.related_model.__get_name_with_module__(): i.related_model 58 | for i in registry._meta.many_to_many 59 | if i.name is not None 60 | and i.related_model.__get_module_name__() in schema_modules 61 | } 62 | ) 63 | return d 64 | 65 | 66 | def dict_related_model_to_related_name( 67 | registry: type[SQLRecord], links: bool = False, instance: str | None = None 68 | ) -> dict[str, str]: 69 | def include(model: SQLRecord): 70 | return not links != issubclass(model, IsLink) 71 | 72 | schema_modules = get_schema_modules(instance) 73 | 74 | related_objects = registry._meta.related_objects + registry._meta.many_to_many 75 | d: dict = { 76 | record.related_model.__get_name_with_module__(): ( 77 | record.related_name 78 | if not isinstance(record, ManyToManyField) 79 | else 
record.name 80 | ) 81 | for record in related_objects 82 | if ( 83 | record.name is not None 84 | and include(record.related_model) 85 | and record.related_model.__get_module_name__() in schema_modules 86 | ) 87 | } 88 | return d 89 | 90 | 91 | def get_related_name(features_type: type[SQLRecord]) -> str: 92 | from lamindb.models.schema import Schema 93 | 94 | candidates = [ 95 | field.related_name 96 | for field in Schema._meta.related_objects 97 | if field.related_model == features_type 98 | ] 99 | if not candidates: 100 | raise ValueError( 101 | f"Can't create feature sets from {features_type.__name__} because it's not" 102 | " related to it!\nYou need to create a link model between Schema and" 103 | " your SQLRecord in your custom module.\nTo do so, add a" 104 | " line:\n_feature_sets = models.ManyToMany(Schema," 105 | " related_name='mythings')\n" 106 | ) 107 | return candidates[0] 108 | -------------------------------------------------------------------------------- /lamindb/models/core.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import ( 4 | TYPE_CHECKING, 5 | overload, 6 | ) 7 | 8 | from django.db import models 9 | 10 | from lamindb.base.fields import ( 11 | CharField, 12 | ) 13 | 14 | from ..base.ids import base62_12 15 | from .run import TracksRun, TracksUpdates 16 | from .sqlrecord import SQLRecord 17 | 18 | if TYPE_CHECKING: 19 | from pathlib import Path 20 | 21 | from upath import UPath 22 | 23 | from .artifact import Artifact 24 | 25 | 26 | class Storage(SQLRecord, TracksRun, TracksUpdates): 27 | """Storage locations of artifacts such as folders and S3 buckets. 28 | 29 | A storage location is either a folder (local or in the cloud) or 30 | an entire S3/GCP bucket. 31 | 32 | A LaminDB instance can manage and link multiple storage locations. But any 33 | storage location is managed by *at most one* LaminDB instance. 34 | 35 | .. dropdown:: Managed vs. 
linked storage locations 36 | 37 | The LaminDB instance can update & delete artifacts in managed storage 38 | locations but merely read artifacts in linked storage locations. 39 | 40 | The `instance_uid` field defines the managing LaminDB instance of a 41 | storage location. 42 | 43 | When you delete a LaminDB instance, you'll be warned about data in managed 44 | storage locations while data in linked storage locations is ignored. 45 | 46 | See Also: 47 | :attr:`~lamindb.core.Settings.storage` 48 | Default storage. 49 | :attr:`~lamindb.setup.core.StorageSettings` 50 | Storage settings. 51 | 52 | Examples: 53 | 54 | Configure the default storage location on the command line:: 55 | 56 | lamin init --storage ./myfolder # or "s3://my-bucket" or "gs://my-bucket" 57 | 58 | View the current storage location for writing artifacts:: 59 | 60 | import lamindb as ln 61 | 62 | print(ln.settings.storage) 63 | 64 | Change the current storage location for writing artifacts:: 65 | 66 | ln.settings.storage = "./myfolder2" # or "s3://my-bucket2" or "gs://my-bucket2" 67 | 68 | View all storage locations used by the current instance:: 69 | 70 | ln.Storage.df() 71 | """ 72 | 73 | class Meta(SQLRecord.Meta, TracksRun.Meta, TracksUpdates.Meta): 74 | abstract = False 75 | 76 | _name_field: str = "root" 77 | 78 | id: int = models.AutoField(primary_key=True) 79 | """Internal id, valid only in one DB instance.""" 80 | uid: str = CharField( 81 | editable=False, unique=True, max_length=12, default=base62_12, db_index=True 82 | ) 83 | """Universal id, valid across DB instances.""" 84 | root: str = CharField(db_index=True, unique=True) 85 | """Root path of storage (cloud or local path).""" 86 | description: str | None = CharField(db_index=True, null=True) 87 | """A description of what the storage location is used for (optional).""" 88 | type: str = CharField(max_length=30, db_index=True) 89 | """Can be "local" vs. "s3" vs. 
"gs".""" 90 | region: str | None = CharField(max_length=64, db_index=True, null=True) 91 | """Cloud storage region, if applicable.""" 92 | instance_uid: str | None = CharField(max_length=12, db_index=True, null=True) 93 | """Instance that manages this storage location.""" 94 | artifacts: Artifact 95 | """Artifacts contained in this storage location.""" 96 | 97 | @overload 98 | def __init__( 99 | self, 100 | root: str, 101 | type: str, 102 | region: str | None, 103 | ): ... 104 | 105 | @overload 106 | def __init__( 107 | self, 108 | *db_args, 109 | ): ... 110 | 111 | def __init__( 112 | self, 113 | *args, 114 | **kwargs, 115 | ): 116 | super().__init__(*args, **kwargs) 117 | 118 | @property 119 | def path(self) -> Path | UPath: 120 | """Path. 121 | 122 | Uses the `.root` field and converts it into a `Path` or `UPath`. 123 | """ 124 | from lamindb_setup.core.upath import create_path 125 | 126 | access_token = self._access_token if hasattr(self, "_access_token") else None 127 | return create_path(self.root, access_token=access_token) 128 | -------------------------------------------------------------------------------- /lamindb/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/laminlabs/lamindb/0615476ba7f3680f4ff961851e6522d11e7f0a5c/lamindb/py.typed -------------------------------------------------------------------------------- /lamindb/setup/__init__.py: -------------------------------------------------------------------------------- 1 | import lamindb_setup as _lamindb_setup 2 | from lamindb_setup import * # noqa: F403 3 | from lamindb_setup import ( 4 | connect, 5 | delete, 6 | init, 7 | settings, 8 | ) 9 | 10 | from . 
import core 11 | 12 | del connect # we have this at the root level, hence, we don't want it here 13 | __doc__ = _lamindb_setup.__doc__.replace("lamindb_setup", "lamindb.setup") 14 | settings.__doc__ = settings.__doc__.replace("lamindb_setup", "lamindb.setup") 15 | -------------------------------------------------------------------------------- /lamindb/setup/core/__init__.py: -------------------------------------------------------------------------------- 1 | import lamindb_setup as _lamindb_setup 2 | from lamindb_setup.core import * # noqa: F403 3 | 4 | __doc__ = _lamindb_setup.core.__doc__.replace("lamindb_setup", "lamindb.setup") 5 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | from pathlib import Path 3 | 4 | import pytest 5 | 6 | 7 | @pytest.fixture(scope="function") 8 | def clean_soma_files(request): 9 | path = request.param if hasattr(request, "param") else "small_dataset.tiledbsoma" 10 | if Path(path).exists(): 11 | shutil.rmtree(path) 12 | 13 | yield path 14 | 15 | if Path(path).exists(): 16 | shutil.rmtree(path) 17 | -------------------------------------------------------------------------------- /tests/core/_dataset_fixtures.py: -------------------------------------------------------------------------------- 1 | import anndata as ad 2 | import lamindb as ln 3 | import mudata as md 4 | import numpy as np 5 | import pandas as pd 6 | import pytest 7 | import spatialdata as sd 8 | import tiledbsoma 9 | import tiledbsoma.io 10 | from scipy.sparse import csr_matrix 11 | 12 | 13 | @pytest.fixture(scope="session") 14 | def get_small_adata(): 15 | return ad.AnnData( 16 | X=np.array([[1, 2, 3], [4, 5, 6]]), 17 | obs={"feat1": ["A", "B"]}, 18 | var=pd.DataFrame(index=["MYC", "TCF7", "GATA1"]), 19 | obsm={"X_pca": np.array([[1, 2], [3, 4]])}, 20 | ) 21 | 22 | 23 | @pytest.fixture(scope="session") 24 | def 
get_small_mdata(): 25 | adata1 = ad.AnnData( 26 | X=np.array([[1, 2, 3], [4, 5, 6]]), 27 | obs={"feat1": ["A", "B"]}, 28 | var=pd.DataFrame(index=["MYC", "TCF7", "GATA1"]), 29 | obsm={"X_pca": np.array([[1, 2], [3, 4]])}, 30 | ) 31 | 32 | adata2 = ad.AnnData( 33 | X=np.array([[7, 8], [9, 10]]), 34 | obs={"feat2": ["C", "D"]}, 35 | var=pd.DataFrame(index=["FOXP3", "CD8A"]), 36 | obsm={"X_umap": np.array([[5, 6], [7, 8]])}, 37 | ) 38 | 39 | return md.MuData({"rna": adata1, "protein": adata2}) 40 | 41 | 42 | @pytest.fixture(scope="session") 43 | def get_small_sdata(): 44 | adata = ad.AnnData( 45 | X=csr_matrix(np.array([[0.1, 0.2], [0.3, 0.4]])), 46 | obs=pd.DataFrame(index=["cell1", "cell2"]), 47 | var=pd.DataFrame(index=["gene1", "gene2"]), 48 | ) 49 | 50 | { 51 | "region1": np.array([[[0, 0], [0, 1], [1, 1], [1, 0]]]), 52 | "region2": np.array([[[2, 2], [2, 3], [3, 3], [3, 2]]]), 53 | } 54 | 55 | sdata_obj = sd.SpatialData( 56 | tables={"gene_expression": adata}, 57 | ) 58 | 59 | return sdata_obj 60 | 61 | 62 | @pytest.fixture(scope="session") 63 | def get_small_soma_experiment(): 64 | adata = ln.core.datasets.mini_immuno.get_dataset1(otype="AnnData") 65 | tiledbsoma.io.from_anndata("test.tiledbsoma", adata, measurement_name="RNA") 66 | 67 | exp = tiledbsoma.Experiment.open("test.tiledbsoma") 68 | 69 | return exp 70 | -------------------------------------------------------------------------------- /tests/core/conftest.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | from pathlib import Path 3 | from subprocess import DEVNULL, run 4 | from time import perf_counter 5 | 6 | import lamindb_setup as ln_setup 7 | import pytest 8 | from lamin_utils import logger 9 | from laminci.db import setup_local_test_postgres 10 | 11 | AUTO_CONNECT = ln_setup.settings.auto_connect 12 | ln_setup.settings.auto_connect = False 13 | 14 | import lamindb as ln 15 | 16 | 17 | def pytest_sessionstart(): 18 | t_execute_start = 
perf_counter() 19 | 20 | ln_setup._TESTING = True 21 | pgurl = setup_local_test_postgres() 22 | ln.setup.init( 23 | storage="./default_storage_unit_core", 24 | modules="bionty", 25 | name="lamindb-unit-tests-core", 26 | db=pgurl, 27 | ) 28 | ln.setup.settings.auto_connect = True 29 | ln.settings.creation.artifact_silence_missing_run_warning = True 30 | total_time_elapsed = perf_counter() - t_execute_start 31 | print(f"Time to setup the instance: {total_time_elapsed:.3f}s") 32 | 33 | 34 | def pytest_sessionfinish(session: pytest.Session): 35 | logger.set_verbosity(1) 36 | shutil.rmtree("./default_storage_unit_core") 37 | ln.setup.delete("lamindb-unit-tests-core", force=True) 38 | run("docker stop pgtest && docker rm pgtest", shell=True, stdout=DEVNULL) # noqa: S602 39 | ln.setup.settings.auto_connect = AUTO_CONNECT 40 | 41 | 42 | @pytest.fixture 43 | def ccaplog(caplog): 44 | """Add caplog handler to our custom logger at session start.""" 45 | from lamin_utils._logger import logger 46 | 47 | # Add caplog's handler to our custom logger 48 | logger.addHandler(caplog.handler) 49 | 50 | yield caplog 51 | 52 | # Clean up at the end of the session 53 | logger.removeHandler(caplog.handler) 54 | 55 | 56 | @pytest.fixture( 57 | scope="module", 58 | params=[ 59 | # tuple of is_in_registered_storage, path, suffix, hash of test_dir 60 | (True, "./default_storage_unit_core/", ".csv", "iGtHiFEBV3r1_TFovdQCgw"), 61 | (True, "./default_storage_unit_core/", "", "iGtHiFEBV3r1_TFovdQCgw"), 62 | (True, "./registered_storage/", ".csv", "iGtHiFEBV3r1_TFovdQCgw"), 63 | (True, "./registered_storage/", "", "iGtHiFEBV3r1_TFovdQCgw"), 64 | (False, "./nonregistered_storage/", ".csv", "iGtHiFEBV3r1_TFovdQCgw"), 65 | (False, "./nonregistered_storage/", "", "iGtHiFEBV3r1_TFovdQCgw"), 66 | ], 67 | ) 68 | def get_test_filepaths(request): # -> Tuple[bool, Path, Path, Path, str] 69 | import lamindb as ln 70 | 71 | is_in_registered_storage: bool = request.param[0] 72 | root_dir: Path = 
Path(request.param[1]) 73 | suffix: str = request.param[2] 74 | hash_test_dir: str = request.param[3] 75 | if is_in_registered_storage: 76 | # ensure that it's actually registered 77 | if ln.Storage.filter(root=root_dir.resolve().as_posix()).one_or_none() is None: 78 | ln.Storage(root=root_dir.resolve().as_posix(), type="local").save() 79 | else: 80 | assert ( 81 | ln.Storage.filter(root=root_dir.resolve().as_posix()).one_or_none() is None 82 | ) 83 | test_dirpath = root_dir / "my_dir/" 84 | test_dirpath.mkdir(parents=True, exist_ok=True) 85 | # create a first file 86 | test_filepath0 = test_dirpath / f"my_file{suffix}" 87 | test_filepath0.write_text("0") 88 | # create a second, duplicated file 89 | test_filepath1 = test_dirpath / f"my_file1{suffix}" 90 | test_filepath1.write_text("0") 91 | # create a non-duplicated file 92 | test_filepath2 = test_dirpath / f"my_file2{suffix}" 93 | test_filepath2.write_text("1") 94 | # return a boolean indicating whether test filepath is in default storage 95 | # and the test filepath 96 | yield ( 97 | is_in_registered_storage, 98 | root_dir, 99 | test_dirpath, 100 | test_filepath0, 101 | suffix, 102 | hash_test_dir, 103 | ) 104 | shutil.rmtree(test_dirpath) 105 | -------------------------------------------------------------------------------- /tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 |
library(laminr)
13 | 
14 | db <- connect()
15 | 16 | 17 | 18 |
→ connected lamindb: laminlabs/lamindata
19 | 20 | 21 | 22 |
db$track("lOScuxDTDE0q0000")
23 | 24 | 25 | 26 |
→ loaded Transform('lOScuxDT'), started Run('GWpaTtUg') at 2024-12-01 17:49:18 UTC
27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 |
db$finish()
35 | 36 | 37 | 38 |
MoreOUTPUT 
39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /tests/core/notebooks/basic-r-notebook.Rmd.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | My exemplary R analysis 5 |

My exemplary R analysis

6 | 7 | 8 | 9 | 10 | 11 | 12 |
library(laminr)
13 | 
14 | db <- connect()
15 | 16 | 17 | 18 |
→ connected lamindb: laminlabs/lamindata
19 | 20 | 21 | 22 |
db$track("lOScuxDTDE0q0000")
23 | 24 | 25 | 26 |
→ loaded Transform('lOScuxDT'), started Run('GWpaTtUg') at 2024-12-01 17:49:18 UTC
27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 |
db$finish()
35 | 36 | 37 | 38 |
MoreOUTPUT ! please hit SHORTCUT to save the notebook in your editor and re-run finish()
39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /tests/core/notebooks/duplicate/with-title-initialized-consecutive-finish.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# My duplicated test notebook (consecutive) with `ln.finish()`" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "This has actually different content than the original one in the `notebooks/` folder." 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import lamindb as ln\n", 24 | "\n", 25 | "ln.track()" 26 | ] 27 | } 28 | ], 29 | "metadata": { 30 | "kernelspec": { 31 | "display_name": "py310", 32 | "language": "python", 33 | "name": "python3" 34 | }, 35 | "language_info": { 36 | "codemirror_mode": { 37 | "name": "ipython", 38 | "version": 3 39 | }, 40 | "file_extension": ".py", 41 | "mimetype": "text/x-python", 42 | "name": "python", 43 | "nbconvert_exporter": "python", 44 | "pygments_lexer": "ipython3", 45 | "version": "3.12.8" 46 | } 47 | }, 48 | "nbformat": 4, 49 | "nbformat_minor": 2 50 | } 51 | -------------------------------------------------------------------------------- /tests/core/notebooks/no-title.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "0", 6 | "metadata": {}, 7 | "source": [ 8 | "A notebook without title." 
9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "id": "1", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import lamindb as ln" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "id": "2", 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "# pass stem uid\n", 29 | "ln.track(\"123456789ABC\")" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "id": "3", 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "assert ln.context.transform.description == \"no-title.ipynb\"\n", 40 | "assert ln.context.transform.key == \"no-title.ipynb\"" 41 | ] 42 | } 43 | ], 44 | "metadata": { 45 | "kernelspec": { 46 | "display_name": "Python 3.9.12 ('base1')", 47 | "language": "python", 48 | "name": "python3" 49 | }, 50 | "language_info": { 51 | "codemirror_mode": { 52 | "name": "ipython", 53 | "version": 3 54 | }, 55 | "file_extension": ".py", 56 | "mimetype": "text/x-python", 57 | "name": "python", 58 | "nbconvert_exporter": "python", 59 | "pygments_lexer": "ipython3", 60 | "version": "3.12.8" 61 | }, 62 | "nbproject": { 63 | "id": "Irn3xQyQ40GU", 64 | "pypackage": { 65 | "nbproject": "0.0.7+2.g8521e30" 66 | }, 67 | "time_init": "2022-06-08T14:42:31.551211+00:00", 68 | "version": "0" 69 | }, 70 | "vscode": { 71 | "interpreter": { 72 | "hash": "2775e555cdc2d728c54aa22130c79afb1fa4da64f22f2fc6dcc2aa346c4e0672" 73 | } 74 | } 75 | }, 76 | "nbformat": 4, 77 | "nbformat_minor": 5 78 | } 79 | -------------------------------------------------------------------------------- /tests/core/notebooks/with-title-initialized-consecutive-finish-not-last-cell.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# My test notebook (consecutive) with `ln.finish()` not in last cell" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | 
"execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import lamindb as ln" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "# do not pass uid purposefully\n", 26 | "ln.track()" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "print(\"my consecutive cell\")" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "ln.finish(ignore_non_consecutive=True)" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "print(\"my consecutive cell\")" 54 | ] 55 | } 56 | ], 57 | "metadata": { 58 | "kernelspec": { 59 | "display_name": "py39", 60 | "language": "python", 61 | "name": "python3" 62 | }, 63 | "language_info": { 64 | "codemirror_mode": { 65 | "name": "ipython", 66 | "version": 3 67 | }, 68 | "file_extension": ".py", 69 | "mimetype": "text/x-python", 70 | "name": "python", 71 | "nbconvert_exporter": "python", 72 | "pygments_lexer": "ipython3", 73 | "version": "3.12.8" 74 | } 75 | }, 76 | "nbformat": 4, 77 | "nbformat_minor": 2 78 | } 79 | -------------------------------------------------------------------------------- /tests/core/notebooks/with-title-initialized-consecutive-finish.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# My test notebook (consecutive) with `ln.finish()`" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import lamindb as ln\n", 17 | "import pytest" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | 
"metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "with pytest.raises(ln.errors.InvalidArgument) as error:\n", 27 | " ln.track(\"ujPaFZ\")\n", 28 | "print(error.exconly())\n", 29 | "assert error.exconly().startswith(\n", 30 | " 'lamindb.errors.InvalidArgument: Please pass an auto-generated uid instead of \"ujPaFZ\". Resolve by running:'\n", 31 | ")" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "# with uid passed\n", 41 | "ln.track(\"ujPaFZatnMLG0000\")" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "print(\"my consecutive cell\")" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "print(\"my consecutive cell\")" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "ln.finish()" 69 | ] 70 | } 71 | ], 72 | "metadata": { 73 | "kernelspec": { 74 | "display_name": "py312", 75 | "language": "python", 76 | "name": "python3" 77 | }, 78 | "language_info": { 79 | "codemirror_mode": { 80 | "name": "ipython", 81 | "version": 3 82 | }, 83 | "file_extension": ".py", 84 | "mimetype": "text/x-python", 85 | "name": "python", 86 | "nbconvert_exporter": "python", 87 | "pygments_lexer": "ipython3", 88 | "version": "3.12.8" 89 | } 90 | }, 91 | "nbformat": 4, 92 | "nbformat_minor": 2 93 | } 94 | -------------------------------------------------------------------------------- /tests/core/scripts/duplicate1/script-to-test-versioning.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | ln.context.version = "1" 4 | ln.track("Ro1gl7n8YrdH0001") 5 | -------------------------------------------------------------------------------- 
/tests/core/scripts/duplicate2/script-to-test-versioning.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | ln.context.version = "2" 4 | ln.track("Ro1gl7n8YrdH0001") 5 | 6 | assert ln.context.transform.version == "2" 7 | -------------------------------------------------------------------------------- /tests/core/scripts/duplicate3/script-to-test-versioning.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | ln.context.version = "3" 4 | ln.track("Ro1gl7n8YrdH0001") 5 | -------------------------------------------------------------------------------- /tests/core/scripts/duplicate4/script-to-test-versioning.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | ln.track() 4 | -------------------------------------------------------------------------------- /tests/core/scripts/script-to-test-filename-change.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | ln.track("Ro1gl7n8YrdH0000") 4 | -------------------------------------------------------------------------------- /tests/core/scripts/script-to-test-versioning.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | ln.context.version = "1" 4 | ln.track("Ro1gl7n8YrdH0000") 5 | -------------------------------------------------------------------------------- /tests/core/test_artifact_folders.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | import pytest 3 | from lamindb.errors import InvalidArgument 4 | 5 | 6 | @pytest.mark.parametrize("key", [None, "my_new_folder"]) 7 | def test_folder_like_artifact(get_test_filepaths, key): 8 | # get variables from fixture 9 | is_in_registered_storage = get_test_filepaths[0] 10 | test_dirpath = 
get_test_filepaths[2] 11 | hash_test_dir = get_test_filepaths[5] 12 | 13 | # run tests on initial Artifact creation 14 | if key is not None and is_in_registered_storage: 15 | with pytest.raises(InvalidArgument) as error: 16 | ln.Artifact(test_dirpath, key=key) 17 | assert error.exconly().startswith( 18 | "lamindb.errors.InvalidArgument: The path" # The path {data} is already in registered storage 19 | ) 20 | return None 21 | if key is None and not is_in_registered_storage: 22 | with pytest.raises(ValueError) as error: 23 | ln.Artifact(test_dirpath, key=key) 24 | assert error.exconly().startswith( 25 | "ValueError: Pass one of key, run or description as a parameter" 26 | ) 27 | return None 28 | artifact1 = ln.Artifact(test_dirpath, key=key) 29 | assert artifact1.n_files == 3 30 | assert artifact1.hash == hash_test_dir 31 | assert artifact1._state.adding 32 | assert artifact1.description is None 33 | assert artifact1.path.exists() 34 | artifact1.save() 35 | 36 | # run tests on re-creating the Artifact 37 | artifact2 = ln.Artifact(test_dirpath, key=key, description="something") 38 | assert not artifact2._state.adding 39 | assert artifact1.id == artifact2.id 40 | assert artifact1.uid == artifact2.uid 41 | assert artifact1.storage == artifact2.storage 42 | assert artifact2.path.exists() 43 | assert artifact2.description == "something" 44 | 45 | # now put another file in the test directory 46 | 47 | # create a first file 48 | test_filepath_added = test_dirpath / "my_file_added.txt" 49 | test_filepath_added.write_text("2") 50 | artifact3 = ln.Artifact(test_dirpath, key=key, revises=artifact1) 51 | assert artifact3.n_files == 4 52 | assert artifact3.hash != hash_test_dir 53 | assert artifact3._state.adding 54 | assert artifact3.description is None 55 | assert artifact3.path.exists() 56 | artifact3.save() 57 | 58 | # the state of artifact1 is lost, because artifact3 is stored at the same path 59 | assert artifact3.overwrite_versions 60 | assert artifact1.overwrite_versions 
    assert artifact3.path == artifact1.path
    test_filepath_added.unlink()

    # delete the artifact
    artifact2.delete(permanent=True, storage=False)
    artifact3.delete(permanent=True, storage=False)


def test_overwrite_versions_false(get_test_filepaths):
    """Folder artifacts created with overwrite_versions=False get distinct storage paths per version."""
    # get variables from fixture
    is_in_registered_storage = get_test_filepaths[0]
    test_dirpath = get_test_filepaths[2]
    hash_test_dir = get_test_filepaths[5]
    if is_in_registered_storage:
        return
    artifact1 = ln.Artifact(
        test_dirpath, key="my_folder", overwrite_versions=False
    ).save()
    assert artifact1.hash == hash_test_dir
    # skip artifact2 because we already test this above
    # create a first file
    test_filepath_added = test_dirpath / "my_file_added.txt"
    test_filepath_added.write_text("2")
    artifact3 = ln.Artifact(test_dirpath, key="my_folder", overwrite_versions=False)
    assert artifact3.hash != hash_test_dir
    artifact3.save()
    # the state of artifact1 is lost, because artifact3 is stored at the same path
    # NOTE(review): with overwrite_versions=False the two paths actually differ
    # (asserted below); this comment looks copied from the overwrite_versions=True
    # test above — confirm intent
    assert not artifact3.overwrite_versions
    assert not artifact1.overwrite_versions
    assert artifact3.path != artifact1.path
    test_filepath_added.unlink()
    artifact1.delete(permanent=True, storage=False)
    artifact3.delete(permanent=True, storage=False)
--------------------------------------------------------------------------------
/tests/core/test_data.py:
--------------------------------------------------------------------------------
import lamindb as ln
import pytest


def test_rename():
    """Renaming a label/feature that annotates an artifact raises until the link is made external."""
    import pandas as pd
    from lamindb.errors import SQLRecordNameChangeIntegrityError

    df = pd.DataFrame(
        {
            "feature_to_rename": [
                "label-to-rename",
                "label-to-rename",
                "label-not-to-rename",
            ],
            "feature_to_rename2": [
                "label-not-to-rename",
                "label-not-to-rename",
                "label-not-to-rename",
            ],
        }
    )

    curator = ln.Curator.from_df(
        df,
        categoricals={
            "feature_to_rename": ln.ULabel.name,
            "feature_to_rename2": ln.ULabel.name,
        },
    )
    curator.add_new_from("feature_to_rename")
    curator.add_new_from("feature_to_rename2")
    artifact = curator.save_artifact(description="test-rename")
    # the curated artifact is linked to the label through the feature
    assert artifact.ulabels.through.objects.filter(
        feature__name="feature_to_rename", ulabel__name="label-to-rename"
    ).exists()
    assert ln.Artifact.filter(feature_sets__features__name="feature_to_rename").exists()

    # rename label
    ulabel = ln.ULabel.get(name="label-to-rename")
    with pytest.raises(SQLRecordNameChangeIntegrityError):
        ulabel.name = "label-renamed"
        ulabel.save()

    artifact.labels.make_external(ulabel)
    assert not artifact.ulabels.through.objects.filter(
        feature__name="feature_to_rename", ulabel__name="label-to-rename"
    ).exists()
    ulabel.name = "label-renamed"
    ulabel.save()

    # rename feature
    feature = ln.Feature.get(name="feature_to_rename")
    with pytest.raises(SQLRecordNameChangeIntegrityError):
        feature.name = "feature_renamed"
        feature.save()

    artifact.features.make_external(feature)
    assert not ln.Artifact.filter(
        feature_sets__features__name="feature_to_rename"
    ).exists()
    assert ln.Artifact.filter(
        feature_sets__features__name="feature_to_rename2"
    ).exists()
    feature.name = "feature_renamed"
    feature.save()

    # rename the other feature, automatically deletes no-member schema
    feature2 = ln.Feature.get(name="feature_to_rename2")
    artifact.features.make_external(feature2)
    assert artifact.feature_sets.count() == 0

    # clean up
    artifact.delete(permanent=True)
    ln.Schema.filter().delete()
    ln.ULabel.filter().delete()
    ln.Feature.filter().delete()
--------------------------------------------------------------------------------
/tests/core/test_db.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | 4 | def test_create_to_load(): 5 | transform = ln.Transform(version="0", key="test", type="pipeline") 6 | transform.save() 7 | run = ln.Run(transform=transform) 8 | run.save() 9 | ln.Storage.get(root=str(ln.setup.settings.storage.root)) 10 | -------------------------------------------------------------------------------- /tests/core/test_delete.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | 4 | def test_delete_record(): 5 | names = ["label1", "label2", "label3"] 6 | labels = [ln.ULabel(name=name) for name in names] 7 | ln.save(labels) 8 | ln.ULabel.filter(name__in=names).delete() 9 | assert ln.ULabel.filter(name__in=names).count() == 0 10 | -------------------------------------------------------------------------------- /tests/core/test_feature.py: -------------------------------------------------------------------------------- 1 | import bionty as bt 2 | import lamindb as ln 3 | import pandas as pd 4 | import pytest 5 | from lamindb.errors import ValidationError 6 | from lamindb.models.feature import serialize_pandas_dtype 7 | from pandas.api.types import is_string_dtype 8 | 9 | 10 | @pytest.fixture(scope="module") 11 | def df(): 12 | return pd.DataFrame( 13 | { 14 | "feat1": [1, 2, 3], 15 | "feat2": [3.1, 4.2, 5.3], 16 | "feat3": ["cond1", "cond2", "cond2"], 17 | "feat4": ["id1", "id2", "id3"], 18 | "rando_feature": ["rando1", "rando2", "rando3"], 19 | } 20 | ) 21 | 22 | 23 | def test_feature_init(): 24 | # no args allowed 25 | with pytest.raises(ValueError): 26 | ln.Feature("x") 27 | # no dtype passed 28 | with pytest.raises(ValidationError): 29 | ln.Feature(name="feat") 30 | # is OK if also is_type is passed 31 | ln.Feature(name="Feat", is_type=True) 32 | # wrong type 33 | with pytest.raises(ValueError): 34 | ln.Feature(name="feat", dtype="x") 35 | # type 
has to be a list of SQLRecord types 36 | with pytest.raises(ValidationError): 37 | ln.Feature(name="feat", dtype="cat[1]") 38 | # ensure feat1 does not exist 39 | if feat1 := ln.Feature.filter(name="feat1").one_or_none() is not None: 40 | feat1.delete() 41 | feat1 = ln.Feature(name="feat", dtype="str").save() 42 | with pytest.raises(ValidationError) as error: 43 | ln.Feature(name="feat", dtype="cat") 44 | assert ( 45 | error.exconly() 46 | == "lamindb.errors.ValidationError: Feature feat already exists with dtype str, you passed cat" 47 | ) 48 | feat1.delete() 49 | 50 | # should just return the feature 51 | feat2 = ln.Feature(name="feat2", dtype="str", description="feat2").save() 52 | feat2_again = ln.Feature(name="feat2", dtype="str", description="feat2").save() 53 | assert feat2 == feat2_again 54 | feat2.delete() 55 | 56 | # check that this works 57 | feature = ln.Feature(name="feat1", dtype="cat[ULabel|bionty.Gene]") 58 | # check that it also works via objects 59 | feature = ln.Feature(name="feat1", dtype=[ln.ULabel, bt.Gene]) 60 | assert feature.dtype == "cat[ULabel|bionty.Gene]" 61 | 62 | 63 | def test_feature_from_df(df): 64 | if feat1 := ln.Feature.filter(name="feat1").one_or_none() is not None: 65 | feat1.delete() 66 | features = ln.Feature.from_df(df.iloc[:, :4]).save() 67 | artifact = ln.Artifact.from_df(df, description="test").save() 68 | # test for deprecated add_feature_set 69 | artifact.features.add_feature_set(ln.Schema(features), slot="columns") 70 | features = artifact.features["columns"] 71 | assert len(features) == len(df.columns[:4]) 72 | [col for col in df.columns if is_string_dtype(df[col])] 73 | categoricals = { 74 | col: df[col] for col in df.columns if isinstance(df[col], pd.CategoricalDtype) 75 | } 76 | for feature in features: 77 | if feature.name in categoricals: 78 | assert feature.dtype == "cat" 79 | else: 80 | orig_type = df[feature.name].dtype 81 | assert feature.dtype == serialize_pandas_dtype(orig_type) 82 | for feature in 
features: 83 | feature.save() 84 | labels = [ln.ULabel(name=name) for name in df["feat3"].unique()] 85 | ln.save(labels) 86 | feature = ln.Feature.get(name="feat3") 87 | feature.dtype = "cat" 88 | feature.save() 89 | with pytest.raises(ValidationError) as err: 90 | artifact.labels.add(labels, feature=feature) 91 | assert ( 92 | err.exconly() 93 | == "lamindb.errors.ValidationError: Cannot manually annotate a feature measured *within* the dataset. Please use a Curator." 94 | ) 95 | extfeature = ln.Feature(name="extfeat", dtype="str").save() 96 | with pytest.raises(ValidationError) as err: 97 | artifact.labels.add(labels, feature=extfeature) 98 | assert ( 99 | err.exconly() 100 | == f"lamindb.errors.ValidationError: Feature {extfeature.name} needs dtype='cat' for label annotation, currently has dtype='str'" 101 | ) 102 | 103 | # clean up 104 | artifact.delete(permanent=True) 105 | ln.Schema.filter().all().delete() 106 | ln.ULabel.filter().all().delete() 107 | ln.Feature.filter().all().delete() 108 | -------------------------------------------------------------------------------- /tests/core/test_from_values.py: -------------------------------------------------------------------------------- 1 | import bionty as bt 2 | import lamindb as ln 3 | import pandas as pd 4 | import pytest 5 | 6 | 7 | @pytest.fixture(scope="module") 8 | def df(): 9 | return pd.DataFrame( 10 | ( 11 | ["T cell", "CL:0000084"], 12 | ["hepatocyte", "CL:0000182"], 13 | ["my new cell type", ""], 14 | ), 15 | columns=["cell_type", "cell_type_id"], 16 | ) 17 | 18 | 19 | def test_from_values_name(df): 20 | bt.CellType.filter().delete() 21 | assert df["cell_type"].tolist() == ["T cell", "hepatocyte", "my new cell type"] 22 | # create records from bionty 23 | result = bt.CellType.from_values(df.cell_type, "name") 24 | ids = [i.ontology_id for i in result] 25 | assert len(result) == 2 26 | assert set(ids) == {"CL:0000084", "CL:0000182"} 27 | assert result[0].source.entity == "bionty.CellType" 28 | 29 | # 
wrong field type 30 | with pytest.raises(TypeError): 31 | result = bt.CellType.from_values(df.cell_type, field=bt.CellType) 32 | 33 | 34 | def test_from_values_ontology_id(df): 35 | assert df["cell_type_id"].tolist() == ["CL:0000084", "CL:0000182", ""] 36 | result = bt.CellType.from_values(df.cell_type_id, "ontology_id") 37 | names = {i.name for i in result} 38 | assert len(result) == 2 39 | assert names == {"T cell", "hepatocyte"} 40 | assert result[0].source.entity == "bionty.CellType" 41 | 42 | 43 | def test_from_values_multiple_match(): 44 | records = bt.Gene.from_values(["ABC1", "PDCD1"], bt.Gene.symbol, organism="human") 45 | assert len(records) == 3 46 | 47 | 48 | def test_get_or_create_records(): 49 | names = ["ulabel" + str(i) for i in range(25)] 50 | labels = [ln.ULabel(name=name) for name in names] 51 | ln.save(labels) 52 | # more than 20 existing values 53 | labels = ln.ULabel.from_values(names, field="name") 54 | assert len(labels) == 25 55 | 56 | 57 | def test_from_values_synonyms_aware(): 58 | bt.CellType.from_source(name="T cell").save() 59 | # existing validated values 60 | records = bt.CellType.from_values(["T cell"], "name") 61 | assert len(records) == 1 62 | assert records[0].name == "T cell" 63 | assert isinstance(records[0].source, bt.Source) 64 | # existing validated values and synonyms 65 | records = bt.CellType.from_values(["T cell", "T-cell"], "name") 66 | assert len(records) == 1 67 | assert records[0].name == "T cell" 68 | assert isinstance(records[0].source, bt.Source) 69 | # bionty values and synonyms 70 | records = bt.CellType.from_values(["B-cell", "B cell"], "name") 71 | assert len(records) == 1 72 | assert records[0].name == "B cell" 73 | assert isinstance(records[0].source, bt.Source) 74 | # all possibilities of validated values 75 | records = bt.CellType.from_values( 76 | ["T cell", "T-cell", "t cell", "B cell", "B-cell"], "name" 77 | ) 78 | assert len(records) == 2 79 | names = [r.name for r in records] 80 | assert set(names) == 
{"T cell", "B cell"} 81 | assert isinstance(records[0].source, bt.Source) 82 | assert isinstance(records[1].source, bt.Source) 83 | # non-validated values 84 | records = bt.CellType.from_values(["T cell", "mycell"], "name") 85 | assert len(records) == 1 86 | assert records[0].name == "T cell" 87 | assert isinstance(records[0].source, bt.Source) 88 | assert records[0].ontology_id == "CL:0000084" 89 | bt.CellType.filter().all().delete() 90 | 91 | 92 | def test_standardize(): 93 | # only name field can be standardized 94 | results = bt.Gene.from_values( 95 | ["HES4", "TNFRSF4"], field=bt.Gene.ensembl_gene_id, organism="human" 96 | ) 97 | assert len(results) == 0 98 | 99 | results = bt.Gene.from_values( 100 | ["HES4", "TNFRSF4"], field=bt.Gene.symbol, organism="human" 101 | ) 102 | assert len(results) == 2 103 | -------------------------------------------------------------------------------- /tests/core/test_has_parents.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | from lamindb.models.has_parents import _add_emoji 3 | 4 | 5 | def test_view_parents(): 6 | label1 = ln.ULabel(name="label1") 7 | label2 = ln.ULabel(name="label2") 8 | label1.save() 9 | label2.save() 10 | label1.parents.add(label2) 11 | label1.view_parents(ln.ULabel.name, distance=1) 12 | label1.delete() 13 | label2.delete() 14 | 15 | 16 | def test_query_parents_children(): 17 | label1 = ln.ULabel(name="label1").save() 18 | label2 = ln.ULabel(name="label2").save() 19 | label3 = ln.ULabel(name="label3").save() 20 | label1.children.add(label2) 21 | label2.children.add(label3) 22 | parents = label3.query_parents() 23 | assert len(parents) == 2 24 | assert label1 in parents and label2 in parents 25 | children = label1.query_children() 26 | assert len(children) == 2 27 | assert label2 in children and label3 in children 28 | label1.delete() 29 | label2.delete() 30 | label3.delete() 31 | 32 | 33 | def test_add_emoji(): 34 | transform = 
ln.Transform(key="test-12345", type="upload") 35 | assert _add_emoji(transform, label="transform") == "🖥️ transform" 36 | transform.save() 37 | run = ln.Run(transform=transform) 38 | assert _add_emoji(run, label="run") == "🖥️ run" 39 | transform.delete() 40 | 41 | 42 | def test_view_lineage_circular(): 43 | import pandas as pd 44 | 45 | transform = ln.Transform(key="test").save() 46 | run = ln.Run(transform=transform).save() 47 | artifact = ln.Artifact.from_df( 48 | pd.DataFrame({"a": [1, 2, 3]}), description="test artifact", run=run 49 | ).save() 50 | run.input_artifacts.add(artifact) 51 | artifact.view_lineage() 52 | artifact.delete(permanent=True) 53 | run.delete() 54 | transform.delete() 55 | -------------------------------------------------------------------------------- /tests/core/test_integrity.py: -------------------------------------------------------------------------------- 1 | import lamindb_setup as ln_setup 2 | 3 | 4 | def test_migrate_check(): 5 | assert ln_setup.migrate.check() 6 | 7 | 8 | def test_system_check(): 9 | ln_setup.django("check") 10 | -------------------------------------------------------------------------------- /tests/core/test_manager.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | 4 | def test_manager_list(): 5 | label = ln.ULabel(name="manager label") 6 | label.save() 7 | label_names = [f"ULabel {i}" for i in range(3)] 8 | labels = [ln.ULabel(name=name) for name in label_names] 9 | ln.save(labels) 10 | label.parents.set(labels) 11 | assert len(label.parents.list()) == 3 12 | assert "ULabel 1" in label.parents.list("name") 13 | label.delete() 14 | for label in labels: 15 | label.delete() 16 | -------------------------------------------------------------------------------- /tests/core/test_models.py: -------------------------------------------------------------------------------- 1 | import re 2 | import textwrap 3 | 4 | import lamindb as ln 5 | import pandas as 
pd 6 | import pytest 7 | 8 | 9 | def _strip_ansi(text: str) -> str: 10 | """Remove ANSI escape sequences from a string.""" 11 | ansi_escape = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])") 12 | return ansi_escape.sub("", text) 13 | 14 | 15 | def test_registry__repr__feature(): 16 | import lamindb.models as ln 17 | 18 | feature = ln.Param 19 | expected_repr = textwrap.dedent("""\ 20 | Feature 21 | Simple fields 22 | .uid: CharField 23 | .name: CharField 24 | .dtype: CharField 25 | .is_type: BooleanField 26 | .unit: CharField 27 | .description: CharField 28 | .array_rank: SmallIntegerField 29 | .array_size: IntegerField 30 | .array_shape: JSONField 31 | .proxy_dtype: CharField 32 | .synonyms: TextField 33 | .created_at: DateTimeField 34 | .updated_at: DateTimeField 35 | Relational fields 36 | .branch: Branch 37 | .space: Space 38 | .created_by: User 39 | .run: Run 40 | .type: Feature 41 | .schemas: Schema 42 | .features: Feature 43 | .values: FeatureValue 44 | .projects: Project 45 | """).strip() 46 | 47 | actual_repr = _strip_ansi(repr(feature)) 48 | print(actual_repr) 49 | assert actual_repr.strip() == expected_repr.strip() 50 | 51 | 52 | def test_registry__repr__artifact(): 53 | import lamindb.models as ln 54 | 55 | artifact = ln.Artifact 56 | expected_repr = textwrap.dedent("""\ 57 | Artifact 58 | Simple fields 59 | .uid: CharField 60 | .key: CharField 61 | .description: CharField 62 | .suffix: CharField 63 | .kind: CharField 64 | .otype: CharField 65 | .size: BigIntegerField 66 | .hash: CharField 67 | .n_files: BigIntegerField 68 | .n_observations: BigIntegerField 69 | .version: CharField 70 | .is_latest: BooleanField 71 | .created_at: DateTimeField 72 | .updated_at: DateTimeField 73 | Relational fields 74 | .branch: Branch 75 | .space: Space 76 | .storage: Storage 77 | .run: Run 78 | .schema: Schema 79 | .created_by: User 80 | .ulabels: ULabel 81 | .input_of_runs: Run 82 | .feature_sets: Schema 83 | .collections: Collection 84 | .records: Record 85 | 
.references: Reference 86 | .projects: Project 87 | Bionty fields 88 | .organisms: bionty.Organism 89 | .genes: bionty.Gene 90 | .proteins: bionty.Protein 91 | .cell_markers: bionty.CellMarker 92 | .tissues: bionty.Tissue 93 | .cell_types: bionty.CellType 94 | .diseases: bionty.Disease 95 | .cell_lines: bionty.CellLine 96 | .phenotypes: bionty.Phenotype 97 | .pathways: bionty.Pathway 98 | .experimental_factors: bionty.ExperimentalFactor 99 | .developmental_stages: bionty.DevelopmentalStage 100 | .ethnicities: bionty.Ethnicity 101 | """).strip() 102 | 103 | actual_repr = _strip_ansi(repr(artifact)) 104 | print(actual_repr) 105 | assert actual_repr.strip() == expected_repr.strip() 106 | 107 | 108 | def test_unsaved_relationship_modification_attempts(): 109 | af = ln.Artifact.from_df( 110 | pd.DataFrame({"col1": [1, 2, 3], "col2": [4, 5, 6]}), description="testme" 111 | ) 112 | 113 | new_label = ln.ULabel(name="testlabel").save() 114 | with pytest.raises(ValueError) as excinfo: 115 | af.ulabels.add(new_label) 116 | 117 | assert ( 118 | str(excinfo.value) 119 | == "You are trying to access the many-to-many relationships of an unsaved Artifact object. Please save it first using '.save()'." 
120 | ) 121 | 122 | new_label.delete() 123 | af.delete() 124 | -------------------------------------------------------------------------------- /tests/core/test_notebooks.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | from pathlib import Path 4 | 5 | import lamindb as ln 6 | import nbproject_test 7 | 8 | notebook_dir = Path(__file__).parent / "notebooks/" 9 | notebook_dir_duplicate = Path(__file__).parent / "notebooks/duplicate/" 10 | 11 | 12 | def test_all_notebooks(): 13 | env = os.environ 14 | env["LAMIN_TESTING"] = "true" 15 | nbproject_test.execute_notebooks(notebook_dir) 16 | nbproject_test.execute_notebooks(notebook_dir_duplicate) 17 | del env["LAMIN_TESTING"] 18 | 19 | 20 | def test_run_after_rename_no_uid(): 21 | notebook_path = ( 22 | notebook_dir / "with-title-initialized-consecutive-finish-not-last-cell.ipynb" 23 | ) 24 | result = subprocess.run( # noqa: S602 25 | f"jupyter nbconvert --to notebook --inplace --execute {notebook_path}", 26 | shell=True, 27 | capture_output=True, 28 | ) 29 | print(result.stdout.decode()) 30 | print(result.stderr.decode()) 31 | assert result.returncode == 0 32 | 33 | uid = ln.Transform.get( 34 | key="with-title-initialized-consecutive-finish-not-last-cell.ipynb" 35 | ).uid 36 | 37 | # now, assume the user renames the notebook 38 | new_path = notebook_path.with_name("no-uid-renamed.ipynb") 39 | os.system(f"cp {notebook_path} {new_path}") # noqa: S605 40 | 41 | env = os.environ 42 | env["LAMIN_TESTING"] = "true" 43 | result = subprocess.run( # noqa: S602 44 | f"jupyter nbconvert --to notebook --inplace --execute {new_path}", 45 | shell=True, 46 | capture_output=True, 47 | env=env, 48 | ) 49 | print(result.stdout.decode()) 50 | print(result.stderr.decode()) 51 | assert result.returncode == 0 52 | del env["LAMIN_TESTING"] 53 | 54 | assert ln.Transform.get(key="no-uid-renamed.ipynb").uid == uid 55 | 56 | # new_path.unlink() 57 | 
--------------------------------------------------------------------------------
/tests/core/test_run.py:
--------------------------------------------------------------------------------
import lamindb as ln
import pytest


def test_run():
    """Run requires a saved transform; reference fields default to None."""
    transform = ln.Transform(key="My transform")
    with pytest.raises(ValueError) as error:
        ln.Run(transform)
    assert (
        error.exconly()
        == "ValueError: Please save transform record before creating a run"
    )
    transform.save()
    run = ln.Run(transform)
    assert run.reference is None
    assert run.reference_type is None
    run2 = ln.Run(transform, reference="test1", reference_type="test2")
    assert run2.reference == "test1"
    assert run2.reference_type == "test2"
    assert run.uid != run2.uid
    transform.delete()


def test_edge_cases():
    """Run constructor rejects extra positional args and requires a transform."""
    with pytest.raises(ValueError) as error:
        ln.Run(1, 2)
    assert error.exconly() == "ValueError: Only one non-keyword arg allowed: transform"
    with pytest.raises(TypeError) as error:
        ln.Run()
    assert error.exconly() == "TypeError: Pass transform parameter"
--------------------------------------------------------------------------------
/tests/core/test_save.py:
--------------------------------------------------------------------------------
import lamindb as ln
import pytest
from lamindb.models.save import prepare_error_message, store_artifacts


def test_bulk_save_and_update():
    """ln.save creates new records and updates modified ones in bulk."""
    label_names = [f"ULabel {i} new" for i in range(3)]
    labels = [ln.ULabel(name=name) for name in label_names]
    # test bulk creation of new records
    ln.save(labels)
    assert len(ln.ULabel.filter(name__in=label_names).distinct().all()) == 3
    labels[0].name = "ULabel 0 updated"
    # test bulk update of existing records
    ln.save(labels)
    assert len(ln.ULabel.filter(name__in=label_names).distinct().all()) == 2
    assert ln.ULabel.get(name="ULabel 0 updated")


def test_prepare_error_message():
    """Error message distinguishes committed vs. uncommitted entries."""
    ln.core.datasets.file_mini_csv()
    artifact = ln.Artifact("mini.csv", description="test")
    exception = Exception("exception")

    error = prepare_error_message([], [artifact], exception)
    assert error.startswith(
        "The following entries have been successfully uploaded and committed to the database"
    )

    error = prepare_error_message([artifact], [], exception)
    assert error.startswith("No entries were uploaded or committed to the database")


def test_save_data_object():
    """Saving an artifact uploads it to storage."""
    ln.core.datasets.file_mini_csv()
    artifact = ln.Artifact("mini.csv", description="test")
    artifact.save()
    assert artifact.path.exists()
    artifact.delete(permanent=True, storage=True)


def test_store_artifacts_acid():
    """A failing storage-clearing step surfaces as RuntimeError from save paths."""
    ln.core.datasets.file_mini_csv()
    artifact = ln.Artifact("mini.csv", description="test")
    artifact._clear_storagekey = "test.csv"
    # errors on check_and_attempt_clearing
    with pytest.raises(RuntimeError):
        artifact.save()

    with pytest.raises(RuntimeError) as error:
        store_artifacts([artifact], using_key=None)
    assert str(error.exconly()).startswith(
        "RuntimeError: The following entries have been successfully uploaded"
    )

    artifact.delete(permanent=True)


def test_save_parents():
    """Bulk-saving ontology records also saves their parent records."""
    import bionty as bt

    records = bt.CellLine.from_values(["HEPG2", "HUVEC"])
    ln.save(records)
    assert bt.CellLine.get("4ea731nb").parents.df().shape[0] == 1
    bt.CellLine.filter().delete()
--------------------------------------------------------------------------------
/tests/core/test_search.py:
--------------------------------------------------------------------------------
import bionty as bt
import lamindb as ln
import pytest


@pytest.fixture(scope="module")
def prepare_cell_type_registry():
    """Populate the CellType registry with four known records; tear down afterwards."""
    bt.CellType.filter().all().delete()
    records = [
        {
            "ontology_id": "CL:0000084",
            "name": "T cell",
            "synonyms": "T-cell|T-lymphocyte|T lymphocyte",
            "children": ["CL:0000798", "CL:0002420", "CL:0002419", "CL:0000789"],
        },
        {
            "ontology_id": "CL:0000236",
            "name": "B cell",
            "synonyms": "B-lymphocyte|B lymphocyte|B-cell",
            "children": ["CL:0009114", "CL:0001201"],
        },
        {
            "ontology_id": "CL:0000696",
            "name": "PP cell",
            "synonyms": "type F enteroendocrine cell",
            "children": ["CL:0002680"],
        },
        {
            "ontology_id": "CL:0002072",
            "name": "nodal myocyte",
            "synonyms": "P cell|myocytus nodalis|cardiac pacemaker cell",
            "children": ["CL:1000409", "CL:1000410"],
        },
    ]
    public_records = []
    for ref_record in records:
        record = bt.CellType.from_source(ontology_id=ref_record["ontology_id"])
        assert record.name == ref_record["name"]
        assert set(record.synonyms.split("|")) == set(ref_record["synonyms"].split("|"))
        public_records.append(record)
    ln.save(public_records)
    yield "prepared"
    bt.CellType.filter().all().delete()


def test_search_synonyms(prepare_cell_type_registry):
    """Search matches synonyms, not only names."""
    result = bt.CellType.search("P cell").df()
    assert set(result.name.iloc[:2]) == {"nodal myocyte", "PP cell"}


def test_search_limit(prepare_cell_type_registry):
    """limit truncates the result set."""
    result = bt.CellType.search("P cell", limit=1).df()
    assert len(result) == 1


def test_search_case_sensitive(prepare_cell_type_registry):
    """case_sensitive=False matches regardless of casing."""
    result = bt.CellType.search("b cell", case_sensitive=False).df()
    assert result.name.iloc[0] == "B cell"


def test_search_None():
    """Searching for None raises with a clear message."""
    with pytest.raises(
        ValueError, match="Cannot search for None value! Please pass a valid string."
    ):
        bt.CellType.search(None)
--------------------------------------------------------------------------------
/tests/core/test_tracked.py:
--------------------------------------------------------------------------------
import concurrent.futures

import lamindb as ln
import pandas as pd
import pytest


@ln.tracked()
def process_chunk(chunk_id: int) -> str:
    """Create a small DataFrame for this chunk and save it as an artifact; return its key."""
    # Create a simple DataFrame
    df = pd.DataFrame(
        {"id": range(chunk_id * 10, (chunk_id + 1) * 10), "value": range(10)}
    )

    # Save it as an artifact
    key = f"chunk_{chunk_id}.parquet"
    artifact = ln.Artifact.from_df(df, key=key).save()
    return artifact.key


def test_tracked_parallel():
    """Each parallel call of a @ln.tracked() function gets its own run."""
    param_type = ln.Feature(name="Script[test_tracked.py]", is_type=True).save()
    ln.Feature(name="chunk_id", dtype="int", type=param_type).save()

    with pytest.raises(RuntimeError) as err:
        process_chunk(4)
    assert (
        err.exconly()
        == "RuntimeError: Please track the global run context before using @ln.tracked(): ln.track()"
    )

    # Ensure tracking is on
    ln.track()

    # Number of parallel executions
    n_parallel = 3

    # Use ThreadPoolExecutor for parallel execution
    with concurrent.futures.ThreadPoolExecutor(max_workers=n_parallel) as executor:
        # Submit all tasks
        futures = [executor.submit(process_chunk, i) for i in range(n_parallel)]
        # Get results as they complete
        chunk_keys = [
            future.result() for future in concurrent.futures.as_completed(futures)
        ]

    # Verify results
    # Each execution should have created its own artifact with unique run
    print(f"Created artifacts with keys: {chunk_keys}")
    artifacts = [ln.Artifact.get(key=key) for key in chunk_keys]

    # Check that we got the expected number of artifacts
    assert len(artifacts) == n_parallel

    # Verify each artifact has its own unique run
    runs = [artifact.run for artifact in artifacts]
    run_ids = [run.id for run in runs]
    print(f"Run IDs: {run_ids}")
    assert len(set(run_ids)) == n_parallel  # all runs should be unique

    # Verify each run has the correct start and finish times
    for run in runs:
        print(f"Run details: {run}")
        assert run.started_at is not None
        assert run.finished_at is not None
        assert run.started_at < run.finished_at

    # Clean up test artifacts
    for artifact in artifacts:
        artifact.delete(permanent=True)

    # reset the global tracking context so later tests start clean
    ln.context._uid = None
    ln.context._run = None
    ln.context._transform = None
    ln.context._path = None


if __name__ == "__main__":
    test_tracked_parallel()
--------------------------------------------------------------------------------
/tests/core/test_ulabel.py:
--------------------------------------------------------------------------------
import re

import lamindb as ln
import pytest
from lamindb.errors import FieldValidationError


def test_ulabel():
    """ULabel constructor validation: kwargs, positional args, type-name casing."""
    with pytest.raises(
        FieldValidationError,
        match=re.escape(
            "Only name, type, is_type, description, reference, reference_type are valid keyword arguments"
        ),
    ):
        ln.ULabel(x=1)

    with pytest.raises(ValueError) as error:
        ln.ULabel(1)
    assert error.exconly() == "ValueError: Only one non-keyword arg allowed"

    with pytest.raises(
        ValueError,
        match=re.escape(
            "'my_type' should start with a capital letter given you're defining a type"
        ),
    ):
        ln.ULabel(name="my_type", is_type=True)


def test_ulabel_plural_type_warning(ccaplog):
    """A plural type name triggers a singular-naming suggestion in the log."""
    ln.ULabel(name="MyThings", is_type=True)
    assert (
        "name 'MyThings' for type ends with 's', in case you're naming with plural, consider the singular for a type name"
        in ccaplog.text
    )
--------------------------------------------------------------------------------
/tests/core/test_view.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | 4 | def test_vew(): 5 | ln.view(modules="core") 6 | ln.view() 7 | -------------------------------------------------------------------------------- /tests/core/test_visibility.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | 4 | def testbranch_id(): 5 | # create a file with default branch_id 6 | with open("./testbranch_id.txt", "w") as f: 7 | f.write("branch_id") 8 | artifact = ln.Artifact("./testbranch_id.txt", description="testbranch_id").save() 9 | assert artifact.branch_id == 1 10 | 11 | # create a collection from file 12 | collection = ln.Collection(artifact, key="testbranch_id").save() 13 | 14 | # delete a collection will put both collection but not linked artifact in trash 15 | collection.delete() 16 | assert collection.ordered_artifacts[0].branch_id == 1 17 | result = ln.Collection.filter(key="testbranch_id").all() 18 | assert len(result) == 0 19 | result = ln.Collection.filter(key="testbranch_id", branch_id=1).all() 20 | assert len(result) == 0 21 | result = ln.Collection.filter(key="testbranch_id", visibility=1).all() 22 | assert len(result) == 0 23 | result = ln.Collection.filter(key="testbranch_id", branch_id=None).all() 24 | assert len(result) == 1 25 | result = ln.Collection.filter(key="testbranch_id", visibility=None).all() 26 | assert len(result) == 1 27 | 28 | # restore 29 | collection.restore() 30 | assert collection.branch_id == 1 31 | assert collection.ordered_artifacts[0].branch_id == 1 32 | 33 | # permanent delete 34 | collection.delete(permanent=True) 35 | result = ln.Artifact.filter(description="testbranch_id", branch_id=None).all() 36 | # also permanently deleted linked file 37 | assert len(result) == 1 38 | -------------------------------------------------------------------------------- /tests/curators/conftest.py: 
-------------------------------------------------------------------------------- 1 | import shutil 2 | 3 | import lamindb_setup as ln_setup 4 | import pytest 5 | 6 | 7 | def pytest_sessionstart(): 8 | ln_setup.init(storage="./testdb", modules="bionty,wetlab") 9 | 10 | 11 | def pytest_sessionfinish(session: pytest.Session): 12 | shutil.rmtree("./testdb") 13 | ln_setup.delete("testdb", force=True) 14 | 15 | 16 | @pytest.fixture 17 | def ccaplog(caplog): 18 | """Add caplog handler to our custom logger at session start.""" 19 | from lamin_utils._logger import logger 20 | 21 | # Add caplog's handler to our custom logger 22 | logger.addHandler(caplog.handler) 23 | 24 | yield caplog 25 | 26 | # Clean up at the end of the session 27 | logger.removeHandler(caplog.handler) 28 | -------------------------------------------------------------------------------- /tests/curators/test_curators_multivalue.py: -------------------------------------------------------------------------------- 1 | import bionty as bt 2 | import lamindb as ln 3 | import pandas as pd 4 | import pytest 5 | from lamindb.core.exceptions import ValidationError 6 | 7 | 8 | @pytest.fixture 9 | def df(): 10 | return pd.DataFrame( 11 | { 12 | "sample_id": [["sample1", "sample2"], ["sample2"], ["sample3"]], 13 | "dose": [[1.2, 2.3], [1.2], [2.3]], 14 | "cell_type": [["B cell", "T cell"], ["B cell"], ["T cell"]], 15 | "tissue": [["blood", "pulmo"], ["blood"], ["lung"]], 16 | } 17 | ) 18 | 19 | 20 | @pytest.fixture(scope="module") 21 | def lists_schema(): 22 | schema = ln.Schema( 23 | name="lists schema cat", 24 | features=[ 25 | ln.Feature(name="sample_id", dtype=list[str]).save(), 26 | ln.Feature(name="dose", dtype=list[float]).save(), 27 | ln.Feature(name="cell_type", dtype=list[str]).save(), 28 | ln.Feature(name="tissue", dtype=list[bt.Tissue]).save(), 29 | ], 30 | ).save() 31 | 32 | yield schema 33 | 34 | schema.delete() 35 | ln.Feature.filter().delete() 36 | bt.Tissue.filter().delete() 37 | 38 | 39 | def 
test_curator_df_multivalue(df, lists_schema): 40 | curator = ln.curators.DataFrameCurator(df, lists_schema) 41 | with pytest.raises(ValidationError): 42 | curator.validate() 43 | assert curator.cat._cat_vectors.keys() == {"columns", "tissue"} 44 | assert curator.cat._cat_vectors["tissue"]._validated == ["blood", "lung"] 45 | assert curator.cat._cat_vectors["tissue"]._non_validated == ["pulmo"] 46 | assert curator.cat._cat_vectors["tissue"]._synonyms == {"pulmo": "lung"} 47 | 48 | curator.cat.standardize("tissue") 49 | assert curator.cat._cat_vectors["tissue"]._non_validated == [] 50 | assert df["tissue"].tolist() == [["blood", "lung"], ["blood"], ["lung"]] 51 | 52 | assert curator.validate() is None 53 | -------------------------------------------------------------------------------- /tests/curators/test_cxg_curator.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | import numpy as np 3 | 4 | 5 | def test_cxg_curator(): 6 | schema_version = "5.2.0" 7 | adata = ln.core.datasets.small_dataset3_cellxgene() 8 | curator = ln.curators._legacy.CellxGeneAnnDataCatManager( 9 | adata, schema_version=schema_version 10 | ) 11 | 12 | adata.obs.rename(columns={"donor": "donor_id"}, inplace=True) 13 | curator = ln.curators._legacy.CellxGeneAnnDataCatManager( 14 | adata, 15 | defaults=ln.curators._legacy.CellxGeneAnnDataCatManager.cxg_categoricals_defaults, 16 | schema_version=schema_version, 17 | ) 18 | assert not curator.validate() 19 | 20 | adata = adata[:, ~adata.var.index.isin(curator.non_validated["var_index"])] 21 | adata.obs["tissue"] = adata.obs["tissue"].cat.rename_categories({"lungg": "lung"}) 22 | curator = ln.curators._legacy.CellxGeneAnnDataCatManager( 23 | adata, schema_version=schema_version 24 | ) 25 | assert curator.validate() 26 | 27 | artifact = curator.save_artifact( 28 | key=f"examples/dataset-curated-against-cxg-{curator.schema_version}.h5ad" 29 | ) 30 | title = "Cross-tissue immune cell analysis 
reveals tissue-specific features in humans (for test demo only)" 31 | 32 | adata.obsm["X_umap"] = np.zeros((adata.shape[0], 2)) 33 | adata_cxg = curator.to_cellxgene_anndata(is_primary_data=True, title=title) 34 | assert "cell_type_ontology_term_id" in adata_cxg.obs.columns 35 | 36 | artifact.delete(permanent=True) 37 | -------------------------------------------------------------------------------- /tests/curators/test_pert_curator.py: -------------------------------------------------------------------------------- 1 | # Here we use `PertCurator` to curate perturbation related columns in a subsetted `AnnData` object of [McFarland et al. 2020](https://www.nature.com/articles/s41467-020-17440-w). 2 | 3 | import bionty as bt 4 | import lamindb as ln 5 | import pandas as pd 6 | import wetlab as wl 7 | 8 | 9 | def test_pert_curator(): 10 | ln.settings.verbosity = "hint" 11 | adata = ( 12 | ln.Artifact.using("laminlabs/lamindata") 13 | .get(key="scrna/micro-macfarland2020.h5ad") 14 | .load() 15 | ) 16 | 17 | # ## Curate and register perturbations 18 | # 19 | # Required columns: 20 | # - Either "pert_target" or "pert_name" and "pert_type" ("pert_type" allows: "genetic", "drug", "biologic", "physical") 21 | # - If pert_dose = True (default), requires "pert_dose" in form of number+unit. E.g. 10.0nM 22 | # - If pert_time = True (default), requires "pert_time" in form of number+unit. E.g. 
10.0h 23 | 24 | # + 25 | # rename the columns to match the expected format 26 | adata.obs["pert_time"] = adata.obs["time"].apply( 27 | lambda x: str(x).split(", ")[-1] + "h" if pd.notna(x) else x 28 | ) # we only take the last timepoint 29 | adata.obs["pert_dose"] = adata.obs["dose_value"].map( 30 | lambda x: f"{x}{adata.obs['dose_unit'].iloc[0]}" if pd.notna(x) else None 31 | ) 32 | adata.obs.rename( 33 | columns={"perturbation": "pert_name", "perturbation_type": "pert_type"}, 34 | inplace=True, 35 | ) 36 | # fix the perturbation type as suggested by the curator 37 | adata.obs["pert_type"] = adata.obs["pert_type"].cat.rename_categories( 38 | {"CRISPR": "genetic", "drug": "compound"} 39 | ) 40 | 41 | adata.obs["tissue_type"] = "cell culture" 42 | 43 | curator = ln.curators._legacy.PertAnnDataCatManager(adata) 44 | 45 | assert curator.validate() is not True 46 | 47 | # ### Genetic perturbations 48 | 49 | # register genetic perturbations with their target genes 50 | pert_target_map = { 51 | "sggpx4-1": "GPX4", 52 | "sggpx4-2": "GPX4", 53 | "sgor2j2": "OR2J2", # cutting control 54 | } 55 | 56 | ln.settings.creation.search_names = False 57 | for sg_name, gene_symbol in pert_target_map.items(): 58 | pert = wl.GeneticPerturbation.filter( 59 | system="CRISPR-Cas9", name=sg_name 60 | ).one_or_none() 61 | if pert is None: 62 | pert = wl.GeneticPerturbation( 63 | system="CRISPR-Cas9", 64 | name=sg_name, 65 | description="cutting control" if sg_name == "sgor2j2" else None, 66 | ).save() 67 | target = wl.PerturbationTarget.filter(name=gene_symbol).one_or_none() 68 | if target is None: 69 | target = wl.PerturbationTarget(name=gene_symbol).save() 70 | pert.targets.add(target) 71 | genes = bt.Gene.filter(symbol=gene_symbol).all() 72 | if len(genes) == 0: 73 | genes = bt.Gene.from_values( 74 | [gene_symbol], field=bt.Gene.symbol, organism="human" 75 | ).save() 76 | target.genes.add(*genes) 77 | ln.settings.creation.search_names = True 78 | 79 | adata.obs["pert_target"] = 
adata.obs["pert_genetic"].map(pert_target_map) 80 | 81 | # register the negative control without targets: Non-cutting control 82 | wl.GeneticPerturbation( 83 | name="sglacz", system="CRISPR-Cas9", description="non-cutting control" 84 | ).save() 85 | 86 | # ### Compounds 87 | 88 | # the remaining compounds are not in CHEBI and we create records for them 89 | curator.add_new_from("pert_compound") 90 | 91 | # manually fix sex and set assay 92 | adata.obs["sex"] = adata.obs["sex"].astype(str).str.lower() 93 | adata.obs["assay"] = "10x 3' v3" 94 | 95 | # subset the adata to only include the validated genes 96 | if "var_index" in curator.non_validated: 97 | adata = adata[ 98 | :, ~adata.var_names.isin(curator.non_validated["var_index"]) 99 | ].copy() 100 | 101 | # standardize disease and sex as suggested 102 | curator.standardize("disease") 103 | 104 | curator = wl.PertCurator(adata) 105 | curator.validate() 106 | curator.standardize("all") 107 | curator.add_new_from("all") 108 | 109 | assert curator.validate() is True 110 | -------------------------------------------------------------------------------- /tests/permissions/conftest.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | from subprocess import DEVNULL, run 3 | from time import perf_counter 4 | 5 | import lamindb_setup as ln_setup 6 | import pytest 7 | from lamin_utils import logger 8 | 9 | 10 | def pytest_sessionstart(): 11 | t_execute_start = perf_counter() 12 | 13 | ln_setup.settings.auto_connect = True 14 | # these are called in separate scripts because can't change connection 15 | # within the same python process due to django 16 | # init instance and setup RLS 17 | run( # noqa: S602 18 | "python ./tests/permissions/scripts/setup_instance.py", 19 | shell=True, 20 | capture_output=False, 21 | ) 22 | # populate permissions and models via the admin connection 23 | run( # noqa: S602 24 | "python ./tests/permissions/scripts/setup_access.py", 25 | 
shell=True, 26 | capture_output=False, 27 | ) 28 | 29 | total_time_elapsed = perf_counter() - t_execute_start 30 | print(f"Time to setup the instance: {total_time_elapsed:.3f}s") 31 | 32 | 33 | def pytest_sessionfinish(session: pytest.Session): 34 | logger.set_verbosity(1) 35 | shutil.rmtree("./default_storage_permissions") 36 | ln_setup.delete("lamindb-test-permissions", force=True) 37 | run("docker stop pgtest && docker rm pgtest", shell=True, stdout=DEVNULL) # noqa: S602 38 | -------------------------------------------------------------------------------- /tests/permissions/jwt_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import psycopg2 4 | 5 | 6 | def sign_jwt(db_url, payload: dict) -> str: 7 | with psycopg2.connect(db_url) as conn, conn.cursor() as cur: 8 | cur.execute( 9 | """ 10 | SELECT sign( 11 | %s::json, 12 | (SELECT security.get_secret('jwt_secret')), 13 | %s 14 | ) 15 | """, 16 | (json.dumps(payload), "HS256"), 17 | ) 18 | token = cur.fetchone()[0] 19 | if not token: 20 | msg = "Failed to generate JWT" 21 | raise ValueError(msg) 22 | return token 23 | -------------------------------------------------------------------------------- /tests/permissions/scripts/check_lamin_dev.py: -------------------------------------------------------------------------------- 1 | import lamindb_setup as ln_setup 2 | 3 | ln_setup.settings.auto_connect = False 4 | 5 | import lamindb as ln 6 | 7 | assert ln.setup.settings.user.handle == "testuser1" 8 | 9 | ln.connect("laminlabs/lamin-dev") 10 | 11 | assert ln.setup.settings.instance.slug == "laminlabs/lamin-dev" 12 | 13 | space_name = "Our test space for CI" 14 | ln.track(space=space_name) 15 | 16 | assert ln.context.space.name == space_name 17 | ulabel = ln.ULabel(name="My test ulabel in test space").save() 18 | assert ulabel.space.name == "All" # ulabel should end up in common space 19 | ulabel.delete() # delete silently passes in case another worker 
deleted the ulabel 20 | assert ( 21 | ln.context.transform.space.name == space_name 22 | ) # transform and run in restricted space 23 | assert ln.context.run.space.name == space_name # transform and run in restricted space 24 | ln.context.transform.delete() 25 | -------------------------------------------------------------------------------- /tests/permissions/scripts/clean_lamin_dev.py: -------------------------------------------------------------------------------- 1 | import lamindb_setup as ln_setup 2 | 3 | ln_setup.settings.auto_connect = False 4 | 5 | import lamindb as ln 6 | 7 | assert ln.setup.settings.user.handle == "testuser1" 8 | 9 | ln.connect("laminlabs/lamin-dev") 10 | 11 | assert ln.setup.settings.instance.slug == "laminlabs/lamin-dev" 12 | 13 | artifact = ln.Artifact.get(key="mytest") 14 | assert artifact.space.name == "Our test space for CI" 15 | artifact.delete() 16 | -------------------------------------------------------------------------------- /tests/permissions/scripts/setup_access.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln # noqa 2 | import hubmodule.models as hm 3 | from uuid import uuid4 4 | from hubmodule._setup import _install_db_module 5 | from laminhub_rest.core.postgres import DbRoleHandler 6 | 7 | # create a db connection url that works with RLS 8 | JWT_ROLE_NAME = "permissions_jwt" 9 | 10 | 11 | def create_jwt_user(dsn_admin: str, jwt_role_name: str): 12 | db_role_handler = DbRoleHandler(dsn_admin) 13 | jwt_db_url = db_role_handler.create( 14 | jwt_role_name, expires_in=None, alter_if_exists=True 15 | ) 16 | db_role_handler.permission.grant_write_jwt(jwt_role_name) 17 | return jwt_db_url 18 | 19 | 20 | pgurl = "postgresql://postgres:pwd@0.0.0.0:5432/pgtest" # admin db connection url 21 | jwt_db_url = create_jwt_user(pgurl, jwt_role_name=JWT_ROLE_NAME) 22 | _install_db_module(pgurl, jwt_role_name=JWT_ROLE_NAME) 23 | 24 | print("Created jwt db connection") 25 | 26 | # 
create models 27 | 28 | full_access = ln.Space(name="full access", uid="00000001").save() # type: ignore 29 | select_access = ln.Space(name="select access", uid="00000002").save() # type: ignore 30 | no_access = ln.Space(name="no access", uid="00000003").save() # type: ignore 31 | # set read role for the default space 32 | account = hm.Account( 33 | id=ln.setup.settings.user._uuid.hex, uid="accntid1", role="read" 34 | ).save() 35 | 36 | # no access space 37 | ulabel = ln.ULabel(name="no_access_ulabel") 38 | ulabel.space = no_access 39 | ulabel.save() 40 | 41 | project = ln.Project(name="No_access_project") # type: ignore 42 | project.space = no_access 43 | project.save() 44 | 45 | # setup write access space 46 | hm.AccessSpace(account=account, space=full_access, role="write").save() 47 | 48 | ulabel = ln.ULabel(name="full_access_ulabel") 49 | ulabel.space = full_access 50 | ulabel.save() 51 | # setup read access space 52 | hm.AccessSpace(account=account, space=select_access, role="read").save() 53 | 54 | ulabel = ln.ULabel(name="select_ulabel") 55 | ulabel.space = select_access 56 | ulabel.save() 57 | # artificial but better to test 58 | # create a link table referencing rows in different spaces 59 | ulabel.projects.add(project) 60 | 61 | # default space, only select access by default 62 | ulabel = ln.ULabel(name="default_space_ulabel").save() 63 | ulabel.projects.add(project) 64 | 65 | project = ln.Project(name="default_space_project").save() 66 | ulabel.projects.add(project) 67 | 68 | # create a link table referencing ulabel from the default space and project from select space 69 | project = ln.Project(name="select_project") 70 | project.space = select_access 71 | project.save() 72 | 73 | ulabel.projects.add(project) 74 | 75 | # setup team and relevent models 76 | team_access = ln.Space(name="team access", uid="00000004").save() # type: ignore 77 | team = hm.Team(id=uuid4().hex, uid="teamuiduid11", name="test_team", role="read").save() 78 | 
hm.AccountTeam(account=account, team=team).save() 79 | hm.AccessSpace(team=team, space=team_access, role="read").save() 80 | 81 | feature = ln.Feature(name="team_access_feature", dtype=float) 82 | feature.space = team_access 83 | feature.save() 84 | 85 | print("Created models") 86 | 87 | # save jwt db connection 88 | 89 | ln.setup.settings.instance._db = jwt_db_url 90 | ln.setup.settings.instance._persist() 91 | -------------------------------------------------------------------------------- /tests/permissions/scripts/setup_instance.py: -------------------------------------------------------------------------------- 1 | import lamindb_setup as ln_setup 2 | from laminci.db import setup_local_test_postgres 3 | 4 | pgurl = setup_local_test_postgres() 5 | 6 | ln_setup.init( 7 | storage="./default_storage_permissions", 8 | name="lamindb-test-permissions", 9 | db=pgurl, 10 | ) 11 | 12 | # can't add this app in the init because don't want t trigger the initial migration 13 | # that conflicts with _install_db_module 14 | ln_setup.settings.instance._schema_str = "hubmodule" 15 | ln_setup.settings.instance._persist() 16 | -------------------------------------------------------------------------------- /tests/storage/conftest.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | from subprocess import DEVNULL, run 3 | from time import perf_counter 4 | 5 | import lamindb_setup as ln_setup 6 | import pytest 7 | from lamin_utils import logger 8 | from laminci.db import setup_local_test_postgres 9 | 10 | AUTO_CONNECT = ln_setup.settings.auto_connect 11 | ln_setup.settings.auto_connect = False 12 | 13 | import lamindb as ln 14 | 15 | 16 | def pytest_sessionstart(): 17 | t_execute_start = perf_counter() 18 | 19 | ln_setup._TESTING = True 20 | pgurl = setup_local_test_postgres() 21 | ln.setup.init( 22 | storage="./default_storage_unit_storage", 23 | modules="bionty", 24 | name="lamindb-unit-tests-storage", 25 | db=pgurl, 26 | ) 27 
| ln.setup.register() # temporarily 28 | ln.setup.settings.auto_connect = True 29 | ln.settings.creation.artifact_silence_missing_run_warning = True 30 | ln.settings.storage = ( 31 | "s3://lamindb-ci/test-data" # register as valid storage location 32 | ) 33 | ln.settings.storage = "./default_storage_unit_storage" 34 | total_time_elapsed = perf_counter() - t_execute_start 35 | print(f"Time to setup the instance: {total_time_elapsed:.3f}s") 36 | 37 | 38 | def pytest_sessionfinish(session: pytest.Session): 39 | logger.set_verbosity(1) 40 | shutil.rmtree("./default_storage_unit_storage") 41 | # handle below better in the future 42 | if ln.UPath("s3://lamindb-test/storage/.lamindb").exists(): 43 | ln.UPath("s3://lamindb-test/storage/.lamindb").rmdir() 44 | another_storage = ln.UPath("s3://lamindb-ci/lamindb-unit-tests-cloud/.lamindb") 45 | if another_storage.exists(): 46 | another_storage.rmdir() 47 | ln.setup.delete("lamindb-unit-tests-storage", force=True) 48 | run("docker stop pgtest && docker rm pgtest", shell=True, stdout=DEVNULL) # noqa: S602 49 | ln.setup.settings.auto_connect = AUTO_CONNECT 50 | -------------------------------------------------------------------------------- /tests/storage/test_artifact_zarr.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | from pathlib import Path 3 | 4 | import anndata as ad 5 | import lamindb as ln 6 | import numpy as np 7 | import pandas as pd 8 | import pytest 9 | from lamindb.core.storage._zarr import identify_zarr_type 10 | from lamindb_setup.core.upath import ( 11 | CloudPath, 12 | ) 13 | 14 | 15 | @pytest.fixture(scope="session") 16 | def get_small_adata(): 17 | return ad.AnnData( 18 | X=np.array([[1, 2, 3], [4, 5, 6]]), 19 | obs={"feat1": ["A", "B"]}, 20 | var=pd.DataFrame(index=["MYC", "TCF7", "GATA1"]), 21 | obsm={"X_pca": np.array([[1, 2], [3, 4]])}, 22 | ) 23 | 24 | 25 | def test_zarr_upload_cache(get_small_adata): 26 | previous_storage = 
ln.setup.settings.storage.root_as_str 27 | ln.settings.storage = "s3://lamindb-test/core" 28 | 29 | zarr_path = Path("./test_adata.zarr") 30 | get_small_adata.write_zarr(zarr_path) 31 | 32 | artifact = ln.Artifact(zarr_path, key="test_adata.zarr") 33 | assert artifact.otype == "AnnData" 34 | assert artifact.n_files >= 1 35 | artifact.save() 36 | 37 | assert isinstance(artifact.path, CloudPath) 38 | assert artifact.path.exists() 39 | assert identify_zarr_type(artifact.path) == "anndata" 40 | 41 | shutil.rmtree(artifact.cache()) 42 | 43 | cache_path = artifact._cache_path 44 | assert isinstance(artifact.load(), ad.AnnData) 45 | assert cache_path.is_dir() 46 | 47 | shutil.rmtree(cache_path) 48 | assert not cache_path.exists() 49 | artifact.cache() 50 | assert cache_path.is_dir() 51 | 52 | artifact.delete(permanent=True, storage=True) 53 | shutil.rmtree(zarr_path) 54 | 55 | # test zarr from memory 56 | artifact = ln.Artifact(get_small_adata, key="test_adata.anndata.zarr") 57 | assert artifact._local_filepath.is_dir() 58 | assert artifact.otype == "AnnData" 59 | assert artifact.suffix == ".anndata.zarr" 60 | assert artifact.n_files >= 1 61 | 62 | artifact.save() 63 | assert isinstance(artifact.path, CloudPath) 64 | assert artifact.path.exists() 65 | cache_path = artifact._cache_path 66 | assert cache_path.is_dir() 67 | 68 | shutil.rmtree(cache_path) 69 | assert not cache_path.exists() 70 | 71 | artifact._memory_rep = None 72 | 73 | assert isinstance(artifact.load(), ad.AnnData) 74 | assert cache_path.is_dir() 75 | 76 | artifact.delete(permanent=True, storage=True) 77 | 78 | ln.settings.storage = previous_storage 79 | -------------------------------------------------------------------------------- /tests/storage/test_switch_storage.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import lamindb as ln 4 | 5 | 6 | def test_settings_switch_storage(): 7 | ln.settings.storage = 
"./default_storage_unit_storage" 8 | assert ( 9 | ln.settings.storage.root.resolve() 10 | == Path("./default_storage_unit_storage").resolve() 11 | ) 12 | new_storage_location = "s3://lamindb-ci/test-settings-switch-storage" 13 | ln.settings.storage = new_storage_location 14 | assert ln.setup.settings.storage.type_is_cloud 15 | assert ln.setup.settings.storage.root_as_str == new_storage_location 16 | # root.fs contains the underlying fsspec filesystem 17 | # the following is set by lamindb to True for s3 by default 18 | assert ln.setup.settings.storage.root.fs.cache_regions 19 | ln.settings.storage = new_storage_location, {"cache_regions": False} 20 | assert not ln.setup.settings.storage.root.fs.cache_regions 21 | assert ln.Storage.filter(root=new_storage_location).one_or_none() is not None 22 | # switch back to default storage 23 | ln.settings.storage = "./default_storage_unit_storage" 24 | --------------------------------------------------------------------------------