├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.yml │ ├── config.yml │ ├── enhancement.yml │ └── usage_question.yml └── workflows │ ├── build.yml │ └── doc-changes.yml ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── docs ├── api.md ├── arrays.ipynb ├── bio-registries.ipynb ├── bionty.md ├── changelog.md ├── clinicore.md ├── curate.ipynb ├── faq.md ├── faq │ ├── acid.ipynb │ ├── curate-any.ipynb │ ├── delete.ipynb │ ├── idempotency.ipynb │ ├── import-modules.ipynb │ ├── keep-artifacts-local.ipynb │ ├── pydantic-pandera.ipynb │ ├── reference-field.ipynb │ ├── search.ipynb │ ├── setup.ipynb │ ├── symbol-mapping.ipynb │ ├── test_notebooks.py │ ├── track-run-inputs.ipynb │ ├── validate-fields.ipynb │ └── visibility.ipynb ├── guide.md ├── includes │ └── installation.md ├── index.md ├── lamindb.md ├── query-search.md ├── registries.ipynb ├── scripts │ ├── curate_anndata_flexible.py │ ├── curate_dataframe_flexible.py │ ├── curate_dataframe_minimal_errors.py │ ├── curate_mudata.py │ ├── curate_soma_experiment.py │ ├── curate_spatialdata.py │ ├── define_mini_immuno_features_labels.py │ ├── define_mini_immuno_schema_flexible.py │ ├── define_schema_anndata_ensembl_gene_ids_and_valid_features_in_obs.py │ ├── define_schema_spatialdata.py │ ├── define_valid_features.py │ ├── ingest_mini_immuno_datasets.py │ ├── run_track_and_finish.py │ ├── run_track_with_params.py │ ├── run_workflow.py │ └── synced_with_git.py ├── storage.md ├── storage │ ├── add-replace-cache.ipynb │ ├── anndata-accessor.ipynb │ ├── prepare-transfer-local-to-cloud.ipynb │ ├── test-files │ │ ├── iris.csv │ │ ├── iris.data │ │ └── new_iris.csv │ ├── test_notebooks.py │ ├── transfer-local-to-cloud.ipynb │ ├── upload.ipynb │ └── vitessce.ipynb ├── test_notebooks.py ├── track.ipynb ├── transfer.ipynb └── wetlab.md ├── lamindb ├── __init__.py ├── _finish.py ├── _tracked.py ├── _view.py ├── base │ ├── __init__.py │ ├── fields.py │ ├── ids.py │ ├── types.py │ 
├── uids.py │ └── users.py ├── core │ ├── __init__.py │ ├── _compat.py │ ├── _context.py │ ├── _mapped_collection.py │ ├── _settings.py │ ├── _sync_git.py │ ├── _track_environment.py │ ├── datasets │ │ ├── __init__.py │ │ ├── _core.py │ │ ├── _fake.py │ │ ├── _small.py │ │ └── mini_immuno.py │ ├── exceptions.py │ ├── loaders.py │ ├── storage │ │ ├── __init__.py │ │ ├── _anndata_accessor.py │ │ ├── _backed_access.py │ │ ├── _polars_lazy_df.py │ │ ├── _pyarrow_dataset.py │ │ ├── _tiledbsoma.py │ │ ├── _valid_suffixes.py │ │ ├── _zarr.py │ │ ├── objects.py │ │ └── paths.py │ ├── subsettings │ │ ├── __init__.py │ │ ├── _annotation_settings.py │ │ └── _creation_settings.py │ └── types.py ├── curators │ ├── __init__.py │ ├── _cellxgene_schemas │ │ ├── __init__.py │ │ └── schema_versions.csv │ ├── _legacy.py │ └── core.py ├── errors.py ├── examples │ ├── __init__.py │ └── schemas │ │ ├── __init__.py │ │ ├── _anndata.py │ │ └── _simple.py ├── integrations │ ├── __init__.py │ └── _vitessce.py ├── migrations │ ├── 0069_squashed.py │ ├── 0070_lamindbv1_migrate_data.py │ ├── 0071_lamindbv1_migrate_schema.py │ ├── 0072_remove_user__branch_code_remove_user_aux_and_more.py │ ├── 0073_merge_ourprojects.py │ ├── 0074_lamindbv1_part4.py │ ├── 0075_lamindbv1_part5.py │ ├── 0076_lamindbv1_part6.py │ ├── 0077_lamindbv1_part6b.py │ ├── 0078_lamindbv1_part6c.py │ ├── 0079_alter_rundata_value_json_and_more.py │ ├── 0080_polish_lamindbv1.py │ ├── 0081_revert_textfield_collection.py │ ├── 0082_alter_feature_dtype.py │ ├── 0083_alter_feature_is_type_alter_flextable_is_type_and_more.py │ ├── 0084_alter_schemafeature_feature_and_more.py │ ├── 0085_alter_feature_is_type_alter_flextable_is_type_and_more.py │ ├── 0086_various.py │ ├── 0087_rename__schemas_m2m_artifact_feature_sets_and_more.py │ ├── 0088_schema_components.py │ ├── 0089_subsequent_runs.py │ ├── 0090_runproject_project_runs.py │ ├── 0091_alter_featurevalue_options_alter_space_options_and_more.py │ ├── 
0092_alter_artifactfeaturevalue_artifact_and_more.py │ ├── 0093_alter_schemacomponent_unique_together.py │ ├── 0094_writeloglock_writelogmigrationstate_and_more.py │ ├── 0095_remove_rundata_flextable.py │ ├── 0096_remove_artifact__param_values_and_more.py │ ├── 0097_remove_schemaparam_param_remove_paramvalue_param_and_more.py │ ├── 0098_alter_feature_type_alter_project_type_and_more.py │ ├── 0099_alter_writelog_seqno.py │ ├── 0100_branch_alter_artifact__branch_code_and_more.py │ ├── 0101_alter_artifact_hash_alter_feature_name_and_more.py │ ├── 0102_remove_writelog_branch_code_and_more.py │ ├── 0103_remove_writelog_migration_state_and_more.py │ ├── 0104_alter_branch_uid.py │ ├── 0104_squashed.py │ ├── 0105_record_unique_name.py │ └── __init__.py ├── models │ ├── __init__.py │ ├── _describe.py │ ├── _django.py │ ├── _feature_manager.py │ ├── _from_values.py │ ├── _is_versioned.py │ ├── _label_manager.py │ ├── _relations.py │ ├── artifact.py │ ├── artifact_set.py │ ├── can_curate.py │ ├── collection.py │ ├── core.py │ ├── feature.py │ ├── has_parents.py │ ├── project.py │ ├── query_manager.py │ ├── query_set.py │ ├── record.py │ ├── run.py │ ├── save.py │ ├── schema.py │ ├── sqlrecord.py │ ├── transform.py │ └── ulabel.py ├── py.typed └── setup │ ├── __init__.py │ └── core │ └── __init__.py ├── noxfile.py ├── pyproject.toml └── tests ├── conftest.py ├── core ├── _dataset_fixtures.py ├── conftest.py ├── notebooks │ ├── basic-r-notebook.Rmd.cleaned.html │ ├── basic-r-notebook.Rmd.html │ ├── duplicate │ │ └── with-title-initialized-consecutive-finish.ipynb │ ├── no-title.ipynb │ ├── with-title-initialized-consecutive-finish-not-last-cell.ipynb │ └── with-title-initialized-consecutive-finish.ipynb ├── scripts │ ├── duplicate1 │ │ └── script-to-test-versioning.py │ ├── duplicate2 │ │ └── script-to-test-versioning.py │ ├── duplicate3 │ │ └── script-to-test-versioning.py │ ├── duplicate4 │ │ └── script-to-test-versioning.py │ ├── script-to-test-filename-change.py │ └── 
script-to-test-versioning.py ├── test_artifact.py ├── test_artifact_folders.py ├── test_can_curate.py ├── test_collection.py ├── test_data.py ├── test_db.py ├── test_delete.py ├── test_describe_and_df_calls.py ├── test_dtype.py ├── test_feature.py ├── test_feature_label_manager.py ├── test_from_values.py ├── test_has_parents.py ├── test_integrity.py ├── test_load.py ├── test_manager.py ├── test_models.py ├── test_notebooks.py ├── test_queryset.py ├── test_record.py ├── test_run.py ├── test_save.py ├── test_schema.py ├── test_search.py ├── test_track.py ├── test_tracked.py ├── test_transform.py ├── test_ulabel.py ├── test_versioning.py ├── test_view.py └── test_visibility.py ├── curators ├── conftest.py ├── test_cat_managers.py ├── test_curators_examples.py ├── test_curators_general.py ├── test_curators_multivalue.py ├── test_cxg_curator.py ├── test_dataframe_curators_accounting_example.py ├── test_pert_curator.py └── test_records.py ├── permissions ├── conftest.py ├── jwt_utils.py ├── scripts │ ├── check_lamin_dev.py │ ├── clean_lamin_dev.py │ ├── setup_access.py │ └── setup_instance.py └── test_permissions.py └── storage ├── conftest.py ├── test_artifact_storage.py ├── test_artifact_zarr.py ├── test_cache.py ├── test_storage.py ├── test_switch_storage.py └── test_transfer.py /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | name: Report a bug 2 | description: Report a bug. 3 | labels: 4 | - ":bug: bug" 5 | body: 6 | - type: textarea 7 | id: report 8 | attributes: 9 | label: Add a description 10 | placeholder: | 11 | Describe and consider providing version information. Please ensure you're on the latest version of lamindb. 12 | This is a public repository! 13 | Do not reveal any internal information. 
14 | validations: 15 | required: true 16 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | contact_links: 3 | - name: LaminHub issues 4 | url: https://github.com/laminlabs/laminhub-public 5 | about: If you have issues with the GUI/web app at lamin.ai, please report them here. 6 | - name: Enterprise support 7 | url: https://lamin.ai/contact 8 | about: If you have other questions, contact us directly. 9 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/enhancement.yml: -------------------------------------------------------------------------------- 1 | name: Propose an enhancement 2 | description: Propose an enhancement. 3 | body: 4 | - type: textarea 5 | id: description 6 | attributes: 7 | label: Add a description 8 | placeholder: | 9 | This is a public repository! 10 | Do not reveal any internal information. 11 | validations: 12 | required: true 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/usage_question.yml: -------------------------------------------------------------------------------- 1 | name: Ask a usage question 2 | description: Ask a usage question. 3 | labels: 4 | - "usage question" 5 | body: 6 | - type: textarea 7 | id: description 8 | attributes: 9 | label: Add a description 10 | placeholder: | 11 | This is a public repository! 12 | Do not reveal any internal information. 
13 | validations: 14 | required: true 15 | -------------------------------------------------------------------------------- /.github/workflows/doc-changes.yml: -------------------------------------------------------------------------------- 1 | name: doc-changes 2 | 3 | on: 4 | pull_request_target: 5 | branches: 6 | - main 7 | - release 8 | types: 9 | - closed 10 | 11 | jobs: 12 | doc-changes: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v4 16 | - uses: actions/setup-python@v5 17 | with: 18 | python-version: "3.11" 19 | - run: pip install "laminci[doc-changes]@git+https://x-access-token:${{ secrets.LAMIN_BUILD_DOCS }}@github.com/laminlabs/laminci" 20 | - run: laminci doc-changes 21 | env: 22 | repo_token: ${{ secrets.GITHUB_TOKEN }} 23 | docs_token: ${{ secrets.LAMIN_BUILD_DOCS }} 24 | changelog_file: lamin-docs/docs/changelog/soon/lamindb.md 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __MACOSX/ 2 | 3 | # LaminDB 4 | .coveragerc 5 | *.db 6 | *.lndb 7 | *.jpg 8 | *.zarr/ 9 | docsbuild/ 10 | docs/lamin.md 11 | docs/guide/data-validation.ipynb 12 | docs/guide/bionty.ipynb 13 | docs/guide/lnschema-core.ipynb 14 | docs/paradisi05_laminopathic_nuclei.jpg 15 | bionty_docs/ 16 | lamindb_docs/ 17 | _build 18 | mydata/ 19 | lamin-intro/ 20 | lamin-tutorial/ 21 | mytest/ 22 | rds/ 23 | mydb/ 24 | docs/test-registries/ 25 | docs/test-annotate-flexible/ 26 | docs/lamindb.* 27 | lamin_sphinx 28 | docs/conf.py 29 | lamindb/setup/.env 30 | _secrets.py 31 | _configuration.py 32 | lamin.db 33 | docs/generated/* 34 | _docs_tmp* 35 | docs/guide/Laminopathic_nuclei.jpg 36 | docs/guide/paradisi05_laminopathic_nuclei.jpg 37 | nocodb 38 | docs/guide/SRR4238351_subsamp.fastq.gz 39 | docs/faq/paradisi05_laminopathic_nuclei.jpg 40 | docs/faq/tostore/ 41 | docs/faq/mydata_postgres/ 42 | docs/guide/myobjects/ 43 | 
docs/faq/test-run-inputs/ 44 | docs/intro/paradisi05_laminopathic_nuclei.jpg 45 | docs/guide/figures/ 46 | docs/test-annotate/ 47 | docs/test-track/ 48 | suo22/ 49 | docs/biology/test-flow/ 50 | docs/biology/test-scrna/ 51 | docs/biology/test-registries/ 52 | docs/biology/test-multimodal/ 53 | test-inherit1 54 | test-inherit2 55 | test-search0 56 | test-search1 57 | test-search5 58 | default_storage 59 | default_storage_unit_core 60 | default_storage_unit_storage 61 | test.ipynb 62 | test2.ipynb 63 | run-tests 64 | test-django-validation/ 65 | curate.tiledbsoma 66 | small_dataset.tiledbsoma 67 | 68 | # General 69 | .DS_Store 70 | 71 | # Byte-compiled / optimized / DLL files 72 | __pycache__/ 73 | *.py[cod] 74 | *$py.class 75 | 76 | # C extensions 77 | *.so 78 | 79 | # Distribution / packaging 80 | .Python 81 | build/ 82 | develop-eggs/ 83 | dist/ 84 | downloads/ 85 | eggs/ 86 | .eggs/ 87 | lib/ 88 | lib64/ 89 | parts/ 90 | sdist/ 91 | var/ 92 | wheels/ 93 | pip-wheel-metadata/ 94 | share/python-wheels/ 95 | *.egg-info/ 96 | .installed.cfg 97 | *.egg 98 | MANIFEST 99 | 100 | # PyInstaller 101 | # Usually these files are written by a python script from a template 102 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
103 | *.manifest 104 | *.spec 105 | 106 | # Installer logs 107 | pip-log.txt 108 | pip-delete-this-directory.txt 109 | 110 | # Unit test / coverage reports 111 | htmlcov/ 112 | .tox/ 113 | .nox/ 114 | .coverage 115 | .coverage.* 116 | .cache 117 | nosetests.xml 118 | coverage.xml 119 | *.cover 120 | *.py,cover 121 | .hypothesis/ 122 | .pytest_cache/ 123 | 124 | # Translations 125 | *.mo 126 | *.pot 127 | 128 | # Django stuff: 129 | *.log 130 | local_settings.py 131 | db.sqlite3 132 | db.sqlite3-journal 133 | 134 | # Flask stuff: 135 | instance/ 136 | .webassets-cache 137 | 138 | # Scrapy stuff: 139 | .scrapy 140 | 141 | # Sphinx documentation 142 | docs/_build/ 143 | 144 | # PyBuilder 145 | target/ 146 | 147 | # Jupyter Notebook 148 | .ipynb_checkpoints 149 | 150 | # IPython 151 | profile_default/ 152 | ipython_config.py 153 | 154 | # pyenv 155 | .python-version 156 | 157 | # pipenv 158 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 159 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 160 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 161 | # install all needed dependencies. 162 | #Pipfile.lock 163 | 164 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 165 | __pypackages__/ 166 | 167 | # Celery stuff 168 | celerybeat-schedule 169 | celerybeat.pid 170 | 171 | # SageMath parsed files 172 | *.sage.py 173 | 174 | # Environments 175 | .env 176 | .venv 177 | env/ 178 | venv/ 179 | ENV/ 180 | env.bak/ 181 | venv.bak/ 182 | 183 | # Spyder project settings 184 | .spyderproject 185 | .spyproject 186 | 187 | # Rope project settings 188 | .ropeproject 189 | 190 | # mkdocs documentation 191 | /site 192 | 193 | # mypy 194 | .mypy_cache/ 195 | .dmypy.json 196 | dmypy.json 197 | 198 | # Pyre type checker 199 | .pyre/ 200 | 201 | # data files 202 | data/ 203 | _build 204 | *.csv 205 | *.fcs 206 | *.zip 207 | *.feather 208 | *.h5ad 209 | *.h5mu 210 | *.parquet 211 | *.bam 212 | *.fastq.gz 213 | *.pt 214 | 215 | # Pycharm 216 | .idea 217 | 218 | # VSCode 219 | .vscode 220 | 221 | # cxg 222 | !lamindb/curators/_cellxgene_schemas/schema_versions.csv 223 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "sub/lamindb-setup"] 2 | path = sub/lamindb-setup 3 | url = https://github.com/laminlabs/lamindb-setup 4 | [submodule "sub/lamin-cli"] 5 | path = sub/lamin-cli 6 | url = https://github.com/laminlabs/lamin-cli 7 | [submodule "sub/bionty"] 8 | path = sub/bionty 9 | url = https://github.com/laminlabs/bionty 10 | [submodule "sub/wetlab"] 11 | path = sub/wetlab 12 | url = https://github.com/laminlabs/wetlab 13 | [submodule "sub/clinicore"] 14 | path = sub/clinicore 15 | url = https://github.com/laminlabs/clinicore 16 | [submodule "sub/cellxgene-lamin"] 17 | path = sub/cellxgene-lamin 18 | url = https://github.com/laminlabs/cellxgene-lamin.git 19 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | fail_fast: false 2 | 
default_language_version: 3 | python: python3 4 | default_stages: 5 | - pre-commit 6 | - pre-push 7 | minimum_pre_commit_version: 2.16.0 8 | repos: 9 | - repo: https://github.com/rbubley/mirrors-prettier 10 | rev: v3.5.1 11 | hooks: 12 | - id: prettier 13 | exclude: | 14 | (?x)( 15 | docs/changelog.md|.github/ISSUE_TEMPLATE/config.yml|tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html 16 | ) 17 | - repo: https://github.com/kynan/nbstripout 18 | rev: 0.8.1 19 | hooks: 20 | - id: nbstripout 21 | exclude: | 22 | (?x)( 23 | docs/examples/| 24 | docs/notes/ 25 | ) 26 | - repo: https://github.com/astral-sh/ruff-pre-commit 27 | rev: v0.9.10 28 | hooks: 29 | - id: ruff 30 | args: [--fix, --exit-non-zero-on-fix, --unsafe-fixes] 31 | - id: ruff-format 32 | - repo: https://github.com/pre-commit/pre-commit-hooks 33 | rev: v4.5.0 34 | hooks: 35 | - id: detect-private-key 36 | - id: check-ast 37 | - id: end-of-file-fixer 38 | exclude: | 39 | (?x)( 40 | .github/workflows/latest-changes.jinja2 41 | ) 42 | - id: mixed-line-ending 43 | args: [--fix=lf] 44 | - id: trailing-whitespace 45 | exclude: | 46 | (?x)( 47 | tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html 48 | ) 49 | - id: check-case-conflict 50 | - repo: https://github.com/pre-commit/mirrors-mypy 51 | rev: v1.14.1 52 | hooks: 53 | - id: mypy 54 | args: [--no-strict-optional, --ignore-missing-imports] 55 | additional_dependencies: ["types-requests", "types-attrs"] 56 | exclude: | 57 | (?x)( 58 | test_notebooks.py| 59 | script-to-test-versioning.py| 60 | tests/storage/conftest.py| 61 | tests/curators/conftest.py| 62 | tests/permissions/conftest.py| 63 | tests/writelog/conftest.py| 64 | tests/writelog_sqlite/conftest.py| 65 | tests/curators/test_curators_examples.py| 66 | tests/core/conftest.py 67 | ) 68 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | 
Contributions are generally welcome. Please make an issue to discuss proposals. 4 | 5 | ## Installation 6 | 7 | ### PyPI 8 | 9 | For installation from PyPI, see [docs.lamin.ai/setup](https://docs.lamin.ai/setup). 10 | 11 | ### Github 12 | 13 | For installation from GitHub, call: 14 | 15 | ```bash 16 | git clone --recursive https://github.com/laminlabs/lamindb 17 | pip install laminci 18 | python -m venv .venv 19 | source .venv/bin/activate 20 | nox -s install 21 | ``` 22 | 23 | This will install a few dependencies from the git submodules linked [here](https://github.com/laminlabs/lamindb/tree/main/sub), as well as packages 24 | like `pytest` and `pre-commit` that you'll need when developing. 25 | 26 | lamindb depends on several other packages that may require modifications for pull requests to successfully pass the continuous integration build. 27 | We suggest the following workflow if commits to any of the submodules are essential for the current modifications in lamindb: 28 | 29 | 1. Change directory into the submodule that you want to modify: `cd sub/SUBMODULE`. 30 | 2. Switch to a new feature branch: `git switch -c feature/NEWFEATURE`. 31 | 3. Make a pull request with your changes to the `SUBMODULE` and ensure that the CI passes. 32 | 4. In the repository root of lamindb, create a new commit and push: 33 | 34 | ```bash 35 | cd .. 36 | git add -u 37 | git commit -m "Upgraded SUBMODULE" 38 | git push 39 | ``` 40 | 41 | Any pull request of yours should now also have the changes of the submodule included allowing you to test that changes in the submodule and lamindb are compatible. 42 | 43 | ## Running and writing tests 44 | 45 | This package uses the [pytest][] for automated testing. 46 | Please add a test for every function added to the package. 
47 | 48 | Running tests requires the [Docker daemon][] up, then run at the root of the repository: 49 | 50 | ```bash 51 | pytest --ignore=tests/storage --ignore=tests/permission 52 | ``` 53 | 54 | in the root of the repository. 55 | We exclude specific directories in local `pytest` runs because they directly access external resources such as AWS, which require specific access keys. 56 | Continuous integration will automatically run **all** tests on pull requests. 57 | 58 | ## Code-style 59 | 60 | This project uses [pre-commit][] to enforce consistent code-styles. On every commit, pre-commit checks will either 61 | automatically fix issues with the code, or raise an error message. 62 | 63 | To enable pre-commit locally, simply run 64 | 65 | ```bash 66 | pre-commit install 67 | ``` 68 | 69 | in the root of the repository. Pre-commit will automatically download all dependencies when it is run for the first time. 70 | 71 | We further use [gitmoji][] to add emoticons to commits. 72 | These allow us to more easily categorize them allowing for faster visual filtering. 73 | 74 | It can be installed by running: 75 | 76 | ```bash 77 | npm i -g gitmoji-cli 78 | ``` 79 | 80 | and enabled for the repository via: 81 | 82 | ```bash 83 | gitmoji -i 84 | ``` 85 | 86 | If you don't have `sudo` in your working environment, follow [these instructions](https://github.com/sindresorhus/guides/blob/main/npm-global-without-sudo.md). 87 | 88 | ## Documentation 89 | 90 | We build our documentation with an internal tool called `lndocs`. 91 | We have not made it public yet and therefore external contributors need to rely on the Github Actions `docs` job to build the documentation. 92 | If the `docs` job succeeds, a preview URL will be posted automatically as a comment to your pull request. 93 | 94 | ## Releases 95 | 96 | Currently only lamin employees have release rights. 
97 | 98 | [Docker daemon]: https://docs.docker.com/engine/install/ 99 | [gitmoji]: https://gitmoji.dev/ 100 | [pre-commit]: https://pre-commit.com/ 101 | [pytest]: https://docs.pytest.org/ 102 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Stars](https://img.shields.io/github/stars/laminlabs/lamindb?logo=GitHub&color=yellow)](https://github.com/laminlabs/lamindb) 2 | [![codecov](https://codecov.io/gh/laminlabs/lamindb/branch/main/graph/badge.svg?token=VKMRJ7OWR3)](https://codecov.io/gh/laminlabs/lamindb) 3 | [![pypi](https://img.shields.io/pypi/v/lamindb?color=blue&label=pypi%20package)](https://pypi.org/project/lamindb) 4 | 5 | # LaminDB - A data framework for biology 6 | 7 | Read the [docs](https://docs.lamin.ai). 8 | -------------------------------------------------------------------------------- /docs/api.md: -------------------------------------------------------------------------------- 1 | # API 2 | 3 | 4 | 5 | ```{toctree} 6 | :maxdepth: 1 7 | :caption: CLI & lamindb 8 | :hidden: 9 | 10 | cli 11 | lamindb 12 | ``` 13 | 14 | ```{toctree} 15 | :maxdepth: 1 16 | :caption: Modules 17 | :hidden: 18 | 19 | bionty 20 | wetlab 21 | clinicore 22 | ``` 23 | -------------------------------------------------------------------------------- /docs/bionty.md: -------------------------------------------------------------------------------- 1 | # `bionty` 2 | 3 | ```{eval-rst} 4 | .. automodule:: bionty 5 | ``` 6 | -------------------------------------------------------------------------------- /docs/changelog.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | Actual content in lamin-docs. 
4 | -------------------------------------------------------------------------------- /docs/clinicore.md: -------------------------------------------------------------------------------- 1 | # `clinicore` 2 | 3 | ```{eval-rst} 4 | .. automodule:: clinicore 5 | ``` 6 | -------------------------------------------------------------------------------- /docs/faq.md: -------------------------------------------------------------------------------- 1 | # FAQ 2 | 3 | ```{toctree} 4 | :maxdepth: 1 5 | 6 | faq/pydantic-pandera 7 | faq/idempotency 8 | faq/acid 9 | faq/track-run-inputs 10 | faq/setup 11 | faq/curate-any 12 | faq/import-modules 13 | faq/reference-field 14 | faq/visibility 15 | faq/delete 16 | faq/keep-artifacts-local 17 | faq/validate-fields 18 | faq/symbol-mapping 19 | faq/search 20 | ``` 21 | -------------------------------------------------------------------------------- /docs/faq/import-modules.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# What happens if I import a schema module without lamindb?" 
9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "# !pip install 'lamindb[bionty]'\n", 18 | "!lamin init --storage testmodule --modules bionty" 19 | ] 20 | }, 21 | { 22 | "attachments": {}, 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "Upon `import`, nothing yet happens:" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import bionty as bt" 36 | ] 37 | }, 38 | { 39 | "attachments": {}, 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "If you try to access an attribute (other than `model`), you'll load the instance in the same way as calling `import lamindb`.\n", 44 | "\n", 45 | "Under the hood, `lamindb` is imported!" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "assert bt.Organism(name=\"human\") is not None" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "!lamin delete --force testmodule" 64 | ] 65 | } 66 | ], 67 | "metadata": { 68 | "kernelspec": { 69 | "display_name": "py39", 70 | "language": "python", 71 | "name": "python3" 72 | }, 73 | "language_info": { 74 | "artifact_extension": ".py", 75 | "codemirror_mode": { 76 | "name": "ipython", 77 | "version": 3 78 | }, 79 | "mimetype": "text/x-python", 80 | "name": "python", 81 | "nbconvert_exporter": "python", 82 | "pygments_lexer": "ipython3", 83 | "version": "3.9.16" 84 | } 85 | }, 86 | "nbformat": 4, 87 | "nbformat_minor": 2 88 | } 89 | -------------------------------------------------------------------------------- /docs/faq/reference-field.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | 
"metadata": {}, 6 | "source": [ 7 | "# Where to store external links and IDs?" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "When registering data in LaminDB, you might want to store a reference link or ID to indicate the source of the collection.\n", 15 | "\n", 16 | "We have `reference` and `reference_type` fields for this purpose, they are available for {class}`~lamindb.Collection`, {class}`~lamindb.Transform`, {class}`~lamindb.Run` and {class}`~lamindb.ULabel`." 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "# !pip install lamindb\n", 26 | "!lamin init --storage testreference" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import lamindb as ln" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "Let's say we have a few donor samples that came form Vendor X, in order to chase back the orders, I'd like to keep track the donor ids provided by the vendor:" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "ln.ULabel(\n", 52 | " name=\"donor 001\", reference=\"VX984545\", reference_type=\"Donor ID from Vendor X\"\n", 53 | ")" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "!lamin delete --force testreference" 63 | ] 64 | } 65 | ], 66 | "metadata": { 67 | "kernelspec": { 68 | "display_name": "py39", 69 | "language": "python", 70 | "name": "python3" 71 | }, 72 | "language_info": { 73 | "artifact_extension": ".py", 74 | "codemirror_mode": { 75 | "name": "ipython", 76 | "version": 3 77 | }, 78 | "mimetype": "text/x-python", 79 | "name": "python", 80 | "nbconvert_exporter": "python", 81 | "pygments_lexer": 
"ipython3", 82 | "version": "3.9.16" 83 | } 84 | }, 85 | "nbformat": 4, 86 | "nbformat_minor": 2 87 | } 88 | -------------------------------------------------------------------------------- /docs/faq/setup.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "0", 6 | "metadata": {}, 7 | "source": [ 8 | "# What happens when importing lamindb and the instance is not yet setup?" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "id": "1", 15 | "metadata": { 16 | "tags": [ 17 | "hide-cell" 18 | ] 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "!lamin disconnect" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "id": "2", 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "# !pip install 'lamindb[jupyter]'\n", 33 | "import lamindb as ln\n", 34 | "import pytest" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "id": "3", 40 | "metadata": {}, 41 | "source": [ 42 | "If you try to use lamindb, it will raise an `InstanceNotSetupError` and ask you to `init` or `load` an instance via the python API." 
43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "id": "4", 49 | "metadata": { 50 | "tags": [ 51 | "hide-cell" 52 | ] 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "with pytest.raises(ln._InstanceNotSetupError):\n", 57 | " ln.track()" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "id": "5", 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "ln.setup.init(storage=\"./testsetup\")" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "id": "6", 73 | "metadata": {}, 74 | "source": [ 75 | "Now we can access functionality:" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "id": "7", 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "ln.track(\"2lhqA4uTKSFP0000\")" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "id": "8", 91 | "metadata": {}, 92 | "source": [ 93 | "Let us try to init another instance in the same Python session: It doesn't work." 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "id": "9", 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "from lamindb_setup._init_instance import CannotSwitchDefaultInstance\n", 104 | "\n", 105 | "with pytest.raises(CannotSwitchDefaultInstance):\n", 106 | " ln.setup.init(storage=\"./testsetup2\")" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "id": "10", 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "!lamin delete --force testsetup" 117 | ] 118 | } 119 | ], 120 | "metadata": { 121 | "kernelspec": { 122 | "display_name": "Python 3 (ipykernel)", 123 | "language": "python", 124 | "name": "python3" 125 | }, 126 | "language_info": { 127 | "codemirror_mode": { 128 | "name": "ipython", 129 | "version": 3 130 | }, 131 | "file_extension": ".py", 132 | "mimetype": "text/x-python", 133 | "name": "python", 134 | "nbconvert_exporter": "python", 135 | "pygments_lexer": 
"ipython3", 136 | "version": "3.9.17" 137 | }, 138 | "vscode": { 139 | "interpreter": { 140 | "hash": "61b4062b24dfb1010f420dad5aa3bd73a4d2af47d0ec44eafec465a35a9d7239" 141 | } 142 | } 143 | }, 144 | "nbformat": 4, 145 | "nbformat_minor": 5 146 | } 147 | -------------------------------------------------------------------------------- /docs/faq/test_notebooks.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import nbproject_test as test 4 | 5 | import lamindb as ln 6 | 7 | 8 | def test_notebooks(): 9 | nbdir = Path(__file__).parent 10 | ln.setup.login("testuser1") 11 | ln.setup.init(storage=nbdir / "mydata") 12 | test.execute_notebooks(nbdir, write=True) 13 | -------------------------------------------------------------------------------- /docs/faq/validate-fields.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Django field validation\n", 8 | "\n", 9 | "[Django field validation](https://docs.djangoproject.com/en/5.1/ref/validators/) are enabled for models that inherit the `ValidateFields` class." 
10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "# pip install lamindb\n", 19 | "!lamin init --storage ./test-django-validation" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import lamindb as ln\n", 29 | "from lamindb.core.exceptions import FieldValidationError" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "try:\n", 39 | " ln.Reference(name=\"my ref\", doi=\"abc.ef\", url=\"myurl.com\")\n", 40 | "except FieldValidationError as e:\n", 41 | " print(e)" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "!lamin delete --force test-django-validation" 51 | ] 52 | } 53 | ], 54 | "metadata": { 55 | "kernelspec": { 56 | "display_name": "py310", 57 | "language": "python", 58 | "name": "python3" 59 | }, 60 | "language_info": { 61 | "codemirror_mode": { 62 | "name": "ipython", 63 | "version": 3 64 | }, 65 | "file_extension": ".py", 66 | "mimetype": "text/x-python", 67 | "name": "python", 68 | "nbconvert_exporter": "python", 69 | "pygments_lexer": "ipython3", 70 | "version": "3.10.13" 71 | } 72 | }, 73 | "nbformat": 4, 74 | "nbformat_minor": 2 75 | } 76 | -------------------------------------------------------------------------------- /docs/guide.md: -------------------------------------------------------------------------------- 1 | # Guide 2 | 3 | ```{toctree} 4 | :hidden: 5 | :caption: "How to" 6 | 7 | query-search 8 | track 9 | curate 10 | bio-registries 11 | transfer 12 | ``` 13 | 14 | ```{toctree} 15 | :hidden: 16 | :caption: Other topics 17 | 18 | faq 19 | storage 20 | ``` 21 | -------------------------------------------------------------------------------- /docs/includes/installation.md: 
-------------------------------------------------------------------------------- 1 | ![pyversions](https://img.shields.io/pypi/pyversions/lamindb) 2 | 3 | ```shell 4 | pip install lamindb 5 | ``` 6 | 7 | You can configure the installation using `extras`, e.g., 8 | 9 | ```shell 10 | pip install 'lamindb[jupyter,bionty]' 11 | ``` 12 | 13 | Supported `extras` are: 14 | 15 | ```yaml 16 | # commonly used 17 | jupyter # parse Jupyter notebook metadata 18 | bionty # basic biological ontologies 19 | # cloud backends (AWS is assumed) 20 | gcp # Google Cloud (gcfs, etc.) 21 | # biological artifact formats 22 | fcs # FCS artifacts (flow cytometry) 23 | # storage backends 24 | zarr # store & stream arrays with zarr 25 | ``` 26 | 27 | If you'd like to install from GitHub, see [here](https://github.com/laminlabs/lamindb/blob/main/README.md). 28 | 29 | If you'd like a docker container, here is a way: [github.com/laminlabs/lamindb-docker](https://github.com/laminlabs/lamindb-docker). 30 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | ```{include} ../README.md 2 | :start-line: 0 3 | :end-line: 5 4 | ``` 5 | 6 | 7 | 8 | ```{toctree} 9 | :maxdepth: 1 10 | :hidden: 11 | 12 | guide 13 | api 14 | changelog 15 | ``` 16 | -------------------------------------------------------------------------------- /docs/lamindb.md: -------------------------------------------------------------------------------- 1 | # `lamindb` 2 | 3 | ```{eval-rst} 4 | .. 
automodule:: lamindb 5 | ``` 6 | -------------------------------------------------------------------------------- /docs/query-search.md: -------------------------------------------------------------------------------- 1 | # Query & search 2 | 3 | ```{toctree} 4 | :maxdepth: 1 5 | 6 | registries 7 | arrays 8 | ``` 9 | -------------------------------------------------------------------------------- /docs/scripts/curate_anndata_flexible.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | ln.core.datasets.mini_immuno.define_features_labels() 4 | adata = ln.core.datasets.mini_immuno.get_dataset1(otype="AnnData") 5 | schema = ln.examples.schemas.anndata_ensembl_gene_ids_and_valid_features_in_obs() 6 | artifact = ln.Artifact.from_anndata( 7 | adata, key="examples/mini_immuno.h5ad", schema=schema 8 | ).save() 9 | artifact.describe() 10 | -------------------------------------------------------------------------------- /docs/scripts/curate_dataframe_flexible.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | ln.core.datasets.mini_immuno.define_features_labels() 4 | schema = ln.examples.schemas.valid_features() 5 | df = ln.core.datasets.small_dataset1(otype="DataFrame") 6 | artifact = ln.Artifact.from_df( 7 | df, key="examples/dataset1.parquet", schema=schema 8 | ).save() 9 | artifact.describe() 10 | -------------------------------------------------------------------------------- /docs/scripts/curate_dataframe_minimal_errors.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | schema = ln.core.datasets.mini_immuno.define_mini_immuno_schema_flexible() 4 | df = ln.core.datasets.small_dataset1(otype="DataFrame") 5 | df.pop("donor") # remove donor column to trigger validation error 6 | try: 7 | artifact = ln.Artifact.from_df( 8 | df, key="examples/dataset1.parquet", schema=schema 9 | 
).save() 10 | except ln.errors.ValidationError as error: 11 | print(error) 12 | -------------------------------------------------------------------------------- /docs/scripts/curate_mudata.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | import bionty as bt 3 | 4 | 5 | # define the global obs schema 6 | obs_schema = ln.Schema( 7 | name="mudata_papalexi21_subset_obs_schema", 8 | features=[ 9 | ln.Feature(name="perturbation", dtype="cat[ULabel[Perturbation]]").save(), 10 | ln.Feature(name="replicate", dtype="cat[ULabel[Replicate]]").save(), 11 | ], 12 | ).save() 13 | 14 | # define the ['rna'].obs schema 15 | obs_schema_rna = ln.Schema( 16 | name="mudata_papalexi21_subset_rna_obs_schema", 17 | features=[ 18 | ln.Feature(name="nCount_RNA", dtype=int).save(), 19 | ln.Feature(name="nFeature_RNA", dtype=int).save(), 20 | ln.Feature(name="percent.mito", dtype=float).save(), 21 | ], 22 | ).save() 23 | 24 | # define the ['hto'].obs schema 25 | obs_schema_hto = ln.Schema( 26 | name="mudata_papalexi21_subset_hto_obs_schema", 27 | features=[ 28 | ln.Feature(name="nCount_HTO", dtype=int).save(), 29 | ln.Feature(name="nFeature_HTO", dtype=int).save(), 30 | ln.Feature(name="technique", dtype=bt.ExperimentalFactor).save(), 31 | ], 32 | ).save() 33 | 34 | # define ['rna'].var schema 35 | var_schema_rna = ln.Schema( 36 | name="mudata_papalexi21_subset_rna_var_schema", 37 | itype=bt.Gene.symbol, 38 | dtype=float, 39 | ).save() 40 | 41 | # define composite schema 42 | mudata_schema = ln.Schema( 43 | name="mudata_papalexi21_subset_mudata_schema", 44 | otype="MuData", 45 | slots={ 46 | "obs": obs_schema, 47 | "rna:obs": obs_schema_rna, 48 | "hto:obs": obs_schema_hto, 49 | "rna:var": var_schema_rna, 50 | }, 51 | ).save() 52 | 53 | # curate a MuData 54 | mdata = ln.core.datasets.mudata_papalexi21_subset() 55 | bt.settings.organism = "human" # set the organism to map gene symbols 56 | curator = ln.curators.MuDataCurator(mdata, 
mudata_schema) 57 | artifact = curator.save_artifact(key="examples/mudata_papalexi21_subset.h5mu") 58 | assert artifact.schema == mudata_schema 59 | -------------------------------------------------------------------------------- /docs/scripts/curate_soma_experiment.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | import bionty as bt 3 | import tiledbsoma as soma 4 | import tiledbsoma.io 5 | 6 | adata = ln.core.datasets.mini_immuno.get_dataset1(otype="AnnData") 7 | tiledbsoma.io.from_anndata("small_dataset.tiledbsoma", adata, measurement_name="RNA") 8 | 9 | obs_schema = ln.Schema( 10 | name="soma_obs_schema", 11 | features=[ 12 | ln.Feature(name="cell_type_by_expert", dtype=bt.CellType).save(), 13 | ln.Feature(name="cell_type_by_model", dtype=bt.CellType).save(), 14 | ], 15 | ).save() 16 | 17 | var_schema = ln.Schema( 18 | name="soma_var_schema", 19 | features=[ 20 | ln.Feature(name="var_id", dtype=bt.Gene.ensembl_gene_id).save(), 21 | ], 22 | coerce_dtype=True, 23 | ).save() 24 | 25 | soma_schema = ln.Schema( 26 | name="soma_experiment_schema", 27 | otype="tiledbsoma", 28 | slots={ 29 | "obs": obs_schema, 30 | "ms:RNA.T": var_schema, 31 | }, 32 | ).save() 33 | 34 | with soma.Experiment.open("small_dataset.tiledbsoma") as experiment: 35 | curator = ln.curators.TiledbsomaExperimentCurator(experiment, soma_schema) 36 | curator.validate() 37 | artifact = curator.save_artifact( 38 | key="examples/soma_experiment.tiledbsoma", 39 | description="SOMA experiment with schema validation", 40 | ) 41 | assert artifact.schema == soma_schema 42 | artifact.describe() 43 | -------------------------------------------------------------------------------- /docs/scripts/curate_spatialdata.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | spatialdata = ln.core.datasets.spatialdata_blobs() 4 | sdata_schema = ln.Schema.get(name="spatialdata_blobs_schema") 5 
| curator = ln.curators.SpatialDataCurator(spatialdata, sdata_schema) 6 | try: 7 | curator.validate() 8 | except ln.errors.ValidationError: 9 | pass 10 | 11 | spatialdata.tables["table"].var.drop(index="ENSG00000999999", inplace=True) 12 | 13 | # validate again (must pass now) and save artifact 14 | artifact = ln.Artifact.from_spatialdata( 15 | spatialdata, key="examples/spatialdata1.zarr", schema=sdata_schema 16 | ).save() 17 | artifact.describe() 18 | -------------------------------------------------------------------------------- /docs/scripts/define_mini_immuno_features_labels.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | import bionty as bt 3 | 4 | # define valid labels 5 | perturbation_type = ln.ULabel(name="Perturbation", is_type=True).save() 6 | ln.ULabel(name="DMSO", type=perturbation_type).save() 7 | ln.ULabel(name="IFNG", type=perturbation_type).save() 8 | bt.CellType.from_source(name="B cell").save() 9 | bt.CellType.from_source(name="T cell").save() 10 | 11 | # define valid features 12 | ln.Feature(name="perturbation", dtype=perturbation_type).save() 13 | ln.Feature(name="cell_type_by_expert", dtype=bt.CellType).save() 14 | ln.Feature(name="cell_type_by_model", dtype=bt.CellType).save() 15 | ln.Feature(name="assay_oid", dtype=bt.ExperimentalFactor.ontology_id).save() 16 | ln.Feature(name="concentration", dtype=str).save() 17 | ln.Feature(name="treatment_time_h", dtype="num", coerce_dtype=True).save() 18 | ln.Feature(name="donor", dtype=str, nullable=True).save() 19 | ln.Feature(name="donor_ethnicity", dtype=list[bt.Ethnicity]).save() 20 | -------------------------------------------------------------------------------- /docs/scripts/define_mini_immuno_schema_flexible.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | schema = ln.Schema( 4 | name="Mini immuno schema", 5 | features=[ 6 | ln.Feature.get(name="perturbation"), 
7 | ln.Feature.get(name="cell_type_by_model"), 8 | ln.Feature.get(name="assay_oid"), 9 | ln.Feature.get(name="donor"), 10 | ln.Feature.get(name="concentration"), 11 | ln.Feature.get(name="treatment_time_h"), 12 | ], 13 | flexible=True, # _additional_ columns in a dataframe are validated & annotated 14 | ).save() 15 | -------------------------------------------------------------------------------- /docs/scripts/define_schema_anndata_ensembl_gene_ids_and_valid_features_in_obs.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | import bionty as bt 3 | 4 | obs_schema = ln.examples.schemas.valid_features() 5 | varT_schema = ln.Schema( 6 | name="valid_ensembl_gene_ids", itype=bt.Gene.ensembl_gene_id 7 | ).save() 8 | schema = ln.Schema( 9 | name="anndata_ensembl_gene_ids_and_valid_features_in_obs", 10 | otype="AnnData", 11 | slots={"obs": obs_schema, "var.T": varT_schema}, 12 | ).save() 13 | -------------------------------------------------------------------------------- /docs/scripts/define_schema_spatialdata.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | import bionty as bt 3 | 4 | 5 | attrs_schema = ln.Schema( 6 | features=[ 7 | ln.Feature(name="bio", dtype=dict).save(), 8 | ln.Feature(name="tech", dtype=dict).save(), 9 | ], 10 | ).save() 11 | 12 | sample_schema = ln.Schema( 13 | features=[ 14 | ln.Feature(name="disease", dtype=bt.Disease, coerce_dtype=True).save(), 15 | ln.Feature( 16 | name="developmental_stage", 17 | dtype=bt.DevelopmentalStage, 18 | coerce_dtype=True, 19 | ).save(), 20 | ], 21 | ).save() 22 | 23 | tech_schema = ln.Schema( 24 | features=[ 25 | ln.Feature(name="assay", dtype=bt.ExperimentalFactor, coerce_dtype=True).save(), 26 | ], 27 | ).save() 28 | 29 | obs_schema = ln.Schema( 30 | features=[ 31 | ln.Feature(name="sample_region", dtype="str").save(), 32 | ], 33 | ).save() 34 | 35 | # Schema enforces only registered 
Ensembl Gene IDs are valid (maximal_set=True) 36 | varT_schema = ln.Schema(itype=bt.Gene.ensembl_gene_id, maximal_set=True).save() 37 | 38 | sdata_schema = ln.Schema( 39 | name="spatialdata_blobs_schema", 40 | otype="SpatialData", 41 | slots={ 42 | "attrs:bio": sample_schema, 43 | "attrs:tech": tech_schema, 44 | "attrs": attrs_schema, 45 | "tables:table:obs": obs_schema, 46 | "tables:table:var.T": varT_schema, 47 | }, 48 | ).save() 49 | -------------------------------------------------------------------------------- /docs/scripts/define_valid_features.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | schema = ln.Schema(name="valid_features", itype=ln.Feature).save() 4 | -------------------------------------------------------------------------------- /docs/scripts/ingest_mini_immuno_datasets.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | import bionty as bt 3 | 4 | # observation-level metadata 5 | ln.Feature(name="perturbation", dtype="cat[ULabel]").save() 6 | ln.Feature(name="sample_note", dtype="str").save() 7 | ln.Feature(name="cell_type_by_expert", dtype="cat[bionty.CellType]").save() 8 | ln.Feature(name="cell_type_by_model", dtype="cat[bionty.CellType]").save() 9 | # dataset-level metadata 10 | ln.Feature(name="temperature", dtype="float").save() 11 | ln.Feature(name="experiment", dtype="cat[ULabel]").save() 12 | ln.Feature(name="date_of_study", dtype="date").save() 13 | ln.Feature(name="study_note", dtype="str").save() 14 | ln.Feature(name="study_metadata", dtype=dict).save() 15 | 16 | ## Permissible values for categoricals 17 | ln.ULabel.from_values(["DMSO", "IFNG"], create=True).save() 18 | ln.ULabel.from_values(["Experiment 1", "Experiment 2"], create=True).save() 19 | bt.CellType.from_values(["B cell", "T cell"], create=True).save() 20 | 21 | schema = ln.examples.schemas.anndata_ensembl_gene_ids_and_valid_features_in_obs() 
22 | 23 | ## Ingest dataset1 24 | adata = ln.core.datasets.mini_immuno.get_dataset1(otype="AnnData") 25 | artifact = ln.Artifact.from_anndata( 26 | adata, 27 | key="examples/dataset1.h5ad", 28 | schema=schema, 29 | ).save() 30 | adhoc = {"study_metadata": {"detail1": "123", "detail2": 1}} 31 | dataset_metadata = adata.uns 32 | dataset_metadata.update(adhoc) 33 | artifact.features.add_values(dataset_metadata) # type: ignore 34 | 35 | # Ingest dataset2 36 | adata2 = ln.core.datasets.mini_immuno.get_dataset2(otype="AnnData") 37 | artifact2 = ln.Artifact.from_anndata( 38 | adata2, 39 | key="examples/dataset2.h5ad", 40 | schema=schema, 41 | ).save() 42 | adhoc2 = {"study_metadata": {"detail1": "456", "detail2": 2}} 43 | dataset_metadata2 = adata2.uns 44 | dataset_metadata2.update(adhoc2) 45 | artifact2.features.add_values(dataset_metadata2) # type: ignore 46 | -------------------------------------------------------------------------------- /docs/scripts/run_track_and_finish.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | ln.track() # initiate a tracked notebook/script run 4 | 5 | # your code automatically tracks inputs & outputs 6 | 7 | ln.finish() # mark run as finished, save execution report, source code & environment 8 | -------------------------------------------------------------------------------- /docs/scripts/run_track_with_params.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import lamindb as ln 3 | 4 | if __name__ == "__main__": 5 | p = argparse.ArgumentParser() 6 | p.add_argument("--input-dir", type=str) 7 | p.add_argument("--downsample", action="store_true") 8 | p.add_argument("--learning-rate", type=float) 9 | args = p.parse_args() 10 | params = { 11 | "input_dir": args.input_dir, 12 | "learning_rate": args.learning_rate, 13 | "preprocess_params": { 14 | "downsample": args.downsample, # nested parameter names & values in 
dictionaries are not validated 15 | "normalization": "the_good_one", 16 | }, 17 | } 18 | ln.track(params=params) 19 | 20 | # your code 21 | 22 | ln.finish() 23 | -------------------------------------------------------------------------------- /docs/scripts/run_workflow.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import lamindb as ln 3 | 4 | ln.Param(name="run_workflow_subset", dtype=bool).save() 5 | 6 | 7 | @ln.tracked() 8 | def subset_dataframe( 9 | artifact: ln.Artifact, 10 | subset_rows: int = 2, 11 | subset_cols: int = 2, 12 | run: ln.Run | None = None, 13 | ) -> ln.Artifact: 14 | dataset = artifact.load(is_run_input=run) 15 | new_data = dataset.iloc[:subset_rows, :subset_cols] 16 | new_key = artifact.key.replace(".parquet", "_subsetted.parquet") 17 | return ln.Artifact.from_df(new_data, key=new_key, run=run).save() 18 | 19 | 20 | if __name__ == "__main__": 21 | p = argparse.ArgumentParser() 22 | p.add_argument("--subset", action="store_true") 23 | args = p.parse_args() 24 | 25 | params = {"run_workflow_subset": args.subset} 26 | 27 | ln.track(params=params) 28 | 29 | if args.subset: 30 | df = ln.core.datasets.small_dataset1(otype="DataFrame") 31 | artifact = ln.Artifact.from_df(df, key="my_analysis/dataset.parquet").save() 32 | subsetted_artifact = subset_dataframe(artifact) 33 | 34 | ln.finish() 35 | -------------------------------------------------------------------------------- /docs/scripts/synced_with_git.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | ln.settings.sync_git_repo = "https://github.com/..." 
4 | ln.track() 5 | # your code 6 | ln.finish() 7 | -------------------------------------------------------------------------------- /docs/storage.md: -------------------------------------------------------------------------------- 1 | # Storage 2 | 3 | ```{toctree} 4 | :maxdepth: 1 5 | 6 | storage/upload 7 | storage/add-replace-cache 8 | storage/anndata-accessor 9 | storage/prepare-transfer-local-to-cloud 10 | storage/transfer-local-to-cloud 11 | storage/vitessce 12 | ``` 13 | -------------------------------------------------------------------------------- /docs/storage/prepare-transfer-local-to-cloud.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Prepare transfer artifacts from a local instance to a cloud instance" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "!lamin settings set auto-connect false" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "import lamindb as ln\n", 26 | "import bionty as bt\n", 27 | "import wetlab as wl\n", 28 | "import pandas as pd" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "ln.setup.init(storage=\"./test-transfer-to-cloud\", modules=\"bionty,wetlab\")\n", 38 | "ln.setup.settings.auto_connect = False" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "artifact = ln.Artifact.from_df(\n", 48 | " pd.DataFrame({\"a\": [1, 2, 3]}), description=\"test-transfer-to-cloud\"\n", 49 | ").save()\n", 50 | "features = bt.CellMarker.from_values(\n", 51 | " [\"PD1\", \"CD21\"], field=bt.CellMarker.name, organism=\"human\"\n", 52 | ")\n", 53 | 
"ln.save(features)\n", 54 | "artifact.features._add_schema(ln.FeatureSet(features), slot=\"var\")\n", 55 | "\n", 56 | "organism = bt.Organism.from_source(name=\"human\").save()\n", 57 | "artifact.labels.add(organism)\n", 58 | "\n", 59 | "experiment = wl.Experiment(name=\"experiment-test-transfer-to-cloud\").save()\n", 60 | "artifact.experiments.add(experiment)\n", 61 | "\n", 62 | "artifact.describe()" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "assert artifact.features[\"var\"].count() == 2" 72 | ] 73 | } 74 | ], 75 | "metadata": { 76 | "kernelspec": { 77 | "display_name": "Python 3 (ipykernel)", 78 | "language": "python", 79 | "name": "python3" 80 | }, 81 | "language_info": { 82 | "codemirror_mode": { 83 | "name": "ipython", 84 | "version": 3 85 | }, 86 | "file_extension": ".py", 87 | "mimetype": "text/x-python", 88 | "name": "python", 89 | "nbconvert_exporter": "python", 90 | "pygments_lexer": "ipython3", 91 | "version": "3.9.17" 92 | } 93 | }, 94 | "nbformat": 4, 95 | "nbformat_minor": 2 96 | } 97 | -------------------------------------------------------------------------------- /docs/storage/test-files/iris.csv: -------------------------------------------------------------------------------- 1 | "sepal.length","sepal.width","petal.length","petal.width","variety" 2 | 5.1,3.5,1.4,.2,"Setosa" 3 | 4.9,3,1.4,.2,"Setosa" 4 | 7,3.2,4.7,1.4,"Versicolor" 5 | 6.4,3.2,4.5,1.5,"Versicolor" 6 | 6.3,3.3,6,2.5,"Virginica" 7 | 5.8,2.7,5.1,1.9,"Virginica" 8 | -------------------------------------------------------------------------------- /docs/storage/test-files/iris.data: -------------------------------------------------------------------------------- 1 | 5.1,3.5,1.4,0.2,Iris-setosa 2 | 4.9,3.0,1.4,0.2,Iris-setosa 3 | 7.0,3.2,4.7,1.4,Iris-versicolor 4 | 6.4,3.2,4.5,1.5,Iris-versicolor 5 | 6.2,3.4,5.4,2.3,Iris-virginica 6 | 5.9,3.0,5.1,1.8,Iris-virginica 7 | 
-------------------------------------------------------------------------------- /docs/storage/test-files/new_iris.csv: -------------------------------------------------------------------------------- 1 | ;sepal.length;sepal.width;petal.length;petal.width;variety 2 | 0;5.1;3.5;1.4;0.2;Setosa 3 | 1;4.9;3.0;1.4;0.2;Setosa 4 | 50;7.0;3.2;4.7;1.4;Versicolor 5 | 51;6.4;3.2;4.5;1.5;Versicolor 6 | 100;6.3;3.3;6.0;2.5;Virginica 7 | 101;5.8;2.7;5.1;1.9;Virginica 8 | -------------------------------------------------------------------------------- /docs/storage/test_notebooks.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import nbproject_test as test 4 | 5 | import lamindb as ln 6 | 7 | 8 | def test_notebooks(): 9 | nbdir = Path(__file__).parent 10 | ln.setup.login("testuser1") 11 | test.execute_notebooks(nbdir, write=True) 12 | -------------------------------------------------------------------------------- /docs/storage/transfer-local-to-cloud.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Transfer artifacts from a local instance to a cloud instance" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import lamindb as ln\n", 17 | "import bionty as bt\n", 18 | "\n", 19 | "ln.connect(\"laminlabs/lamin-dev\")\n", 20 | "bt.settings.organism = \"human\"" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": { 27 | "tags": [ 28 | "hide-cell" 29 | ] 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "def cleanup(artifact: ln.Artifact):\n", 34 | " features_sets = artifact.feature_sets.all()\n", 35 | " experiments = artifact.experiments.all()\n", 36 | " artifact.delete(permanent=True, storage=False)\n", 37 | " features_sets.delete()\n", 38 | " 
experiments.delete()\n", 39 | "\n", 40 | "\n", 41 | "artifacts = ln.Artifact.filter(description=\"test-transfer-to-cloud\").all()\n", 42 | "for artifact in artifacts:\n", 43 | " cleanup(artifact)" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "artifact = ln.Artifact.using(\"testuser1/test-transfer-to-cloud\").get(\n", 53 | " description=\"test-transfer-to-cloud\"\n", 54 | ")\n", 55 | "artifact.describe()" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "artifact.save()" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "artifact.describe()" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "assert artifact._state.db == \"default\"\n", 83 | "assert artifact.organisms.get().name == \"human\"\n", 84 | "assert artifact.experiments.get().name == \"experiment-test-transfer-to-cloud\"\n", 85 | "assert artifact.features[\"var\"].count() == 2" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "!lamin delete --force test-transfer-to-cloud\n", 95 | "!rm -r ./test-transfer-to-cloud" 96 | ] 97 | } 98 | ], 99 | "metadata": { 100 | "kernelspec": { 101 | "display_name": "Python 3 (ipykernel)", 102 | "language": "python", 103 | "name": "python3" 104 | }, 105 | "language_info": { 106 | "codemirror_mode": { 107 | "name": "ipython", 108 | "version": 3 109 | }, 110 | "file_extension": ".py", 111 | "mimetype": "text/x-python", 112 | "name": "python", 113 | "nbconvert_exporter": "python", 114 | "pygments_lexer": "ipython3", 115 | "version": "3.9.17" 116 | } 117 | }, 118 | "nbformat": 4, 119 | "nbformat_minor": 2 120 | } 121 | 
-------------------------------------------------------------------------------- /docs/test_notebooks.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | 4 | import nbproject_test as test 5 | 6 | sys.path[:0] = [str(Path(__file__).parent.parent)] 7 | 8 | from noxfile import GROUPS 9 | 10 | DOCS = Path(__file__).parents[1] / "docs/" 11 | 12 | 13 | def test_tutorial(): 14 | for artifactname in GROUPS["tutorial"]: 15 | test.execute_notebooks(DOCS / artifactname, write=True) 16 | 17 | 18 | def test_guide(): 19 | for artifactname in GROUPS["guide"]: 20 | test.execute_notebooks(DOCS / artifactname, write=True) 21 | 22 | 23 | def test_biology(): 24 | for artifactname in GROUPS["biology"]: 25 | test.execute_notebooks(DOCS / artifactname, write=True) 26 | -------------------------------------------------------------------------------- /docs/wetlab.md: -------------------------------------------------------------------------------- 1 | # `wetlab` 2 | 3 | ```{eval-rst} 4 | .. automodule:: wetlab 5 | ``` 6 | -------------------------------------------------------------------------------- /lamindb/__init__.py: -------------------------------------------------------------------------------- 1 | """A data framework for biology. 2 | 3 | Data lineage 4 | ============ 5 | 6 | Track inputs, outputs & environment of a notebook or script run. 7 | 8 | .. autosummary:: 9 | :toctree: . 10 | 11 | track 12 | finish 13 | 14 | Decorate a function with `@tracked()` to track inputs, outputs & environment of function executions. 15 | 16 | .. autosummary:: 17 | :toctree: . 18 | 19 | tracked 20 | 21 | Registries 22 | ========== 23 | 24 | Manage artifacts and transforms. 25 | 26 | .. autosummary:: 27 | :toctree: . 28 | 29 | Artifact 30 | Storage 31 | Transform 32 | Run 33 | 34 | Validate and annotate artifacts. 35 | 36 | .. autosummary:: 37 | :toctree: . 
38 | 39 | Feature 40 | ULabel 41 | Schema 42 | 43 | Manage flexible records to track, e.g., samples or donors. 44 | 45 | .. autosummary:: 46 | :toctree: . 47 | 48 | Record 49 | Sheet 50 | 51 | Manage projects. 52 | 53 | .. autosummary:: 54 | :toctree: . 55 | 56 | User 57 | Collection 58 | Project 59 | Space 60 | Branch 61 | Reference 62 | Person 63 | 64 | Other 65 | ===== 66 | 67 | Functions and classes. 68 | 69 | .. autosummary:: 70 | :toctree: . 71 | 72 | connect 73 | view 74 | save 75 | UPath 76 | settings 77 | context 78 | 79 | Curators and integrations. 80 | 81 | .. autosummary:: 82 | :toctree: . 83 | 84 | curators 85 | integrations 86 | 87 | Low-level functionality. 88 | 89 | .. autosummary:: 90 | :toctree: . 91 | 92 | examples 93 | errors 94 | setup 95 | base 96 | core 97 | models 98 | 99 | Backwards compatibility. 100 | 101 | .. autosummary:: 102 | :toctree: . 103 | 104 | Param 105 | FeatureSet 106 | Curator 107 | 108 | """ 109 | 110 | # ruff: noqa: I001 111 | # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc. 112 | __version__ = "1.6.2" 113 | 114 | import warnings 115 | 116 | # through SpatialData 117 | warnings.filterwarnings( 118 | "ignore", message="The legacy Dask DataFrame implementation is deprecated" 119 | ) 120 | 121 | from lamindb_setup._check_setup import InstanceNotSetupError as _InstanceNotSetupError 122 | from lamindb_setup._check_setup import _check_instance_setup 123 | from lamindb_setup._connect_instance import connect 124 | from lamindb_setup.core.upath import UPath 125 | 126 | from . import base, errors, setup 127 | 128 | 129 | def __getattr__(name): 130 | raise _InstanceNotSetupError() 131 | 132 | 133 | if _check_instance_setup(from_module="lamindb"): 134 | del __getattr__ # so that imports work out 135 | from . 
import base 136 | from ._tracked import tracked 137 | from ._view import view 138 | from .core._context import context 139 | from .core._settings import settings 140 | from .curators._legacy import CatManager as Curator 141 | from .models import ( 142 | Artifact, 143 | Collection, 144 | Feature, 145 | FeatureSet, # backward compat 146 | Person, 147 | Project, 148 | Reference, 149 | Run, 150 | Schema, 151 | Storage, 152 | Transform, 153 | ULabel, 154 | User, 155 | Space, 156 | Branch, 157 | Record, 158 | Sheet, 159 | ) 160 | from .models.save import save 161 | from . import core 162 | from . import integrations 163 | from . import curators 164 | from . import examples 165 | 166 | track = context._track 167 | finish = context._finish 168 | settings.__doc__ = """Global live settings (:class:`~lamindb.core.Settings`).""" 169 | context.__doc__ = """Global run context (:class:`~lamindb.core.Context`).""" 170 | from django.db.models import Q 171 | 172 | Param = Feature # backward compat 173 | -------------------------------------------------------------------------------- /lamindb/base/__init__.py: -------------------------------------------------------------------------------- 1 | """Base library. 2 | 3 | Is available also when no instance is setup. 4 | 5 | Modules: 6 | 7 | .. autosummary:: 8 | :toctree: . 9 | 10 | uids 11 | types 12 | fields 13 | 14 | Utils: 15 | 16 | .. autosummary:: 17 | :toctree: . 18 | 19 | doc_args 20 | deprecated 21 | 22 | """ 23 | 24 | from lamindb_setup.core import deprecated, doc_args 25 | 26 | from . 
import fields, types, uids 27 | -------------------------------------------------------------------------------- /lamindb/base/ids.py: -------------------------------------------------------------------------------- 1 | from .uids import * # noqa: F403 2 | -------------------------------------------------------------------------------- /lamindb/base/types.py: -------------------------------------------------------------------------------- 1 | """Types. 2 | 3 | Central object types. 4 | 5 | .. autosummary:: 6 | :toctree: . 7 | 8 | ArtifactKind 9 | TransformType 10 | Dtype 11 | 12 | Basic types. 13 | 14 | .. autosummary:: 15 | :toctree: . 16 | 17 | UPathStr 18 | StrField 19 | ListLike 20 | FieldAttr 21 | """ 22 | 23 | from __future__ import annotations 24 | 25 | from typing import Literal, Union 26 | 27 | import numpy as np 28 | import pandas as pd 29 | from django.db.models.query_utils import DeferredAttribute as FieldAttr 30 | from lamindb_setup.core.types import UPathStr # noqa: F401 31 | 32 | # need to use Union because __future__.annotations doesn't do the job here <3.10 33 | # typing.TypeAlias, >3.10 on but already deprecated 34 | ListLike = Union[list[str], pd.Series, np.array] 35 | StrField = Union[str, FieldAttr] # typing.TypeAlias 36 | 37 | TransformType = Literal[ 38 | "pipeline", "notebook", "upload", "script", "function", "linker" 39 | ] 40 | ArtifactKind = Literal["dataset", "model"] 41 | 42 | # below is used for Feature.dtype and Param.dtype 43 | Dtype = Literal[ 44 | "cat", # categoricals 45 | "num", # numericals 46 | "str", # string 47 | "int", # integer / numpy.integer 48 | "float", # float 49 | "bool", # boolean 50 | "date", # date 51 | "datetime", # datetime 52 | "dict", # dictionary 53 | "object", # this is a pandas input dtype, we're only using it for complicated types, not for strings 54 | ] 55 | """Data type. 56 | 57 | Data types in lamindb are a string-serialized abstraction of common data types. 
58 | 59 | Overview 60 | ======== 61 | 62 | ============ ============ ================================================= 63 | description lamindb pandas 64 | ============ ============ ================================================= 65 | categorical `"cat"` `category` 66 | numerical `"num"` `int | float` 67 | integer `"int"` `int64 | int32 | int16 | int8 | uint | ...` 68 | float `"float"` `float64 | float32 | float16 | float8 | ...` 69 | string `"str"` `object` 70 | datetime `"datetime"` `datetime` 71 | date `"date"` `object` (pandera requires an ISO-format string, convert with `df["date"] = df["date"].dt.date`) 72 | dictionary `"dict"` `object` 73 | ============ ============ ================================================= 74 | 75 | Categoricals 76 | ============ 77 | 78 | Beyond indicating that a feature is a categorical, `lamindb` allows you to define the registry to which values are restricted. 79 | 80 | For example, `'cat[ULabel]'` or `'cat[bionty.CellType]'` indicate that permissible values are from the `ULabel` or `CellType` registry, respectively. 81 | 82 | You can also reference multiple registries, e.g., `'cat[ULabel|bionty.CellType]'` indicates that values can be from either registry. 83 | 84 | You can also restrict to sub-types defined in registries via the `type` column, e.g., `'cat[ULabel[CellMedium]]'` indicates that values must be of type `CellMedium` within the `ULabel` registry. 85 | 86 | Literal 87 | ======= 88 | 89 | A `Dtype` object in `lamindb` is a `Literal` up to further specification of `"cat"`. 90 | 91 | """ 92 | FeatureDtype = Dtype # backward compat 93 | -------------------------------------------------------------------------------- /lamindb/base/uids.py: -------------------------------------------------------------------------------- 1 | """Universal IDs. 2 | 3 | Base generators: 4 | 5 | .. autosummary:: 6 | :toctree: . 7 | 8 | base26 9 | base62 10 | base64 11 | 12 | `uid` generators: 13 | 14 | .. autosummary:: 15 | :toctree: . 
16 | 17 | base62_8 18 | base62_12 19 | base62_16 20 | base62_20 21 | 22 | 23 | Collision probabilities 24 | ======================= 25 | 26 | 8 base62 characters (`62**8=2e+14`): 27 | 28 | ======= =========== 29 | n p_collision 30 | ======= =========== 31 | 100k 2e-05 32 | 1M 2e-03 33 | ======= =========== 34 | 35 | 12 base62 characters (`62**12=3e+21`): 36 | 37 | ======= =========== 38 | n p_collision 39 | ======= =========== 40 | 100M 2e-06 41 | 1B 2e-04 42 | ======= =========== 43 | 44 | 16 base62 characters (`62**16=5e+28`): 45 | 46 | ======= =========== 47 | n p_collision 48 | ======= =========== 49 | 1e12 7e-05 50 | 1e13 7e-03 51 | ======= =========== 52 | 53 | 20 base62 characters (`62**20=7e+35`) roughly matches UUID (`2**122=5e+36`): 54 | 55 | ======= =========== 56 | n p_collision 57 | ======= =========== 58 | 1e16 7e-05 59 | 1e17 7e-03 60 | ======= =========== 61 | 62 | See `source `__. 63 | 64 | """ 65 | 66 | import secrets 67 | import string 68 | 69 | 70 | def base64(n_char: int) -> str: 71 | """Random Base64 string.""" 72 | alphabet = string.digits + string.ascii_letters.swapcase() + "_" + "-" 73 | uid = "".join(secrets.choice(alphabet) for i in range(n_char)) 74 | return uid 75 | 76 | 77 | def base62(n_char: int) -> str: 78 | """Random Base62 string.""" 79 | alphabet = string.digits + string.ascii_letters.swapcase() 80 | uid = "".join(secrets.choice(alphabet) for i in range(n_char)) 81 | return uid 82 | 83 | 84 | def base26(n_char: int): 85 | """ASCII lowercase.""" 86 | alphabet = string.ascii_lowercase 87 | uid = "".join(secrets.choice(alphabet) for i in range(n_char)) 88 | return uid 89 | 90 | 91 | def base62_4() -> str: 92 | return base62(4) 93 | 94 | 95 | def base62_8() -> str: 96 | """Random Base62 string of length 8.""" 97 | return base62(8) 98 | 99 | 100 | def base62_12() -> str: 101 | """Random Base62 string of length 12.""" 102 | return base62(12) 103 | 104 | 105 | def base62_16() -> str: 106 | """Random Base62 string of length 16.""" 107 | 
return base62(16) 108 | 109 | 110 | def base62_20() -> str: 111 | """Random Base62 string of length 20.""" 112 | return base62(20) 113 | 114 | 115 | def base62_24() -> str: 116 | """Random Base62 string of length 24.""" 117 | return base62(24) 118 | -------------------------------------------------------------------------------- /lamindb/base/users.py: -------------------------------------------------------------------------------- 1 | user_id_cache = {} 2 | 3 | 4 | def current_user_id() -> int: 5 | import lamindb_setup as ln_setup 6 | from lamindb_setup import settings 7 | from lamindb_setup._init_instance import register_user 8 | 9 | from lamindb.models import User 10 | 11 | def query_user_id(): 12 | if ln_setup.core.django.IS_MIGRATING: 13 | return 1 14 | else: 15 | try: 16 | user_id = User.objects.get(uid=settings.user.uid).id 17 | except User.DoesNotExist: 18 | register_user(settings.user) 19 | user_id = User.objects.get(uid=settings.user.uid).id 20 | return user_id 21 | 22 | if settings._instance_exists: 23 | if settings.instance.slug not in user_id_cache: 24 | user_id_cache[settings.instance.slug] = query_user_id() 25 | return user_id_cache[settings.instance.slug] 26 | else: 27 | return query_user_id() 28 | -------------------------------------------------------------------------------- /lamindb/core/__init__.py: -------------------------------------------------------------------------------- 1 | """Core library. 2 | 3 | Settings & context: 4 | 5 | .. autosummary:: 6 | :toctree: . 7 | 8 | Settings 9 | subsettings 10 | Context 11 | 12 | Artifact loaders: 13 | 14 | .. autosummary:: 15 | :toctree: . 16 | 17 | loaders 18 | 19 | Data loaders: 20 | 21 | .. autosummary:: 22 | :toctree: . 23 | 24 | MappedCollection 25 | 26 | Modules: 27 | 28 | .. autosummary:: 29 | :toctree: . 30 | 31 | datasets 32 | storage 33 | logger 34 | 35 | """ 36 | 37 | from lamin_utils import logger 38 | from lamin_utils._inspect import InspectResult 39 | 40 | from .. 
import errors as exceptions 41 | from . import datasets, loaders, subsettings, types 42 | from ._context import Context 43 | from ._mapped_collection import MappedCollection 44 | from ._settings import Settings 45 | -------------------------------------------------------------------------------- /lamindb/core/_compat.py: -------------------------------------------------------------------------------- 1 | import importlib.util 2 | from typing import Any, Callable, TypeVar 3 | 4 | T = TypeVar("T") 5 | 6 | 7 | def is_package_installed(package_name: str) -> bool: 8 | spec = importlib.util.find_spec(package_name) 9 | return spec is not None 10 | 11 | 12 | def with_package(package_name: str, operation: Callable[[Any], T]) -> T: 13 | """Execute an operation that requires a specific package. 14 | 15 | Args: 16 | package_name: Package name (e.g., "mudata") 17 | operation: Function that takes the imported module and returns a result 18 | 19 | Examples: 20 | # For direct package functions 21 | result = with_package("mudata", lambda mod: mod.read_zarr(path)) 22 | """ 23 | try: 24 | module = importlib.import_module(package_name) 25 | return operation(module) 26 | except ImportError: 27 | raise ImportError( 28 | f"Package '{package_name}' is required but not installed. " 29 | f"Please install with: pip install {package_name}" 30 | ) from None 31 | 32 | 33 | def with_package_obj( 34 | obj: Any, class_name: str, package_name: str, operation: Callable[[Any], T] 35 | ) -> tuple[bool, T | None]: 36 | """Handle operations on objects that require specific packages. 37 | 38 | Args: 39 | obj: The object to operate on 40 | class_name: Expected class name (e.g., "MuData") 41 | package_name: Package that provides the class (e.g., "mudata") 42 | operation: Function to call with the object if package is available. 
43 | 44 | Examples: 45 | # For instance methods 46 | handled, res = apply_class_func(dmem, "MuData", "mudata", 47 | lambda obj: obj.write(filepath)) 48 | """ 49 | if obj.__class__.__name__ == class_name: 50 | try: 51 | importlib.import_module(package_name) 52 | result = operation(obj) 53 | return True, result 54 | except ImportError: 55 | raise ImportError( 56 | f"Object appears to be {class_name} but '{package_name}' package is not installed. " 57 | f"Please install with: pip install {package_name}" 58 | ) from None 59 | 60 | return False, None 61 | -------------------------------------------------------------------------------- /lamindb/core/_track_environment.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import subprocess 4 | import sys 5 | from typing import TYPE_CHECKING 6 | 7 | import lamindb_setup as ln_setup 8 | from lamin_utils import logger 9 | 10 | if TYPE_CHECKING: 11 | from lamindb.models import Run 12 | 13 | 14 | def track_environment(run: Run) -> None: 15 | filepath = ln_setup.settings.cache_dir / f"run_env_pip_{run.uid}.txt" 16 | # create a requirements.txt 17 | # we don't create a conda environment.yml mostly for its slowness 18 | try: 19 | with open(filepath, "w") as f: 20 | result = subprocess.run( 21 | [sys.executable, "-m", "pip", "freeze"], 22 | stdout=f, 23 | ) 24 | except OSError as e: 25 | result = None 26 | logger.warning(f"could not run pip freeze with error {e}") 27 | if result is not None and result.returncode == 0: 28 | logger.info(f"tracked pip freeze > {str(filepath)}") 29 | -------------------------------------------------------------------------------- /lamindb/core/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | """Test datasets. 2 | 3 | The mini immuno dataset. 4 | 5 | .. autosummary:: 6 | :toctree: . 7 | 8 | mini_immuno 9 | 10 | Small in-memory datasets. 11 | 12 | .. 
autosummary:: 13 | :toctree: . 14 | 15 | anndata_with_obs 16 | 17 | Files. 18 | 19 | .. autosummary:: 20 | :toctree: . 21 | 22 | file_fcs 23 | file_fcs_alpert19 24 | file_tsv_rnaseq_nfcore_salmon_merged_gene_counts 25 | file_jpg_paradisi05 26 | file_tiff_suo22 27 | file_fastq 28 | file_bam 29 | file_mini_csv 30 | 31 | Directories. 32 | 33 | .. autosummary:: 34 | :toctree: . 35 | 36 | dir_scrnaseq_cellranger 37 | dir_iris_images 38 | 39 | Dataframe, AnnData, MuData. 40 | 41 | .. autosummary:: 42 | :toctree: . 43 | 44 | df_iris 45 | df_iris_in_meter 46 | df_iris_in_meter_study1 47 | df_iris_in_meter_study2 48 | anndata_mouse_sc_lymph_node 49 | anndata_human_immune_cells 50 | anndata_pbmc68k_reduced 51 | anndata_file_pbmc68k_test 52 | anndata_pbmc3k_processed 53 | anndata_with_obs 54 | anndata_suo22_Visium10X 55 | mudata_papalexi21_subset 56 | schmidt22_crispra_gws_IFNG 57 | schmidt22_perturbseq 58 | 59 | Other. 60 | 61 | .. autosummary:: 62 | :toctree: . 63 | 64 | fake_bio_notebook_titles 65 | """ 66 | 67 | from . 
import mini_immuno 68 | from ._core import ( 69 | anndata_file_pbmc68k_test, 70 | anndata_human_immune_cells, 71 | anndata_mouse_sc_lymph_node, 72 | anndata_pbmc3k_processed, 73 | anndata_pbmc68k_reduced, 74 | anndata_suo22_Visium10X, 75 | df_iris, 76 | df_iris_in_meter, 77 | df_iris_in_meter_study1, 78 | df_iris_in_meter_study2, 79 | dir_iris_images, 80 | dir_scrnaseq_cellranger, 81 | file_bam, 82 | file_fastq, 83 | file_fcs, 84 | file_fcs_alpert19, 85 | file_jpg_paradisi05, 86 | file_mini_csv, 87 | file_tiff_suo22, 88 | file_tsv_rnaseq_nfcore_salmon_merged_gene_counts, 89 | mudata_papalexi21_subset, 90 | schmidt22_crispra_gws_IFNG, 91 | schmidt22_perturbseq, 92 | spatialdata_blobs, 93 | ) 94 | from ._fake import fake_bio_notebook_titles 95 | from ._small import ( 96 | anndata_with_obs, 97 | small_dataset3_cellxgene, 98 | ) 99 | 100 | small_dataset1 = mini_immuno.get_dataset1 # backward compat 101 | small_dataset2 = mini_immuno.get_dataset2 # backward compat 102 | -------------------------------------------------------------------------------- /lamindb/core/datasets/_fake.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | 4 | def fake_bio_notebook_titles(n=100) -> list[str]: 5 | """A fake collection of study titles.""" 6 | from faker import Faker 7 | 8 | fake = Faker() 9 | 10 | from faker_biology.mol_biol import Antibody 11 | from faker_biology.physiology import CellType, Organ, Organelle 12 | 13 | fake.add_provider(CellType) 14 | fake.add_provider(Organ) 15 | fake.add_provider(Organelle) 16 | fake.add_provider(Antibody) 17 | 18 | my_words = [ 19 | "study", 20 | "investigate", 21 | "research", 22 | "result", 23 | "cluster", 24 | "rank", 25 | "candidate", 26 | "visualize", 27 | "efficiency", 28 | "classify", 29 | ] 30 | my_words += [fake.organ() for i in range(5)] + ["intestine", "intestinal"] 31 | my_words += [fake.celltype() for i in range(10)] 32 | my_words += 
[fake.antibody_isotype() for i in range(20)] 33 | 34 | my_notebook_titles = [fake.sentence(ext_word_list=my_words) for i in range(n)] 35 | 36 | return my_notebook_titles 37 | -------------------------------------------------------------------------------- /lamindb/core/datasets/_small.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import Any, Literal 4 | 5 | import anndata as ad 6 | import numpy as np 7 | import pandas as pd 8 | 9 | 10 | def small_dataset3_cellxgene( 11 | otype: Literal["DataFrame", "AnnData"] = "AnnData", 12 | ) -> tuple[pd.DataFrame, dict[str, Any]] | ad.AnnData: 13 | # TODO: consider other ids for other organisms 14 | # "ENSMUSG00002076988" 15 | var_ids = ["invalid_ensembl_id", "ENSG00000000419", "ENSG00000139618"] 16 | dataset_dict = { 17 | var_ids[0]: [2, 3, 3], 18 | var_ids[1]: [3, 4, 5], 19 | var_ids[2]: [4, 2, 3], 20 | "disease_ontology_term_id": ["MONDO:0004975", "MONDO:0004980", "MONDO:0004980"], 21 | "organism": ["human", "human", "human"], 22 | "sex": ["female", "male", "unknown"], 23 | "sex_ontology_term_id": ["PATO:0000383", "PATO:0000384", "unknown"], 24 | "tissue": ["lungg", "lungg", "heart"], 25 | "donor": ["-1", "1", "2"], 26 | } 27 | dataset_df = pd.DataFrame( 28 | dataset_dict, 29 | index=["barcode1", "barcode2", "barcode3"], 30 | ) 31 | dataset_df["tissue"] = dataset_df["tissue"].astype("category") 32 | ad.AnnData( 33 | dataset_df[var_ids], 34 | obs=dataset_df[[key for key in dataset_dict if key not in var_ids]], 35 | ) 36 | if otype == "DataFrame": 37 | return dataset_df 38 | else: 39 | dataset_ad = ad.AnnData(dataset_df.iloc[:, :3], obs=dataset_df.iloc[:, 3:]) 40 | return dataset_ad 41 | 42 | 43 | def anndata_with_obs() -> ad.AnnData: 44 | """Create a mini anndata with cell_type, disease and tissue.""" 45 | import anndata as ad 46 | import bionty.base as bionty_base 47 | 48 | celltypes = ["T cell", "hematopoietic stem cell", 
"hepatocyte", "my new cell type"] 49 | celltype_ids = ["CL:0000084", "CL:0000037", "CL:0000182", ""] 50 | diseases = [ 51 | "chronic kidney disease", 52 | "liver lymphoma", 53 | "cardiac ventricle disorder", 54 | "Alzheimer disease", 55 | ] 56 | tissues = ["kidney", "liver", "heart", "brain"] 57 | df = pd.DataFrame() 58 | df["cell_type"] = celltypes * 10 59 | df["cell_type_id"] = celltype_ids * 10 60 | df["tissue"] = tissues * 10 61 | df["disease"] = diseases * 10 62 | df.index = "obs" + df.index.astype(str) 63 | 64 | adata = ad.AnnData(X=np.zeros(shape=(40, 100), dtype=np.float32), obs=df) 65 | adata.var.index = bionty_base.Gene().df().head(100)["ensembl_gene_id"].values 66 | 67 | return adata 68 | -------------------------------------------------------------------------------- /lamindb/core/exceptions.py: -------------------------------------------------------------------------------- 1 | from ..errors import * # noqa: F403 backward compat 2 | -------------------------------------------------------------------------------- /lamindb/core/storage/__init__.py: -------------------------------------------------------------------------------- 1 | """Storage API. 2 | 3 | Valid suffixes. 4 | 5 | .. autosummary:: 6 | :toctree: . 7 | 8 | VALID_SUFFIXES 9 | 10 | Array accessors. 11 | 12 | .. autosummary:: 13 | :toctree: . 

   AnnDataAccessor
   BackedAccessor
"""

from lamindb_setup.core.upath import LocalPathClasses, UPath, infer_filesystem

from ._backed_access import AnnDataAccessor, BackedAccessor
from ._tiledbsoma import save_tiledbsoma_experiment
from ._valid_suffixes import VALID_SUFFIXES
from .objects import infer_suffix, write_to_disk
from .paths import delete_storage
-------------------------------------------------------------------------------- /lamindb/core/storage/_polars_lazy_df.py: --------------------------------------------------------------------------------
from __future__ import annotations

from contextlib import contextmanager
from pathlib import Path
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from collections.abc import Iterator

    from polars import LazyFrame as PolarsLazyFrame
    from upath import UPath

# suffixes that have a polars scan_* loader (see `scans` below)
POLARS_SUFFIXES = (".parquet", ".csv", ".ndjson", ".ipc")


@contextmanager
def _open_polars_lazy_df(
    paths: UPath | list[UPath], **kwargs
) -> Iterator[PolarsLazyFrame]:
    """Yield a polars `LazyFrame` scanning `paths`; close all files on exit.

    Directories are expanded recursively to the files they contain. The scan
    function is chosen from the suffix of the *first* file, so all files are
    assumed to share one suffix.

    NOTE(review): an empty `paths` raises IndexError and an unsupported suffix
    raises KeyError at the `yield` line — presumably callers pre-validate
    against `POLARS_SUFFIXES`; confirm.
    """
    try:
        import polars as pl
    except ImportError as ie:
        raise ImportError("Please install polars: pip install polars") from ie

    scans = {
        ".parquet": pl.scan_parquet,
        ".csv": pl.scan_csv,
        ".ndjson": pl.scan_ndjson,
        ".ipc": pl.scan_ipc,
    }

    path_list = []
    if isinstance(paths, Path):
        paths = [paths]
    for path in paths:
        # assume http is always a file
        if getattr(path, "protocol", None) not in {"http", "https"} and path.is_dir():
            # keep only entries that have a suffix, i.e. files
            path_list += [p for p in path.rglob("*") if p.suffix != ""]
        else:
            path_list.append(path)

    open_files = []

    try:
        for path in path_list:
            open_files.append(path.open(mode="rb"))

        yield scans[path_list[0].suffix](open_files, **kwargs)
    finally:
        # runs when the with-block exits, even on error: close every handle
        for open_file in open_files:
            open_file.close()
-------------------------------------------------------------------------------- /lamindb/core/storage/_pyarrow_dataset.py: --------------------------------------------------------------------------------
from __future__ import annotations

from typing import TYPE_CHECKING

import pyarrow.dataset
from lamindb_setup.core.upath import LocalPathClasses

if TYPE_CHECKING:
    from pyarrow.dataset import Dataset as PyArrowDataset
    from upath import UPath


# suffixes accepted for opening as a pyarrow dataset
PYARROW_SUFFIXES = (".parquet", ".csv", ".json", ".orc", ".arrow", ".feather", ".ipc")


def _open_pyarrow_dataset(paths: UPath | list[UPath], **kwargs) -> PyArrowDataset:
    """Open `paths` as a pyarrow dataset.

    Local paths are passed as posix strings with no filesystem; remote paths
    are passed as their plain path string together with their fsspec
    filesystem. Directories inside a list are expanded to their files.
    """
    if isinstance(paths, list):
        # a single path can be a directory, but a list of paths
        # has to be a flat list of files
        paths_str = []
        path0 = paths[0]
        if isinstance(path0, LocalPathClasses):
            path_to_str = lambda p: p.as_posix()
            filesystem = None
        else:
            path_to_str = lambda p: p.path
            filesystem = path0.fs
        for path in paths:
            if (
                getattr(path, "protocol", None) not in {"http", "https"}
                and path.is_dir()
            ):
                paths_str += [path_to_str(p) for p in path.rglob("*") if p.suffix != ""]
            else:
                paths_str.append(path_to_str(path))
    elif isinstance(paths, LocalPathClasses):
        paths_str, filesystem = paths.as_posix(), None
    else:
        paths_str, filesystem = paths.path, paths.fs

    return pyarrow.dataset.dataset(paths_str, filesystem=filesystem, **kwargs)
-------------------------------------------------------------------------------- /lamindb/core/storage/_valid_suffixes.py: --------------------------------------------------------------------------------
from __future__ import annotations

from lamindb_setup.core.upath import VALID_COMPOSITE_SUFFIXES, VALID_SIMPLE_SUFFIXES

# add new composite suffixes like so
VALID_COMPOSITE_SUFFIXES.update(
    {
".vitessce.json", 9 | ".ome.zarr", 10 | } 11 | ) 12 | # can do the same for simple valid suffixes 13 | 14 | 15 | class VALID_SUFFIXES: 16 | """Valid suffixes.""" 17 | 18 | SIMPLE: set[str] = VALID_SIMPLE_SUFFIXES 19 | """Simple suffixes.""" 20 | COMPOSITE: set[str] = VALID_COMPOSITE_SUFFIXES 21 | """Composite suffixes.""" 22 | -------------------------------------------------------------------------------- /lamindb/core/storage/objects.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from pathlib import PurePosixPath 4 | from typing import TYPE_CHECKING, TypeAlias 5 | 6 | from anndata import AnnData 7 | from pandas import DataFrame 8 | 9 | from lamindb.core._compat import ( 10 | with_package_obj, 11 | ) 12 | from lamindb.core.types import ScverseDataStructures 13 | 14 | if TYPE_CHECKING: 15 | from lamindb_setup.core.types import UPathStr 16 | 17 | SupportedDataTypes: TypeAlias = DataFrame | ScverseDataStructures 18 | 19 | 20 | def infer_suffix(dmem: SupportedDataTypes, format: str | None = None): 21 | """Infer LaminDB storage file suffix from a data object.""" 22 | if isinstance(dmem, AnnData): 23 | if format is not None: 24 | # should be `.h5ad`, `.`zarr`, or `.anndata.zarr` 25 | if format not in {"h5ad", "zarr", "anndata.zarr"}: 26 | raise ValueError( 27 | "Error when specifying AnnData storage format, it should be" 28 | f" 'h5ad', 'zarr', not '{format}'. Check 'format'" 29 | " or the suffix of 'key'." 30 | ) 31 | return "." 
+ format 32 | return ".h5ad" 33 | 34 | if isinstance(dmem, DataFrame): 35 | if format == ".csv": 36 | return ".csv" 37 | return ".parquet" 38 | 39 | if with_package_obj( 40 | dmem, 41 | "MuData", 42 | "mudata", 43 | lambda obj: True, # Just checking type, not calling any method 44 | )[0]: 45 | return ".h5mu" 46 | 47 | has_spatialdata, spatialdata_suffix = with_package_obj( 48 | dmem, 49 | "SpatialData", 50 | "spatialdata", 51 | lambda obj: ( 52 | format 53 | if format is not None and format in {"spatialdata.zarr", "zarr"} 54 | else ".zarr" 55 | if format is None 56 | else (_ for _ in ()).throw( 57 | ValueError( 58 | "Error when specifying SpatialData storage format, it should be" 59 | f" 'zarr', 'spatialdata.zarr', not '{format}'. Check 'format'" 60 | " or the suffix of 'key'." 61 | ) 62 | ) 63 | ), 64 | ) 65 | if has_spatialdata: 66 | return spatialdata_suffix 67 | else: 68 | raise NotImplementedError 69 | 70 | 71 | def write_to_disk(dmem: SupportedDataTypes, filepath: UPathStr) -> None: 72 | """Writes the passed in memory data to disk to a specified path.""" 73 | if isinstance(dmem, AnnData): 74 | suffix = PurePosixPath(filepath).suffix 75 | if suffix == ".h5ad": 76 | dmem.write_h5ad(filepath) 77 | return 78 | elif suffix == ".zarr": 79 | dmem.write_zarr(filepath) 80 | return 81 | else: 82 | raise NotImplementedError 83 | 84 | if isinstance(dmem, DataFrame): 85 | if filepath.suffix == ".csv": 86 | dmem.to_csv(filepath) 87 | return 88 | dmem.to_parquet(filepath) 89 | return 90 | 91 | if with_package_obj(dmem, "MuData", "mudata", lambda obj: obj.write(filepath))[0]: 92 | return 93 | 94 | if with_package_obj( 95 | dmem, 96 | "SpatialData", 97 | "spatialdata", 98 | lambda obj: obj.write(filepath, overwrite=True), 99 | )[0]: 100 | return 101 | 102 | raise NotImplementedError 103 | -------------------------------------------------------------------------------- /lamindb/core/subsettings/__init__.py: 
--------------------------------------------------------------------------------
"""Sub settings.

.. autosummary::
   :toctree: .

   CreationSettings
   AnnotationSettings

"""

from ._annotation_settings import AnnotationSettings
from ._creation_settings import CreationSettings
-------------------------------------------------------------------------------- /lamindb/core/subsettings/_annotation_settings.py: --------------------------------------------------------------------------------
class AnnotationSettings:
    """Settings for automated annotation."""

    n_max_records: int = 1000
    """Maximal number of records to annotate with during automated annotation.

    If the number of records to annotate exceeds this limit, print a warning and do not annotate.

    The number is calculated per feature for labels, and per schema for features.
    """


# module-level singleton consumed by the rest of the package
annotation_settings = AnnotationSettings()
-------------------------------------------------------------------------------- /lamindb/core/subsettings/_creation_settings.py: --------------------------------------------------------------------------------
class CreationSettings:
    """Settings for record creation."""

    search_names: bool = True
    """Switch off to speed up creating records (default `True`).

    If `True`, search for alternative names and avoids duplicates.

    FAQ: :doc:`/faq/idempotency`
    """
    artifact_skip_size_hash: bool = False
    """To speed up registering high numbers of files (default `False`).

    This bypasses queries for size and hash to AWS & GCP.

    It speeds up file creation by about a factor 100.
    """
    artifact_silence_missing_run_warning: bool = False
    """Silence warning about missing run & transform during artifact creation (default `False`)."""
    _artifact_use_virtual_keys: bool = True
    """Treat `key` parameter in :class:`~lamindb.Artifact` as virtual.

    If `True`, the `key` is **not** used to construct file paths, but file paths are
    based on the `uid` of artifact.
    """


# module-level singleton consumed by the rest of the package
creation_settings = CreationSettings()
-------------------------------------------------------------------------------- /lamindb/core/types.py: --------------------------------------------------------------------------------
from __future__ import annotations

from typing import TYPE_CHECKING, TypeVar

from anndata import AnnData
from lamindb_setup.core.types import UPathStr

from lamindb.base.types import (
    Dtype,
    FieldAttr,
    ListLike,
    StrField,
    TransformType,
)

# presumably TypeVar placeholders so the union below does not require the
# optional mudata/spatialdata packages at import time — TODO confirm
MuData = TypeVar("MuData")
SpatialData = TypeVar("SpatialData")

ScverseDataStructures = AnnData | MuData | SpatialData
-------------------------------------------------------------------------------- /lamindb/curators/__init__.py: --------------------------------------------------------------------------------
"""Curators.

.. autosummary::
   :toctree: .

   DataFrameCurator
   AnnDataCurator
   MuDataCurator
   SpatialDataCurator
   TiledbsomaExperimentCurator

Modules.

.. autosummary::
   :toctree: .
16 | 17 | core 18 | 19 | """ 20 | 21 | from ._legacy import ( # backward compat 22 | CellxGeneAnnDataCatManager, 23 | PertAnnDataCatManager, 24 | ) 25 | from .core import ( 26 | AnnDataCurator, 27 | DataFrameCurator, 28 | MuDataCurator, 29 | SpatialDataCurator, 30 | TiledbsomaExperimentCurator, 31 | ) 32 | 33 | __all__ = [ 34 | "CellxGeneAnnDataCatManager", 35 | "PertAnnDataCatManager", 36 | "AnnDataCurator", 37 | "DataFrameCurator", 38 | "MuDataCurator", 39 | "SpatialDataCurator", 40 | "TiledbsomaExperimentCurator", 41 | ] 42 | -------------------------------------------------------------------------------- /lamindb/curators/_cellxgene_schemas/schema_versions.csv: -------------------------------------------------------------------------------- 1 | schema_version,entity,organism,source,version 2 | 4.0.0,CellType,all,cl,2023-08-24 3 | 4.0.0,ExperimentalFactor,all,efo,3.57.0 4 | 4.0.0,Ethnicity,human,hancestro,3.0 5 | 4.0.0,DevelopmentalStage,human,hsapdv,2020-03-10 6 | 4.0.0,DevelopmentalStage,mouse,mmusdv,2020-03-10 7 | 4.0.0,Disease,all,mondo,2023-08-02 8 | 4.0.0,Organism,all,ncbitaxon,2023-06-20 9 | 4.0.0,Phenotype,all,pato,2023-05-18 10 | 4.0.0,Tissue,all,uberon,2023-09-05 11 | 5.0.0,CellType,all,cl,2024-01-04 12 | 5.0.0,ExperimentalFactor,all,efo,3.62.0 13 | 5.0.0,Ethnicity,human,hancestro,3.0 14 | 5.0.0,DevelopmentalStage,human,hsapdv,2020-03-10 15 | 5.0.0,DevelopmentalStage,mouse,mmusdv,2020-03-10 16 | 5.0.0,Disease,all,mondo,2024-01-03 17 | 5.0.0,Organism,all,ncbitaxon,2023-06-20 18 | 5.0.0,Phenotype,all,pato,2023-05-18 19 | 5.0.0,Tissue,all,uberon,2024-01-18 20 | 5.0.0,Gene,human,ensembl,release-110 21 | 5.0.0,Gene,mouse,ensembl,release-110 22 | 5.1.0,CellType,all,cl,2024-04-05 23 | 5.1.0,ExperimentalFactor,all,efo,3.65.0 24 | 5.1.0,Ethnicity,human,hancestro,3.0 25 | 5.1.0,DevelopmentalStage,human,hsapdv,2020-03-10 26 | 5.1.0,DevelopmentalStage,mouse,mmusdv,2020-03-10 27 | 5.1.0,Disease,all,mondo,2024-05-08 28 | 5.1.0,Organism,all,ncbitaxon,2023-06-20 29 | 
5.1.0,Phenotype,all,pato,2023-05-18
5.1.0,Tissue,all,uberon,2024-03-22
5.1.0,Gene,human,ensembl,release-110
5.1.0,Gene,mouse,ensembl,release-110
5.2.0,CellType,all,cl,2024-08-16
5.2.0,ExperimentalFactor,all,efo,3.69.0
5.2.0,Ethnicity,human,hancestro,3.0
5.2.0,DevelopmentalStage,human,hsapdv,2024-05-28
5.2.0,DevelopmentalStage,mouse,mmusdv,2024-05-28
5.2.0,Disease,all,mondo,2024-08-06
5.2.0,Organism,all,ncbitaxon,2023-06-20
5.2.0,Phenotype,all,pato,2023-05-18
5.2.0,Tissue,all,uberon,2024-08-07
5.2.0,Gene,human,ensembl,release-110
5.2.0,Gene,mouse,ensembl,release-110
-------------------------------------------------------------------------------- /lamindb/errors.py: --------------------------------------------------------------------------------
"""Errors.

.. autosummary::
   :toctree: .

   ValidationError
   InvalidArgument
   DoesNotExist
   NotebookNotSaved
   TrackNotCalled
   MissingContextUID
   UpdateContext
   IntegrityError
   InconsistentKey
   FieldValidationError
   SQLRecordNameChangeIntegrityError
   NoWriteAccess

"""

# inheriting from SystemExit has the sole purpose of suppressing
# the traceback - this isn't optimal but the current best solution
# https://laminlabs.slack.com/archives/C04A0RMA0SC/p1726856875597489


class ValidationError(Exception):
    """Validation error."""

    pass


class InvalidArgument(Exception):
    """Invalid method or function argument."""

    pass


class TrackNotCalled(Exception):
    """`ln.track()` wasn't called."""

    pass


class NotebookNotSaved(Exception):
    """Notebook wasn't saved."""

    pass


# equivalent to Django's DoesNotExist
# and SQLAlchemy's NoResultFound
class DoesNotExist(Exception):
    """No record found."""

    pass


class InconsistentKey(Exception):
    """Inconsistent transform or artifact `key`."""

    pass


class SQLRecordNameChangeIntegrityError(Exception):
    """Custom exception for name change errors."""

    pass


class FieldValidationError(Exception):
    """Field validation error."""

    pass


# -------------------------------------------------------------------------------------
# run context
# -------------------------------------------------------------------------------------


class IntegrityError(Exception):
    """Integrity error.

    For instance, it's not allowed to delete artifacts outside managed storage
    locations.
    """

    pass


class MissingContextUID(SystemExit):
    """User didn't define transform settings."""

    pass


class UpdateContext(SystemExit):
    """Transform settings require update."""

    pass


# -------------------------------------------------------------------------------------
# record
# -------------------------------------------------------------------------------------


class NoWriteAccess(Exception):
    """No write access to a space."""

    pass
-------------------------------------------------------------------------------- /lamindb/examples/__init__.py: --------------------------------------------------------------------------------
"""Examples.

.. autosummary::
   :toctree: .

   ingest_mini_immuno_datasets
   schemas

"""

from . import schemas


def ingest_mini_immuno_datasets():
    """Ingest mini immuno datasets.

    ..
from ... import Schema


def anndata_ensembl_gene_ids_and_valid_features_in_obs() -> Schema:
    """Return a schema for an AnnData with Ensembl gene IDs and valid features in obs.

    .. literalinclude:: scripts/define_schema_anndata_ensembl_gene_ids_and_valid_features_in_obs.py
        :language: python
    """
    import subprocess
    import sys
    from pathlib import Path

    # the script that defines & saves the schema lives in the docs folder
    docs_path = Path(__file__).parent.parent.parent.parent / "docs" / "scripts"
    # use sys.executable rather than a bare "python" so the script runs under
    # the same interpreter/virtualenv as the caller
    subprocess.run(
        [
            sys.executable,
            str(
                docs_path
                / "define_schema_anndata_ensembl_gene_ids_and_valid_features_in_obs.py"
            ),
        ],
        check=True,
    )

    # the script above created/updated this schema record; fetch and return it
    return Schema.get(name="anndata_ensembl_gene_ids_and_valid_features_in_obs")
from ... import Schema


def valid_features() -> Schema:
    """Return a schema that requires valid features.

    .. literalinclude:: scripts/define_valid_features.py
        :language: python
    """
    import subprocess
    import sys
    from pathlib import Path

    # the script that defines & saves the schema lives in the docs folder
    docs_path = Path(__file__).parent.parent.parent.parent / "docs" / "scripts"
    # use sys.executable rather than a bare "python" so the script runs under
    # the same interpreter/virtualenv as the caller
    subprocess.run(
        [sys.executable, str(docs_path / "define_valid_features.py")],
        check=True,
    )

    # the script above created/updated this schema record; fetch and return it
    return Schema.get(name="valid_features")
class Migration(migrations.Migration):
    # Data migration only: copies each transform's source code out of its
    # `_source_code_artifact` into `Transform.source_code` via
    # `transfer_source_code` defined above; no schema operations here.
    dependencies = [
        ("lamindb", "0069_squashed"),
    ]

    operations = [
        # NOTE(review): no reverse function is supplied, so this migration is
        # irreversible once the source-code artifacts are deleted.
        migrations.RunPython(transfer_source_code),
    ]
class Migration(migrations.Migration):
    # Restores `Collection.description` as a nullable, indexed TextField —
    # presumably undoing an earlier field-type change (see the "revert" in the
    # migration filename); TODO confirm against migration 0080.
    dependencies = [
        ("lamindb", "0080_polish_lamindbv1"),
    ]

    operations = [
        migrations.AlterField(
            model_name="collection",
            name="description",
            field=lamindb.base.fields.TextField(
                blank=True, db_index=True, default=None, null=True
            ),
        ),
    ]
db_index=True, default=None, max_length=255, null=True 19 | ), 20 | ), 21 | ] 22 | -------------------------------------------------------------------------------- /lamindb/migrations/0083_alter_feature_is_type_alter_flextable_is_type_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-01-25 13:29 2 | 3 | from django.db import migrations 4 | 5 | import lamindb.base.fields 6 | 7 | 8 | class Migration(migrations.Migration): 9 | dependencies = [ 10 | ("lamindb", "0082_alter_feature_dtype"), 11 | ] 12 | 13 | operations = [ 14 | migrations.RunSQL( 15 | sql=""" 16 | UPDATE lamindb_feature 17 | SET is_type = FALSE 18 | WHERE is_type IS NULL; 19 | 20 | UPDATE lamindb_flextable 21 | SET is_type = FALSE 22 | WHERE is_type IS NULL; 23 | 24 | UPDATE lamindb_param 25 | SET is_type = FALSE 26 | WHERE is_type IS NULL; 27 | 28 | UPDATE lamindb_project 29 | SET is_type = FALSE 30 | WHERE is_type IS NULL; 31 | 32 | UPDATE lamindb_reference 33 | SET is_type = FALSE 34 | WHERE is_type IS NULL; 35 | 36 | UPDATE lamindb_schema 37 | SET is_type = FALSE 38 | WHERE is_type IS NULL; 39 | 40 | UPDATE lamindb_ulabel 41 | SET is_type = FALSE 42 | WHERE is_type IS NULL; 43 | """ 44 | ), 45 | migrations.AlterField( 46 | model_name="feature", 47 | name="is_type", 48 | field=lamindb.base.fields.BooleanField( 49 | blank=True, db_index=True, default=False 50 | ), 51 | ), 52 | migrations.AlterField( 53 | model_name="flextable", 54 | name="is_type", 55 | field=lamindb.base.fields.BooleanField( 56 | blank=True, db_index=True, default=False 57 | ), 58 | ), 59 | migrations.AlterField( 60 | model_name="param", 61 | name="is_type", 62 | field=lamindb.base.fields.BooleanField( 63 | blank=True, db_index=True, default=False 64 | ), 65 | ), 66 | migrations.AlterField( 67 | model_name="project", 68 | name="is_type", 69 | field=lamindb.base.fields.BooleanField( 70 | blank=True, db_index=True, default=False 71 | ), 72 | ), 73 | 
migrations.AlterField( 74 | model_name="reference", 75 | name="is_type", 76 | field=lamindb.base.fields.BooleanField( 77 | blank=True, db_index=True, default=False 78 | ), 79 | ), 80 | migrations.AlterField( 81 | model_name="schema", 82 | name="is_type", 83 | field=lamindb.base.fields.BooleanField( 84 | blank=True, db_index=True, default=False 85 | ), 86 | ), 87 | migrations.AlterField( 88 | model_name="ulabel", 89 | name="is_type", 90 | field=lamindb.base.fields.BooleanField( 91 | blank=True, db_index=True, default=False 92 | ), 93 | ), 94 | ] 95 | -------------------------------------------------------------------------------- /lamindb/migrations/0084_alter_schemafeature_feature_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-01-27 07:22 2 | 3 | import django.db.models.deletion 4 | from django.db import migrations 5 | 6 | import lamindb.base.fields 7 | 8 | 9 | class Migration(migrations.Migration): 10 | dependencies = [ 11 | ("lamindb", "0083_alter_feature_is_type_alter_flextable_is_type_and_more"), 12 | ] 13 | 14 | operations = [ 15 | migrations.AlterField( 16 | model_name="schemafeature", 17 | name="feature", 18 | field=lamindb.base.fields.ForeignKey( 19 | blank=True, 20 | on_delete=django.db.models.deletion.PROTECT, 21 | related_name="links_schema", 22 | to="lamindb.feature", 23 | ), 24 | ), 25 | migrations.AlterField( 26 | model_name="schemafeature", 27 | name="schema", 28 | field=lamindb.base.fields.ForeignKey( 29 | blank=True, 30 | on_delete=django.db.models.deletion.CASCADE, 31 | related_name="links_feature", 32 | to="lamindb.schema", 33 | ), 34 | ), 35 | ] 36 | -------------------------------------------------------------------------------- /lamindb/migrations/0085_alter_feature_is_type_alter_flextable_is_type_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-01-27 13:48 2 | 3 | from 
django.db import migrations 4 | 5 | import lamindb.base.fields 6 | 7 | 8 | class Migration(migrations.Migration): 9 | dependencies = [ 10 | ("lamindb", "0084_alter_schemafeature_feature_and_more"), 11 | ] 12 | 13 | operations = [ 14 | migrations.AlterField( 15 | model_name="feature", 16 | name="is_type", 17 | field=lamindb.base.fields.BooleanField( 18 | blank=True, db_index=True, default=False, null=True 19 | ), 20 | ), 21 | migrations.AlterField( 22 | model_name="flextable", 23 | name="is_type", 24 | field=lamindb.base.fields.BooleanField( 25 | blank=True, db_index=True, default=False, null=True 26 | ), 27 | ), 28 | migrations.AlterField( 29 | model_name="param", 30 | name="is_type", 31 | field=lamindb.base.fields.BooleanField( 32 | blank=True, db_index=True, default=False, null=True 33 | ), 34 | ), 35 | migrations.AlterField( 36 | model_name="project", 37 | name="is_type", 38 | field=lamindb.base.fields.BooleanField( 39 | blank=True, db_index=True, default=False, null=True 40 | ), 41 | ), 42 | migrations.AlterField( 43 | model_name="reference", 44 | name="is_type", 45 | field=lamindb.base.fields.BooleanField( 46 | blank=True, db_index=True, default=False, null=True 47 | ), 48 | ), 49 | migrations.AlterField( 50 | model_name="schema", 51 | name="is_type", 52 | field=lamindb.base.fields.BooleanField( 53 | blank=True, db_index=True, default=False, null=True 54 | ), 55 | ), 56 | migrations.AlterField( 57 | model_name="ulabel", 58 | name="is_type", 59 | field=lamindb.base.fields.BooleanField( 60 | blank=True, db_index=True, default=False, null=True 61 | ), 62 | ), 63 | ] 64 | -------------------------------------------------------------------------------- /lamindb/migrations/0086_various.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-02-06 07:10 2 | 3 | from django.db import migrations, models 4 | 5 | import lamindb.base.fields 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 
dependencies = [ 10 | ("lamindb", "0085_alter_feature_is_type_alter_flextable_is_type_and_more"), 11 | ] 12 | 13 | operations = [ 14 | migrations.AlterField( 15 | model_name="transform", 16 | name="hash", 17 | field=lamindb.base.fields.CharField( 18 | blank=True, 19 | db_index=True, 20 | default=None, 21 | max_length=22, 22 | null=True, 23 | unique=True, 24 | ), 25 | ), 26 | migrations.AlterField( 27 | model_name="artifact", 28 | name="hash", 29 | field=lamindb.base.fields.CharField( 30 | blank=True, 31 | db_index=True, 32 | default=None, 33 | max_length=22, 34 | null=True, 35 | unique=True, 36 | ), 37 | ), 38 | migrations.AlterField( 39 | model_name="collection", 40 | name="hash", 41 | field=lamindb.base.fields.CharField( 42 | blank=True, 43 | db_index=True, 44 | default=None, 45 | max_length=22, 46 | null=True, 47 | unique=True, 48 | ), 49 | ), 50 | migrations.CreateModel( 51 | name="Migration", 52 | fields=[ 53 | ( 54 | "id", 55 | models.BigAutoField( 56 | auto_created=True, 57 | primary_key=True, 58 | serialize=False, 59 | verbose_name="ID", 60 | ), 61 | ), 62 | ( 63 | "app", 64 | lamindb.base.fields.CharField( 65 | blank=True, default=None, max_length=255 66 | ), 67 | ), 68 | ( 69 | "name", 70 | lamindb.base.fields.CharField( 71 | blank=True, default=None, max_length=255 72 | ), 73 | ), 74 | ("applied", lamindb.base.fields.DateTimeField(blank=True)), 75 | ], 76 | options={ 77 | "db_table": "django_migrations", 78 | "managed": False, 79 | }, 80 | ), 81 | migrations.AlterField( 82 | model_name="param", 83 | name="dtype", 84 | field=lamindb.base.fields.CharField( 85 | blank=True, db_index=True, default=None, max_length=64, null=True 86 | ), 87 | ), 88 | migrations.AlterField( 89 | model_name="param", 90 | name="dtype", 91 | field=lamindb.base.fields.CharField( 92 | blank=True, db_index=True, default=None, max_length=255, null=True 93 | ), 94 | ), 95 | ] 96 | -------------------------------------------------------------------------------- 
/lamindb/migrations/0087_rename__schemas_m2m_artifact_feature_sets_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-02-13 12:00 2 | 3 | import django.db.models.deletion 4 | from django.db import migrations, models 5 | 6 | import lamindb.base.fields 7 | 8 | 9 | class Migration(migrations.Migration): 10 | dependencies = [ 11 | ("lamindb", "0086_various"), 12 | ] 13 | 14 | operations = [ 15 | migrations.RenameField( 16 | model_name="artifact", 17 | old_name="_schemas_m2m", 18 | new_name="feature_sets", 19 | ), 20 | migrations.AlterField( 21 | model_name="artifact", 22 | name="schema", 23 | field=lamindb.base.fields.ForeignKey( 24 | blank=True, 25 | default=None, 26 | null=True, 27 | on_delete=django.db.models.deletion.PROTECT, 28 | related_name="validated_artifacts", 29 | to="lamindb.schema", 30 | ), 31 | ), 32 | migrations.AlterField( 33 | model_name="artifact", 34 | name="feature_sets", 35 | field=models.ManyToManyField( 36 | related_name="artifacts", 37 | through="lamindb.ArtifactSchema", 38 | to="lamindb.schema", 39 | ), 40 | ), 41 | ] 42 | -------------------------------------------------------------------------------- /lamindb/migrations/0090_runproject_project_runs.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-03-05 10:20 2 | 3 | import django.db.models.deletion 4 | import django.db.models.functions.datetime 5 | from django.db import migrations, models 6 | 7 | import lamindb.base.fields 8 | import lamindb.base.users 9 | import lamindb.models.sqlrecord 10 | 11 | 12 | class Migration(migrations.Migration): 13 | dependencies = [ 14 | ("lamindb", "0089_subsequent_runs"), 15 | ] 16 | 17 | operations = [ 18 | migrations.CreateModel( 19 | name="RunProject", 20 | fields=[ 21 | ("id", models.BigAutoField(primary_key=True, serialize=False)), 22 | ( 23 | "created_at", 24 | lamindb.base.fields.DateTimeField( 25 | 
blank=True, 26 | db_default=django.db.models.functions.datetime.Now(), 27 | db_index=True, 28 | editable=False, 29 | ), 30 | ), 31 | ( 32 | "created_by", 33 | lamindb.base.fields.ForeignKey( 34 | blank=True, 35 | default=lamindb.base.users.current_user_id, 36 | editable=False, 37 | on_delete=django.db.models.deletion.PROTECT, 38 | related_name="+", 39 | to="lamindb.user", 40 | ), 41 | ), 42 | ( 43 | "project", 44 | lamindb.base.fields.ForeignKey( 45 | blank=True, 46 | on_delete=django.db.models.deletion.PROTECT, 47 | related_name="links_run", 48 | to="lamindb.project", 49 | ), 50 | ), 51 | ( 52 | "run", 53 | lamindb.base.fields.ForeignKey( 54 | blank=True, 55 | on_delete=django.db.models.deletion.CASCADE, 56 | related_name="links_project", 57 | to="lamindb.run", 58 | ), 59 | ), 60 | ], 61 | options={ 62 | "unique_together": {("run", "project")}, 63 | }, 64 | bases=(models.Model, lamindb.models.sqlrecord.IsLink), 65 | ), 66 | migrations.AddField( 67 | model_name="project", 68 | name="runs", 69 | field=models.ManyToManyField( 70 | related_name="projects", through="lamindb.RunProject", to="lamindb.run" 71 | ), 72 | ), 73 | ] 74 | -------------------------------------------------------------------------------- /lamindb/migrations/0091_alter_featurevalue_options_alter_space_options_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.1.4 on 2025-04-30 09:11 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("lamindb", "0090_runproject_project_runs"), 9 | ] 10 | 11 | operations = [ 12 | migrations.AlterModelOptions( 13 | name="featurevalue", 14 | options={"base_manager_name": "objects"}, 15 | ), 16 | migrations.AlterModelOptions( 17 | name="space", 18 | options={"base_manager_name": "objects"}, 19 | ), 20 | migrations.AlterModelOptions( 21 | name="user", 22 | options={"base_manager_name": "objects"}, 23 | ), 24 | ] 25 | 
-------------------------------------------------------------------------------- /lamindb/migrations/0092_alter_artifactfeaturevalue_artifact_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-05-06 20:34 2 | 3 | import django.db.models.deletion 4 | from django.db import migrations 5 | 6 | import lamindb.base.fields 7 | 8 | 9 | class Migration(migrations.Migration): 10 | dependencies = [ 11 | ("lamindb", "0091_alter_featurevalue_options_alter_space_options_and_more"), 12 | ] 13 | 14 | operations = [ 15 | migrations.AlterField( 16 | model_name="artifactfeaturevalue", 17 | name="artifact", 18 | field=lamindb.base.fields.ForeignKey( 19 | blank=True, 20 | on_delete=django.db.models.deletion.CASCADE, 21 | related_name="links_featurevalue", 22 | to="lamindb.artifact", 23 | ), 24 | ), 25 | migrations.AlterField( 26 | model_name="artifactfeaturevalue", 27 | name="featurevalue", 28 | field=lamindb.base.fields.ForeignKey( 29 | blank=True, 30 | on_delete=django.db.models.deletion.PROTECT, 31 | related_name="links_artifact", 32 | to="lamindb.featurevalue", 33 | ), 34 | ), 35 | migrations.AlterField( 36 | model_name="artifactparamvalue", 37 | name="artifact", 38 | field=lamindb.base.fields.ForeignKey( 39 | blank=True, 40 | on_delete=django.db.models.deletion.CASCADE, 41 | related_name="links_paramvalue", 42 | to="lamindb.artifact", 43 | ), 44 | ), 45 | migrations.AlterField( 46 | model_name="artifactparamvalue", 47 | name="paramvalue", 48 | field=lamindb.base.fields.ForeignKey( 49 | blank=True, 50 | on_delete=django.db.models.deletion.PROTECT, 51 | related_name="links_artifact", 52 | to="lamindb.paramvalue", 53 | ), 54 | ), 55 | migrations.AlterField( 56 | model_name="runparamvalue", 57 | name="paramvalue", 58 | field=lamindb.base.fields.ForeignKey( 59 | blank=True, 60 | on_delete=django.db.models.deletion.PROTECT, 61 | related_name="links_run", 62 | to="lamindb.paramvalue", 63 | ), 64 | ), 65 | 
migrations.AlterField( 66 | model_name="runparamvalue", 67 | name="run", 68 | field=lamindb.base.fields.ForeignKey( 69 | blank=True, 70 | on_delete=django.db.models.deletion.CASCADE, 71 | related_name="links_paramvalue", 72 | to="lamindb.run", 73 | ), 74 | ), 75 | ] 76 | -------------------------------------------------------------------------------- /lamindb/migrations/0093_alter_schemacomponent_unique_together.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-05-07 12:16 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("lamindb", "0092_alter_artifactfeaturevalue_artifact_and_more"), 9 | ] 10 | 11 | operations = [ 12 | migrations.AlterUniqueTogether( 13 | name="schemacomponent", 14 | unique_together={("composite", "slot"), ("composite", "slot", "component")}, 15 | ), 16 | ] 17 | -------------------------------------------------------------------------------- /lamindb/migrations/0094_writeloglock_writelogmigrationstate_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.1.7 on 2025-05-10 00:32 2 | 3 | import django.db.models.deletion 4 | from django.db import migrations, models 5 | 6 | 7 | class Migration(migrations.Migration): 8 | dependencies = [ 9 | ("lamindb", "0093_alter_schemacomponent_unique_together"), 10 | ] 11 | 12 | operations = [ 13 | migrations.CreateModel( 14 | name="WriteLogLock", 15 | fields=[ 16 | ( 17 | "id", 18 | models.BigAutoField( 19 | auto_created=True, 20 | primary_key=True, 21 | serialize=False, 22 | verbose_name="ID", 23 | ), 24 | ), 25 | ("locked", models.BooleanField()), 26 | ], 27 | ), 28 | migrations.CreateModel( 29 | name="MigrationState", 30 | fields=[ 31 | ("id", models.SmallAutoField(primary_key=True, serialize=False)), 32 | ("migration_state_id", models.JSONField()), 33 | ], 34 | ), 35 | migrations.CreateModel( 36 | 
name="TableState", 37 | fields=[ 38 | ("id", models.SmallAutoField(primary_key=True, serialize=False)), 39 | ("table_name", models.CharField(max_length=255)), 40 | ("backfilled", models.BooleanField()), 41 | ], 42 | ), 43 | migrations.CreateModel( 44 | name="WriteLog", 45 | fields=[ 46 | ("seqno", models.AutoField(primary_key=True, serialize=False)), 47 | ( 48 | "uid", 49 | models.CharField( 50 | db_index=True, editable=False, max_length=18, unique=True 51 | ), 52 | ), 53 | ("space_uid", models.CharField(max_length=12, null=True)), 54 | ("created_by_uid", models.CharField(default="00000000", max_length=8)), 55 | ("branch_code", models.IntegerField(default=1)), 56 | ( 57 | "run_uid", 58 | models.CharField(default="0000000000000000", max_length=16), 59 | ), 60 | ("record_uid", models.JSONField(null=True)), 61 | ("record_data", models.JSONField(null=True)), 62 | ("event_type", models.PositiveSmallIntegerField()), 63 | ("created_at", models.DateTimeField()), 64 | ( 65 | "migration_state", 66 | models.ForeignKey( 67 | on_delete=django.db.models.deletion.PROTECT, 68 | to="lamindb.migrationstate", 69 | ), 70 | ), 71 | ( 72 | "table", 73 | models.ForeignKey( 74 | on_delete=django.db.models.deletion.PROTECT, 75 | to="lamindb.tablestate", 76 | ), 77 | ), 78 | ], 79 | options={ 80 | "verbose_name": "Write Log", 81 | "verbose_name_plural": "Write Logs", 82 | }, 83 | ), 84 | ] 85 | -------------------------------------------------------------------------------- /lamindb/migrations/0097_remove_schemaparam_param_remove_paramvalue_param_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-05-11 18:54 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("lamindb", "0096_remove_artifact__param_values_and_more"), 9 | ] 10 | 11 | operations = [ 12 | migrations.DeleteModel( 13 | name="ArtifactParamValue", 14 | ), 15 | migrations.DeleteModel( 16 
| name="SchemaParam", 17 | ), 18 | migrations.DeleteModel( 19 | name="Param", 20 | ), 21 | migrations.DeleteModel( 22 | name="ParamValue", 23 | ), 24 | migrations.DeleteModel( 25 | name="RunParamValue", 26 | ), 27 | ] 28 | -------------------------------------------------------------------------------- /lamindb/migrations/0099_alter_writelog_seqno.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.1.7 on 2025-05-23 23:20 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("lamindb", "0098_alter_feature_type_alter_project_type_and_more"), 9 | ] 10 | 11 | operations = [ 12 | migrations.AlterField( 13 | model_name="writelog", 14 | name="seqno", 15 | field=models.BigAutoField(primary_key=True, serialize=False), 16 | ), 17 | migrations.RenameField( 18 | model_name="writelog", 19 | old_name="seqno", 20 | new_name="id", 21 | ), 22 | ] 23 | -------------------------------------------------------------------------------- /lamindb/migrations/0100_branch_alter_artifact__branch_code_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-05-25 11:59 2 | 3 | import django.db.models.deletion 4 | import django.db.models.functions.datetime 5 | from django.db import migrations, models 6 | 7 | import lamindb.base.fields 8 | 9 | 10 | def update_space_uids_and_create_branches(apps, schema_editor): 11 | Space = apps.get_model("lamindb", "Space") 12 | Space.objects.filter(uid="00000000").update(uid="A") 13 | Branch = apps.get_model("lamindb", "Branch") 14 | Branch.objects.get_or_create( 15 | id=-1, 16 | uid="T", 17 | name="Trash", 18 | description="The trash.", 19 | ) 20 | Branch.objects.get_or_create( 21 | id=0, 22 | uid="A", 23 | name="Archive", 24 | description="The archive.", 25 | ) 26 | Branch.objects.get_or_create( 27 | uid="M", 28 | name="Main", 29 | description="The main 
& default branch of the instance.", 30 | ) 31 | 32 | 33 | class Migration(migrations.Migration): 34 | dependencies = [ 35 | ("lamindb", "0099_alter_writelog_seqno"), 36 | ] 37 | 38 | operations = [ 39 | migrations.CreateModel( 40 | name="Branch", 41 | fields=[ 42 | ("id", models.AutoField(primary_key=True, serialize=False)), 43 | ("name", models.CharField(db_index=True, max_length=100)), 44 | ( 45 | "uid", 46 | lamindb.base.fields.CharField( 47 | blank=True, 48 | db_default="M", 49 | db_index=True, 50 | default="M", 51 | editable=False, 52 | max_length=12, 53 | unique=True, 54 | ), 55 | ), 56 | ( 57 | "description", 58 | lamindb.base.fields.CharField( 59 | blank=True, default=None, max_length=255, null=True 60 | ), 61 | ), 62 | ( 63 | "created_at", 64 | lamindb.base.fields.DateTimeField( 65 | blank=True, 66 | db_default=django.db.models.functions.datetime.Now(), 67 | db_index=True, 68 | editable=False, 69 | ), 70 | ), 71 | ( 72 | "created_by", 73 | lamindb.base.fields.ForeignKey( 74 | blank=True, 75 | default=None, 76 | null=True, 77 | on_delete=django.db.models.deletion.CASCADE, 78 | related_name="+", 79 | to="lamindb.user", 80 | ), 81 | ), 82 | ], 83 | options={ 84 | "abstract": False, 85 | "base_manager_name": "objects", 86 | }, 87 | ), 88 | migrations.AlterField( 89 | model_name="space", 90 | name="uid", 91 | field=lamindb.base.fields.CharField( 92 | blank=True, 93 | db_default="A", 94 | db_index=True, 95 | default="A", 96 | editable=False, 97 | max_length=12, 98 | unique=True, 99 | ), 100 | ), 101 | migrations.RunPython(update_space_uids_and_create_branches), 102 | ] 103 | -------------------------------------------------------------------------------- /lamindb/migrations/0102_remove_writelog_branch_code_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-05-27 11:29 2 | 3 | import django.db.models.deletion 4 | from django.db import migrations, models 5 | 6 | 7 | class 
Migration(migrations.Migration): 8 | dependencies = [ 9 | ("lamindb", "0101_alter_artifact_hash_alter_feature_name_and_more"), 10 | ] 11 | 12 | operations = [ 13 | migrations.RemoveField( 14 | model_name="writelog", 15 | name="branch_code", 16 | ), 17 | migrations.RemoveField( 18 | model_name="writelog", 19 | name="space_uid", 20 | ), 21 | migrations.AddField( 22 | model_name="writelog", 23 | name="branch", 24 | field=models.ForeignKey( 25 | default=1, 26 | on_delete=django.db.models.deletion.PROTECT, 27 | to="lamindb.branch", 28 | ), 29 | ), 30 | migrations.AddField( 31 | model_name="writelog", 32 | name="space", 33 | field=models.ForeignKey( 34 | default=1, 35 | on_delete=django.db.models.deletion.PROTECT, 36 | to="lamindb.space", 37 | ), 38 | ), 39 | migrations.AlterField( 40 | model_name="writelog", 41 | name="run_uid", 42 | field=models.CharField(default="0000000000000000", max_length=20), 43 | ), 44 | migrations.AlterField( 45 | model_name="writelog", 46 | name="record_uid", 47 | field=models.JSONField(db_index=True, default=0), 48 | preserve_default=False, 49 | ), 50 | migrations.AlterModelOptions( 51 | name="migrationstate", 52 | options={"base_manager_name": "objects"}, 53 | ), 54 | migrations.AlterModelOptions( 55 | name="tablestate", 56 | options={"base_manager_name": "objects"}, 57 | ), 58 | migrations.AlterField( 59 | model_name="writelog", 60 | name="migration_state", 61 | field=models.ForeignKey( 62 | on_delete=django.db.models.deletion.PROTECT, to="lamindb.migrationstate" 63 | ), 64 | ), 65 | migrations.AlterField( 66 | model_name="writelog", 67 | name="table", 68 | field=models.ForeignKey( 69 | on_delete=django.db.models.deletion.PROTECT, to="lamindb.tablestate" 70 | ), 71 | ), 72 | ] 73 | -------------------------------------------------------------------------------- /lamindb/migrations/0103_remove_writelog_migration_state_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 
on 2025-05-29 12:02 2 | 3 | from django.db import migrations 4 | 5 | 6 | def fix_artifact_kind(apps, schema_editor): 7 | Artifact = apps.get_model("lamindb", "Artifact") 8 | Artifact.objects.filter(kind="__lamindb__").update(kind="__lamindb_run__") 9 | 10 | 11 | class Migration(migrations.Migration): 12 | dependencies = [ 13 | ("lamindb", "0102_remove_writelog_branch_code_and_more"), 14 | ] 15 | 16 | operations = [ 17 | migrations.RunPython(fix_artifact_kind), 18 | migrations.RemoveField( 19 | model_name="writelog", 20 | name="migration_state", 21 | ), 22 | migrations.RemoveField( 23 | model_name="writelog", 24 | name="table", 25 | ), 26 | migrations.RemoveField( 27 | model_name="writelog", 28 | name="branch", 29 | ), 30 | migrations.RemoveField( 31 | model_name="writelog", 32 | name="space", 33 | ), 34 | migrations.DeleteModel( 35 | name="WriteLogLock", 36 | ), 37 | migrations.DeleteModel( 38 | name="MigrationState", 39 | ), 40 | migrations.DeleteModel( 41 | name="TableState", 42 | ), 43 | migrations.DeleteModel( 44 | name="WriteLog", 45 | ), 46 | ] 47 | -------------------------------------------------------------------------------- /lamindb/migrations/0105_record_unique_name.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-06-03 19:37 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("lamindb", "0104_squashed"), 9 | ] 10 | 11 | operations = [ 12 | migrations.AddConstraint( 13 | model_name="record", 14 | constraint=models.UniqueConstraint( 15 | condition=models.Q(("is_type", True)), 16 | fields=("name",), 17 | name="unique_name", 18 | ), 19 | ), 20 | ] 21 | -------------------------------------------------------------------------------- /lamindb/migrations/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/laminlabs/lamindb/0615476ba7f3680f4ff961851e6522d11e7f0a5c/lamindb/migrations/__init__.py -------------------------------------------------------------------------------- /lamindb/models/__init__.py: -------------------------------------------------------------------------------- 1 | """Models library. 2 | 3 | .. autosummary:: 4 | :toctree: . 5 | 6 | BaseSQLRecord 7 | SQLRecord 8 | Registry 9 | BasicQuerySet 10 | QuerySet 11 | ArtifactSet 12 | QueryManager 13 | SQLRecordList 14 | FeatureManager 15 | LabelManager 16 | IsVersioned 17 | CanCurate 18 | HasParents 19 | TracksRun 20 | TracksUpdates 21 | FeatureValue 22 | InspectResult 23 | ValidateFields 24 | SchemaOptionals 25 | 26 | """ 27 | 28 | # ruff: noqa: I001 29 | from lamin_utils._inspect import InspectResult 30 | from ._is_versioned import IsVersioned 31 | from .can_curate import CanCurate 32 | from .sqlrecord import ( 33 | BaseSQLRecord, 34 | SQLRecord, 35 | Registry, 36 | Space, 37 | Branch, 38 | Migration, 39 | ValidateFields, 40 | format_field_value, 41 | record_repr, 42 | IsLink, 43 | ) 44 | from .core import Storage 45 | from .transform import Transform 46 | from .run import Run, TracksRun, TracksUpdates, current_run, User 47 | from .feature import Feature, FeatureValue 48 | from .schema import Schema 49 | from .ulabel import ULabel 50 | 51 | # should come last as it needs everything else 52 | from .artifact import Artifact 53 | from ._feature_manager import FeatureManager 54 | from ._label_manager import LabelManager 55 | from .collection import Collection, CollectionArtifact 56 | from .project import Person, Project, Reference 57 | from .query_manager import QueryManager 58 | from .query_set import BasicQuerySet, QuerySet, SQLRecordList 59 | from .artifact_set import ArtifactSet 60 | from .has_parents import HasParents 61 | from datetime import datetime as _datetime 62 | 63 | FeatureSet = Schema # backward compat 64 | 65 | # link models 66 | from .artifact import 
ArtifactFeatureValue 67 | from .project import ( 68 | ArtifactProject, 69 | TransformProject, 70 | CollectionProject, 71 | ULabelProject, 72 | FeatureProject, 73 | SchemaProject, 74 | ArtifactReference, 75 | CollectionReference, 76 | SheetProject, 77 | RunProject, 78 | RecordProject, 79 | PersonProject, 80 | ) 81 | from .run import RunFeatureValue 82 | from .schema import ( 83 | SchemaFeature, 84 | ArtifactSchema, 85 | SchemaComponent, 86 | SchemaOptionals, 87 | ) 88 | from .ulabel import ArtifactULabel, TransformULabel, RunULabel, CollectionULabel 89 | 90 | from .record import ( 91 | Record, 92 | Sheet, 93 | RecordJson, 94 | RecordRecord, 95 | RecordULabel, 96 | RecordRun, 97 | RecordArtifact, 98 | ) 99 | 100 | 101 | LinkORM = IsLink # backward compat 102 | ParamValue = FeatureValue # backward compat 103 | ArtifactParamValue = ArtifactFeatureValue # backward compat 104 | RunParamValue = RunFeatureValue # backward compat 105 | Param = Feature # backward compat 106 | BasicRecord = BaseSQLRecord # backward compat 107 | -------------------------------------------------------------------------------- /lamindb/models/_relations.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | import lamindb_setup as ln_setup 6 | from django.db.models import ManyToManyField 7 | from lamindb_setup._connect_instance import ( 8 | get_owner_name_from_identifier, 9 | load_instance_settings, 10 | ) 11 | from lamindb_setup.core._settings_store import instance_settings_file 12 | 13 | from lamindb.models.sqlrecord import IsLink 14 | 15 | if TYPE_CHECKING: 16 | from lamindb.models.sqlrecord import Registry, SQLRecord 17 | 18 | 19 | def get_schema_modules(instance: str | None) -> set[str]: 20 | if instance is None or instance == "default": 21 | schema_modules = set(ln_setup.settings.instance.modules) 22 | schema_modules.add("core") 23 | return schema_modules 24 | owner, name = 
get_owner_name_from_identifier(instance) 25 | settings_file = instance_settings_file(name, owner) 26 | if settings_file.exists(): 27 | modules = set(load_instance_settings(settings_file).modules) 28 | else: 29 | cache_filepath = ( 30 | ln_setup.settings.cache_dir / f"instance--{owner}--{name}--uid.txt" 31 | ) 32 | if cache_filepath.exists(): 33 | modules = set(cache_filepath.read_text().split("\n")[1].split(",")) 34 | else: 35 | raise ValueError(f"Instance {instance} not found") 36 | shared_schema_modules = set(ln_setup.settings.instance.modules).intersection( 37 | modules 38 | ) 39 | shared_schema_modules.add("core") 40 | return shared_schema_modules 41 | 42 | 43 | # this function here should likely be renamed 44 | # it maps the __get_name_with_module__() onto the actual model 45 | def dict_module_name_to_model_name( 46 | registry: Registry, instance: str | None = None 47 | ) -> dict[str, Registry]: 48 | schema_modules = get_schema_modules(instance) 49 | d: dict = { 50 | i.related_model.__get_name_with_module__(): i.related_model 51 | for i in registry._meta.related_objects 52 | if i.related_name is not None 53 | and i.related_model.__get_module_name__() in schema_modules 54 | } 55 | d.update( 56 | { 57 | i.related_model.__get_name_with_module__(): i.related_model 58 | for i in registry._meta.many_to_many 59 | if i.name is not None 60 | and i.related_model.__get_module_name__() in schema_modules 61 | } 62 | ) 63 | return d 64 | 65 | 66 | def dict_related_model_to_related_name( 67 | registry: type[SQLRecord], links: bool = False, instance: str | None = None 68 | ) -> dict[str, str]: 69 | def include(model: SQLRecord): 70 | return not links != issubclass(model, IsLink) 71 | 72 | schema_modules = get_schema_modules(instance) 73 | 74 | related_objects = registry._meta.related_objects + registry._meta.many_to_many 75 | d: dict = { 76 | record.related_model.__get_name_with_module__(): ( 77 | record.related_name 78 | if not isinstance(record, ManyToManyField) 79 | else 
record.name 80 | ) 81 | for record in related_objects 82 | if ( 83 | record.name is not None 84 | and include(record.related_model) 85 | and record.related_model.__get_module_name__() in schema_modules 86 | ) 87 | } 88 | return d 89 | 90 | 91 | def get_related_name(features_type: type[SQLRecord]) -> str: 92 | from lamindb.models.schema import Schema 93 | 94 | candidates = [ 95 | field.related_name 96 | for field in Schema._meta.related_objects 97 | if field.related_model == features_type 98 | ] 99 | if not candidates: 100 | raise ValueError( 101 | f"Can't create feature sets from {features_type.__name__} because it's not" 102 | " related to it!\nYou need to create a link model between Schema and" 103 | " your SQLRecord in your custom module.\nTo do so, add a" 104 | " line:\n_feature_sets = models.ManyToMany(Schema," 105 | " related_name='mythings')\n" 106 | ) 107 | return candidates[0] 108 | -------------------------------------------------------------------------------- /lamindb/models/core.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import ( 4 | TYPE_CHECKING, 5 | overload, 6 | ) 7 | 8 | from django.db import models 9 | 10 | from lamindb.base.fields import ( 11 | CharField, 12 | ) 13 | 14 | from ..base.ids import base62_12 15 | from .run import TracksRun, TracksUpdates 16 | from .sqlrecord import SQLRecord 17 | 18 | if TYPE_CHECKING: 19 | from pathlib import Path 20 | 21 | from upath import UPath 22 | 23 | from .artifact import Artifact 24 | 25 | 26 | class Storage(SQLRecord, TracksRun, TracksUpdates): 27 | """Storage locations of artifacts such as folders and S3 buckets. 28 | 29 | A storage location is either a folder (local or in the cloud) or 30 | an entire S3/GCP bucket. 31 | 32 | A LaminDB instance can manage and link multiple storage locations. But any 33 | storage location is managed by *at most one* LaminDB instance. 34 | 35 | .. dropdown:: Managed vs. 
linked storage locations 36 | 37 | The LaminDB instance can update & delete artifacts in managed storage 38 | locations but merely read artifacts in linked storage locations. 39 | 40 | The `instance_uid` field defines the managing LaminDB instance of a 41 | storage location. 42 | 43 | When you delete a LaminDB instance, you'll be warned about data in managed 44 | storage locations while data in linked storage locations is ignored. 45 | 46 | See Also: 47 | :attr:`~lamindb.core.Settings.storage` 48 | Default storage. 49 | :attr:`~lamindb.setup.core.StorageSettings` 50 | Storage settings. 51 | 52 | Examples: 53 | 54 | Configure the default storage location on the command line:: 55 | 56 | lamin init --storage ./myfolder # or "s3://my-bucket" or "gs://my-bucket" 57 | 58 | View the current storage location for writing artifacts:: 59 | 60 | import lamindb as ln 61 | 62 | print(ln.settings.storage) 63 | 64 | Change the current storage location for writing artifacts:: 65 | 66 | ln.settings.storage = "./myfolder2" # or "s3://my-bucket2" or "gs://my-bucket2" 67 | 68 | View all storage locations used by the current instance:: 69 | 70 | ln.Storage.df() 71 | """ 72 | 73 | class Meta(SQLRecord.Meta, TracksRun.Meta, TracksUpdates.Meta): 74 | abstract = False 75 | 76 | _name_field: str = "root" 77 | 78 | id: int = models.AutoField(primary_key=True) 79 | """Internal id, valid only in one DB instance.""" 80 | uid: str = CharField( 81 | editable=False, unique=True, max_length=12, default=base62_12, db_index=True 82 | ) 83 | """Universal id, valid across DB instances.""" 84 | root: str = CharField(db_index=True, unique=True) 85 | """Root path of storage (cloud or local path).""" 86 | description: str | None = CharField(db_index=True, null=True) 87 | """A description of what the storage location is used for (optional).""" 88 | type: str = CharField(max_length=30, db_index=True) 89 | """Can be "local" vs. "s3" vs. 
"gs".""" 90 | region: str | None = CharField(max_length=64, db_index=True, null=True) 91 | """Cloud storage region, if applicable.""" 92 | instance_uid: str | None = CharField(max_length=12, db_index=True, null=True) 93 | """Instance that manages this storage location.""" 94 | artifacts: Artifact 95 | """Artifacts contained in this storage location.""" 96 | 97 | @overload 98 | def __init__( 99 | self, 100 | root: str, 101 | type: str, 102 | region: str | None, 103 | ): ... 104 | 105 | @overload 106 | def __init__( 107 | self, 108 | *db_args, 109 | ): ... 110 | 111 | def __init__( 112 | self, 113 | *args, 114 | **kwargs, 115 | ): 116 | super().__init__(*args, **kwargs) 117 | 118 | @property 119 | def path(self) -> Path | UPath: 120 | """Path. 121 | 122 | Uses the `.root` field and converts it into a `Path` or `UPath`. 123 | """ 124 | from lamindb_setup.core.upath import create_path 125 | 126 | access_token = self._access_token if hasattr(self, "_access_token") else None 127 | return create_path(self.root, access_token=access_token) 128 | -------------------------------------------------------------------------------- /lamindb/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/laminlabs/lamindb/0615476ba7f3680f4ff961851e6522d11e7f0a5c/lamindb/py.typed -------------------------------------------------------------------------------- /lamindb/setup/__init__.py: -------------------------------------------------------------------------------- 1 | import lamindb_setup as _lamindb_setup 2 | from lamindb_setup import * # noqa: F403 3 | from lamindb_setup import ( 4 | connect, 5 | delete, 6 | init, 7 | settings, 8 | ) 9 | 10 | from . 
import core 11 | 12 | del connect # we have this at the root level, hence, we don't want it here 13 | __doc__ = _lamindb_setup.__doc__.replace("lamindb_setup", "lamindb.setup") 14 | settings.__doc__ = settings.__doc__.replace("lamindb_setup", "lamindb.setup") 15 | -------------------------------------------------------------------------------- /lamindb/setup/core/__init__.py: -------------------------------------------------------------------------------- 1 | import lamindb_setup as _lamindb_setup 2 | from lamindb_setup.core import * # noqa: F403 3 | 4 | __doc__ = _lamindb_setup.core.__doc__.replace("lamindb_setup", "lamindb.setup") 5 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | from pathlib import Path 3 | 4 | import pytest 5 | 6 | 7 | @pytest.fixture(scope="function") 8 | def clean_soma_files(request): 9 | path = request.param if hasattr(request, "param") else "small_dataset.tiledbsoma" 10 | if Path(path).exists(): 11 | shutil.rmtree(path) 12 | 13 | yield path 14 | 15 | if Path(path).exists(): 16 | shutil.rmtree(path) 17 | -------------------------------------------------------------------------------- /tests/core/_dataset_fixtures.py: -------------------------------------------------------------------------------- 1 | import anndata as ad 2 | import lamindb as ln 3 | import mudata as md 4 | import numpy as np 5 | import pandas as pd 6 | import pytest 7 | import spatialdata as sd 8 | import tiledbsoma 9 | import tiledbsoma.io 10 | from scipy.sparse import csr_matrix 11 | 12 | 13 | @pytest.fixture(scope="session") 14 | def get_small_adata(): 15 | return ad.AnnData( 16 | X=np.array([[1, 2, 3], [4, 5, 6]]), 17 | obs={"feat1": ["A", "B"]}, 18 | var=pd.DataFrame(index=["MYC", "TCF7", "GATA1"]), 19 | obsm={"X_pca": np.array([[1, 2], [3, 4]])}, 20 | ) 21 | 22 | 23 | @pytest.fixture(scope="session") 24 | def 
get_small_mdata(): 25 | adata1 = ad.AnnData( 26 | X=np.array([[1, 2, 3], [4, 5, 6]]), 27 | obs={"feat1": ["A", "B"]}, 28 | var=pd.DataFrame(index=["MYC", "TCF7", "GATA1"]), 29 | obsm={"X_pca": np.array([[1, 2], [3, 4]])}, 30 | ) 31 | 32 | adata2 = ad.AnnData( 33 | X=np.array([[7, 8], [9, 10]]), 34 | obs={"feat2": ["C", "D"]}, 35 | var=pd.DataFrame(index=["FOXP3", "CD8A"]), 36 | obsm={"X_umap": np.array([[5, 6], [7, 8]])}, 37 | ) 38 | 39 | return md.MuData({"rna": adata1, "protein": adata2}) 40 | 41 | 42 | @pytest.fixture(scope="session") 43 | def get_small_sdata(): 44 | adata = ad.AnnData( 45 | X=csr_matrix(np.array([[0.1, 0.2], [0.3, 0.4]])), 46 | obs=pd.DataFrame(index=["cell1", "cell2"]), 47 | var=pd.DataFrame(index=["gene1", "gene2"]), 48 | ) 49 | 50 | { 51 | "region1": np.array([[[0, 0], [0, 1], [1, 1], [1, 0]]]), 52 | "region2": np.array([[[2, 2], [2, 3], [3, 3], [3, 2]]]), 53 | } 54 | 55 | sdata_obj = sd.SpatialData( 56 | tables={"gene_expression": adata}, 57 | ) 58 | 59 | return sdata_obj 60 | 61 | 62 | @pytest.fixture(scope="session") 63 | def get_small_soma_experiment(): 64 | adata = ln.core.datasets.mini_immuno.get_dataset1(otype="AnnData") 65 | tiledbsoma.io.from_anndata("test.tiledbsoma", adata, measurement_name="RNA") 66 | 67 | exp = tiledbsoma.Experiment.open("test.tiledbsoma") 68 | 69 | return exp 70 | -------------------------------------------------------------------------------- /tests/core/conftest.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | from pathlib import Path 3 | from subprocess import DEVNULL, run 4 | from time import perf_counter 5 | 6 | import lamindb_setup as ln_setup 7 | import pytest 8 | from lamin_utils import logger 9 | from laminci.db import setup_local_test_postgres 10 | 11 | AUTO_CONNECT = ln_setup.settings.auto_connect 12 | ln_setup.settings.auto_connect = False 13 | 14 | import lamindb as ln 15 | 16 | 17 | def pytest_sessionstart(): 18 | t_execute_start = 
perf_counter() 19 | 20 | ln_setup._TESTING = True 21 | pgurl = setup_local_test_postgres() 22 | ln.setup.init( 23 | storage="./default_storage_unit_core", 24 | modules="bionty", 25 | name="lamindb-unit-tests-core", 26 | db=pgurl, 27 | ) 28 | ln.setup.settings.auto_connect = True 29 | ln.settings.creation.artifact_silence_missing_run_warning = True 30 | total_time_elapsed = perf_counter() - t_execute_start 31 | print(f"Time to setup the instance: {total_time_elapsed:.3f}s") 32 | 33 | 34 | def pytest_sessionfinish(session: pytest.Session): 35 | logger.set_verbosity(1) 36 | shutil.rmtree("./default_storage_unit_core") 37 | ln.setup.delete("lamindb-unit-tests-core", force=True) 38 | run("docker stop pgtest && docker rm pgtest", shell=True, stdout=DEVNULL) # noqa: S602 39 | ln.setup.settings.auto_connect = AUTO_CONNECT 40 | 41 | 42 | @pytest.fixture 43 | def ccaplog(caplog): 44 | """Add caplog handler to our custom logger at session start.""" 45 | from lamin_utils._logger import logger 46 | 47 | # Add caplog's handler to our custom logger 48 | logger.addHandler(caplog.handler) 49 | 50 | yield caplog 51 | 52 | # Clean up at the end of the session 53 | logger.removeHandler(caplog.handler) 54 | 55 | 56 | @pytest.fixture( 57 | scope="module", 58 | params=[ 59 | # tuple of is_in_registered_storage, path, suffix, hash of test_dir 60 | (True, "./default_storage_unit_core/", ".csv", "iGtHiFEBV3r1_TFovdQCgw"), 61 | (True, "./default_storage_unit_core/", "", "iGtHiFEBV3r1_TFovdQCgw"), 62 | (True, "./registered_storage/", ".csv", "iGtHiFEBV3r1_TFovdQCgw"), 63 | (True, "./registered_storage/", "", "iGtHiFEBV3r1_TFovdQCgw"), 64 | (False, "./nonregistered_storage/", ".csv", "iGtHiFEBV3r1_TFovdQCgw"), 65 | (False, "./nonregistered_storage/", "", "iGtHiFEBV3r1_TFovdQCgw"), 66 | ], 67 | ) 68 | def get_test_filepaths(request): # -> Tuple[bool, Path, Path, Path, str] 69 | import lamindb as ln 70 | 71 | is_in_registered_storage: bool = request.param[0] 72 | root_dir: Path = 
Path(request.param[1]) 73 | suffix: str = request.param[2] 74 | hash_test_dir: str = request.param[3] 75 | if is_in_registered_storage: 76 | # ensure that it's actually registered 77 | if ln.Storage.filter(root=root_dir.resolve().as_posix()).one_or_none() is None: 78 | ln.Storage(root=root_dir.resolve().as_posix(), type="local").save() 79 | else: 80 | assert ( 81 | ln.Storage.filter(root=root_dir.resolve().as_posix()).one_or_none() is None 82 | ) 83 | test_dirpath = root_dir / "my_dir/" 84 | test_dirpath.mkdir(parents=True, exist_ok=True) 85 | # create a first file 86 | test_filepath0 = test_dirpath / f"my_file{suffix}" 87 | test_filepath0.write_text("0") 88 | # create a second, duplicated file 89 | test_filepath1 = test_dirpath / f"my_file1{suffix}" 90 | test_filepath1.write_text("0") 91 | # create a non-duplicated file 92 | test_filepath2 = test_dirpath / f"my_file2{suffix}" 93 | test_filepath2.write_text("1") 94 | # return a boolean indicating whether test filepath is in default storage 95 | # and the test filepath 96 | yield ( 97 | is_in_registered_storage, 98 | root_dir, 99 | test_dirpath, 100 | test_filepath0, 101 | suffix, 102 | hash_test_dir, 103 | ) 104 | shutil.rmtree(test_dirpath) 105 | -------------------------------------------------------------------------------- /tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 |
library(laminr)
13 | 
14 | db <- connect()
15 | 16 | 17 | 18 |
→ connected lamindb: laminlabs/lamindata
19 | 20 | 21 | 22 |
db$track("lOScuxDTDE0q0000")
23 | 24 | 25 | 26 |
→ loaded Transform('lOScuxDT'), started Run('GWpaTtUg') at 2024-12-01 17:49:18 UTC
27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 |
db$finish()
35 | 36 | 37 | 38 |
MoreOUTPUT 
39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /tests/core/notebooks/basic-r-notebook.Rmd.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | My exemplary R analysis 5 |

My exemplary R analysis

6 | 7 | 8 | 9 | 10 | 11 | 12 |
library(laminr)
13 | 
14 | db <- connect()
15 | 16 | 17 | 18 |
→ connected lamindb: laminlabs/lamindata
19 | 20 | 21 | 22 |
db$track("lOScuxDTDE0q0000")
23 | 24 | 25 | 26 |
→ loaded Transform('lOScuxDT'), started Run('GWpaTtUg') at 2024-12-01 17:49:18 UTC
27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 |
db$finish()
35 | 36 | 37 | 38 |
MoreOUTPUT ! please hit SHORTCUT to save the notebook in your editor and re-run finish()
39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /tests/core/notebooks/duplicate/with-title-initialized-consecutive-finish.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# My duplicated test notebook (consecutive) with `ln.finish()`" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "This has actually different content than the original one in the `notebooks/` folder." 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import lamindb as ln\n", 24 | "\n", 25 | "ln.track()" 26 | ] 27 | } 28 | ], 29 | "metadata": { 30 | "kernelspec": { 31 | "display_name": "py310", 32 | "language": "python", 33 | "name": "python3" 34 | }, 35 | "language_info": { 36 | "codemirror_mode": { 37 | "name": "ipython", 38 | "version": 3 39 | }, 40 | "file_extension": ".py", 41 | "mimetype": "text/x-python", 42 | "name": "python", 43 | "nbconvert_exporter": "python", 44 | "pygments_lexer": "ipython3", 45 | "version": "3.12.8" 46 | } 47 | }, 48 | "nbformat": 4, 49 | "nbformat_minor": 2 50 | } 51 | -------------------------------------------------------------------------------- /tests/core/notebooks/no-title.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "0", 6 | "metadata": {}, 7 | "source": [ 8 | "A notebook without title." 
9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "id": "1", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import lamindb as ln" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "id": "2", 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "# pass stem uid\n", 29 | "ln.track(\"123456789ABC\")" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "id": "3", 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "assert ln.context.transform.description == \"no-title.ipynb\"\n", 40 | "assert ln.context.transform.key == \"no-title.ipynb\"" 41 | ] 42 | } 43 | ], 44 | "metadata": { 45 | "kernelspec": { 46 | "display_name": "Python 3.9.12 ('base1')", 47 | "language": "python", 48 | "name": "python3" 49 | }, 50 | "language_info": { 51 | "codemirror_mode": { 52 | "name": "ipython", 53 | "version": 3 54 | }, 55 | "file_extension": ".py", 56 | "mimetype": "text/x-python", 57 | "name": "python", 58 | "nbconvert_exporter": "python", 59 | "pygments_lexer": "ipython3", 60 | "version": "3.12.8" 61 | }, 62 | "nbproject": { 63 | "id": "Irn3xQyQ40GU", 64 | "pypackage": { 65 | "nbproject": "0.0.7+2.g8521e30" 66 | }, 67 | "time_init": "2022-06-08T14:42:31.551211+00:00", 68 | "version": "0" 69 | }, 70 | "vscode": { 71 | "interpreter": { 72 | "hash": "2775e555cdc2d728c54aa22130c79afb1fa4da64f22f2fc6dcc2aa346c4e0672" 73 | } 74 | } 75 | }, 76 | "nbformat": 4, 77 | "nbformat_minor": 5 78 | } 79 | -------------------------------------------------------------------------------- /tests/core/notebooks/with-title-initialized-consecutive-finish-not-last-cell.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# My test notebook (consecutive) with `ln.finish()` not in last cell" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | 
"execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import lamindb as ln" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "# do not pass uid purposefully\n", 26 | "ln.track()" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "print(\"my consecutive cell\")" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "ln.finish(ignore_non_consecutive=True)" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "print(\"my consecutive cell\")" 54 | ] 55 | } 56 | ], 57 | "metadata": { 58 | "kernelspec": { 59 | "display_name": "py39", 60 | "language": "python", 61 | "name": "python3" 62 | }, 63 | "language_info": { 64 | "codemirror_mode": { 65 | "name": "ipython", 66 | "version": 3 67 | }, 68 | "file_extension": ".py", 69 | "mimetype": "text/x-python", 70 | "name": "python", 71 | "nbconvert_exporter": "python", 72 | "pygments_lexer": "ipython3", 73 | "version": "3.12.8" 74 | } 75 | }, 76 | "nbformat": 4, 77 | "nbformat_minor": 2 78 | } 79 | -------------------------------------------------------------------------------- /tests/core/notebooks/with-title-initialized-consecutive-finish.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# My test notebook (consecutive) with `ln.finish()`" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import lamindb as ln\n", 17 | "import pytest" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | 
"metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "with pytest.raises(ln.errors.InvalidArgument) as error:\n", 27 | " ln.track(\"ujPaFZ\")\n", 28 | "print(error.exconly())\n", 29 | "assert error.exconly().startswith(\n", 30 | " 'lamindb.errors.InvalidArgument: Please pass an auto-generated uid instead of \"ujPaFZ\". Resolve by running:'\n", 31 | ")" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "# with uid passed\n", 41 | "ln.track(\"ujPaFZatnMLG0000\")" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "print(\"my consecutive cell\")" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "print(\"my consecutive cell\")" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "ln.finish()" 69 | ] 70 | } 71 | ], 72 | "metadata": { 73 | "kernelspec": { 74 | "display_name": "py312", 75 | "language": "python", 76 | "name": "python3" 77 | }, 78 | "language_info": { 79 | "codemirror_mode": { 80 | "name": "ipython", 81 | "version": 3 82 | }, 83 | "file_extension": ".py", 84 | "mimetype": "text/x-python", 85 | "name": "python", 86 | "nbconvert_exporter": "python", 87 | "pygments_lexer": "ipython3", 88 | "version": "3.12.8" 89 | } 90 | }, 91 | "nbformat": 4, 92 | "nbformat_minor": 2 93 | } 94 | -------------------------------------------------------------------------------- /tests/core/scripts/duplicate1/script-to-test-versioning.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | ln.context.version = "1" 4 | ln.track("Ro1gl7n8YrdH0001") 5 | -------------------------------------------------------------------------------- 
/tests/core/scripts/duplicate2/script-to-test-versioning.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | ln.context.version = "2" 4 | ln.track("Ro1gl7n8YrdH0001") 5 | 6 | assert ln.context.transform.version == "2" 7 | -------------------------------------------------------------------------------- /tests/core/scripts/duplicate3/script-to-test-versioning.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | ln.context.version = "3" 4 | ln.track("Ro1gl7n8YrdH0001") 5 | -------------------------------------------------------------------------------- /tests/core/scripts/duplicate4/script-to-test-versioning.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | ln.track() 4 | -------------------------------------------------------------------------------- /tests/core/scripts/script-to-test-filename-change.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | ln.track("Ro1gl7n8YrdH0000") 4 | -------------------------------------------------------------------------------- /tests/core/scripts/script-to-test-versioning.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | ln.context.version = "1" 4 | ln.track("Ro1gl7n8YrdH0000") 5 | -------------------------------------------------------------------------------- /tests/core/test_artifact_folders.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | import pytest 3 | from lamindb.errors import InvalidArgument 4 | 5 | 6 | @pytest.mark.parametrize("key", [None, "my_new_folder"]) 7 | def test_folder_like_artifact(get_test_filepaths, key): 8 | # get variables from fixture 9 | is_in_registered_storage = get_test_filepaths[0] 10 | test_dirpath = 
get_test_filepaths[2] 11 | hash_test_dir = get_test_filepaths[5] 12 | 13 | # run tests on initial Artifact creation 14 | if key is not None and is_in_registered_storage: 15 | with pytest.raises(InvalidArgument) as error: 16 | ln.Artifact(test_dirpath, key=key) 17 | assert error.exconly().startswith( 18 | "lamindb.errors.InvalidArgument: The path" # The path {data} is already in registered storage 19 | ) 20 | return None 21 | if key is None and not is_in_registered_storage: 22 | with pytest.raises(ValueError) as error: 23 | ln.Artifact(test_dirpath, key=key) 24 | assert error.exconly().startswith( 25 | "ValueError: Pass one of key, run or description as a parameter" 26 | ) 27 | return None 28 | artifact1 = ln.Artifact(test_dirpath, key=key) 29 | assert artifact1.n_files == 3 30 | assert artifact1.hash == hash_test_dir 31 | assert artifact1._state.adding 32 | assert artifact1.description is None 33 | assert artifact1.path.exists() 34 | artifact1.save() 35 | 36 | # run tests on re-creating the Artifact 37 | artifact2 = ln.Artifact(test_dirpath, key=key, description="something") 38 | assert not artifact2._state.adding 39 | assert artifact1.id == artifact2.id 40 | assert artifact1.uid == artifact2.uid 41 | assert artifact1.storage == artifact2.storage 42 | assert artifact2.path.exists() 43 | assert artifact2.description == "something" 44 | 45 | # now put another file in the test directory 46 | 47 | # create a first file 48 | test_filepath_added = test_dirpath / "my_file_added.txt" 49 | test_filepath_added.write_text("2") 50 | artifact3 = ln.Artifact(test_dirpath, key=key, revises=artifact1) 51 | assert artifact3.n_files == 4 52 | assert artifact3.hash != hash_test_dir 53 | assert artifact3._state.adding 54 | assert artifact3.description is None 55 | assert artifact3.path.exists() 56 | artifact3.save() 57 | 58 | # the state of artifact1 is lost, because artifact3 is stored at the same path 59 | assert artifact3.overwrite_versions 60 | assert artifact1.overwrite_versions 
    assert artifact3.path == artifact1.path
    test_filepath_added.unlink()

    # delete the artifact
    artifact2.delete(permanent=True, storage=False)
    artifact3.delete(permanent=True, storage=False)


def test_overwrite_versions_false(get_test_filepaths):
    """Folder artifacts created with overwrite_versions=False get distinct storage paths per version."""
    # get variables from fixture
    is_in_registered_storage = get_test_filepaths[0]
    test_dirpath = get_test_filepaths[2]
    hash_test_dir = get_test_filepaths[5]
    if is_in_registered_storage:
        return
    artifact1 = ln.Artifact(
        test_dirpath, key="my_folder", overwrite_versions=False
    ).save()
    assert artifact1.hash == hash_test_dir
    # skip artifact2 because we already test this above
    # create a first file
    test_filepath_added = test_dirpath / "my_file_added.txt"
    test_filepath_added.write_text("2")
    artifact3 = ln.Artifact(test_dirpath, key="my_folder", overwrite_versions=False)
    assert artifact3.hash != hash_test_dir
    artifact3.save()
    # the state of artifact1 is lost, because artifact3 is stored at the same path
    # NOTE(review): with overwrite_versions=False the two paths actually differ
    # (asserted below); this comment looks copied from the overwrite_versions=True
    # test above — confirm intent
    assert not artifact3.overwrite_versions
    assert not artifact1.overwrite_versions
    assert artifact3.path != artifact1.path
    test_filepath_added.unlink()
    artifact1.delete(permanent=True, storage=False)
    artifact3.delete(permanent=True, storage=False)
--------------------------------------------------------------------------------
/tests/core/test_data.py:
--------------------------------------------------------------------------------
import lamindb as ln
import pytest


def test_rename():
    """Renaming a label/feature that annotates an artifact raises until the link is made external."""
    import pandas as pd
    from lamindb.errors import SQLRecordNameChangeIntegrityError

    df = pd.DataFrame(
        {
            "feature_to_rename": [
                "label-to-rename",
                "label-to-rename",
                "label-not-to-rename",
            ],
            "feature_to_rename2": [
                "label-not-to-rename",
                "label-not-to-rename",
                "label-not-to-rename",
            ],
        }
    )

    curator = ln.Curator.from_df(
        df,
        categoricals={
            "feature_to_rename": ln.ULabel.name,
            "feature_to_rename2": ln.ULabel.name,
        },
    )
    curator.add_new_from("feature_to_rename")
    curator.add_new_from("feature_to_rename2")
    artifact = curator.save_artifact(description="test-rename")
    # the curated artifact is linked to the label through the feature
    assert artifact.ulabels.through.objects.filter(
        feature__name="feature_to_rename", ulabel__name="label-to-rename"
    ).exists()
    assert ln.Artifact.filter(feature_sets__features__name="feature_to_rename").exists()

    # rename label
    ulabel = ln.ULabel.get(name="label-to-rename")
    with pytest.raises(SQLRecordNameChangeIntegrityError):
        ulabel.name = "label-renamed"
        ulabel.save()

    artifact.labels.make_external(ulabel)
    assert not artifact.ulabels.through.objects.filter(
        feature__name="feature_to_rename", ulabel__name="label-to-rename"
    ).exists()
    ulabel.name = "label-renamed"
    ulabel.save()

    # rename feature
    feature = ln.Feature.get(name="feature_to_rename")
    with pytest.raises(SQLRecordNameChangeIntegrityError):
        feature.name = "feature_renamed"
        feature.save()

    artifact.features.make_external(feature)
    assert not ln.Artifact.filter(
        feature_sets__features__name="feature_to_rename"
    ).exists()
    assert ln.Artifact.filter(
        feature_sets__features__name="feature_to_rename2"
    ).exists()
    feature.name = "feature_renamed"
    feature.save()

    # rename the other feature, automatically deletes no-member schema
    feature2 = ln.Feature.get(name="feature_to_rename2")
    artifact.features.make_external(feature2)
    assert artifact.feature_sets.count() == 0

    # clean up
    artifact.delete(permanent=True)
    ln.Schema.filter().delete()
    ln.ULabel.filter().delete()
    ln.Feature.filter().delete()
--------------------------------------------------------------------------------
/tests/core/test_db.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | 4 | def test_create_to_load(): 5 | transform = ln.Transform(version="0", key="test", type="pipeline") 6 | transform.save() 7 | run = ln.Run(transform=transform) 8 | run.save() 9 | ln.Storage.get(root=str(ln.setup.settings.storage.root)) 10 | -------------------------------------------------------------------------------- /tests/core/test_delete.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | 4 | def test_delete_record(): 5 | names = ["label1", "label2", "label3"] 6 | labels = [ln.ULabel(name=name) for name in names] 7 | ln.save(labels) 8 | ln.ULabel.filter(name__in=names).delete() 9 | assert ln.ULabel.filter(name__in=names).count() == 0 10 | -------------------------------------------------------------------------------- /tests/core/test_feature.py: -------------------------------------------------------------------------------- 1 | import bionty as bt 2 | import lamindb as ln 3 | import pandas as pd 4 | import pytest 5 | from lamindb.errors import ValidationError 6 | from lamindb.models.feature import serialize_pandas_dtype 7 | from pandas.api.types import is_string_dtype 8 | 9 | 10 | @pytest.fixture(scope="module") 11 | def df(): 12 | return pd.DataFrame( 13 | { 14 | "feat1": [1, 2, 3], 15 | "feat2": [3.1, 4.2, 5.3], 16 | "feat3": ["cond1", "cond2", "cond2"], 17 | "feat4": ["id1", "id2", "id3"], 18 | "rando_feature": ["rando1", "rando2", "rando3"], 19 | } 20 | ) 21 | 22 | 23 | def test_feature_init(): 24 | # no args allowed 25 | with pytest.raises(ValueError): 26 | ln.Feature("x") 27 | # no dtype passed 28 | with pytest.raises(ValidationError): 29 | ln.Feature(name="feat") 30 | # is OK if also is_type is passed 31 | ln.Feature(name="Feat", is_type=True) 32 | # wrong type 33 | with pytest.raises(ValueError): 34 | ln.Feature(name="feat", dtype="x") 35 | # type 
has to be a list of SQLRecord types 36 | with pytest.raises(ValidationError): 37 | ln.Feature(name="feat", dtype="cat[1]") 38 | # ensure feat1 does not exist 39 | if feat1 := ln.Feature.filter(name="feat1").one_or_none() is not None: 40 | feat1.delete() 41 | feat1 = ln.Feature(name="feat", dtype="str").save() 42 | with pytest.raises(ValidationError) as error: 43 | ln.Feature(name="feat", dtype="cat") 44 | assert ( 45 | error.exconly() 46 | == "lamindb.errors.ValidationError: Feature feat already exists with dtype str, you passed cat" 47 | ) 48 | feat1.delete() 49 | 50 | # should just return the feature 51 | feat2 = ln.Feature(name="feat2", dtype="str", description="feat2").save() 52 | feat2_again = ln.Feature(name="feat2", dtype="str", description="feat2").save() 53 | assert feat2 == feat2_again 54 | feat2.delete() 55 | 56 | # check that this works 57 | feature = ln.Feature(name="feat1", dtype="cat[ULabel|bionty.Gene]") 58 | # check that it also works via objects 59 | feature = ln.Feature(name="feat1", dtype=[ln.ULabel, bt.Gene]) 60 | assert feature.dtype == "cat[ULabel|bionty.Gene]" 61 | 62 | 63 | def test_feature_from_df(df): 64 | if feat1 := ln.Feature.filter(name="feat1").one_or_none() is not None: 65 | feat1.delete() 66 | features = ln.Feature.from_df(df.iloc[:, :4]).save() 67 | artifact = ln.Artifact.from_df(df, description="test").save() 68 | # test for deprecated add_feature_set 69 | artifact.features.add_feature_set(ln.Schema(features), slot="columns") 70 | features = artifact.features["columns"] 71 | assert len(features) == len(df.columns[:4]) 72 | [col for col in df.columns if is_string_dtype(df[col])] 73 | categoricals = { 74 | col: df[col] for col in df.columns if isinstance(df[col], pd.CategoricalDtype) 75 | } 76 | for feature in features: 77 | if feature.name in categoricals: 78 | assert feature.dtype == "cat" 79 | else: 80 | orig_type = df[feature.name].dtype 81 | assert feature.dtype == serialize_pandas_dtype(orig_type) 82 | for feature in 
features: 83 | feature.save() 84 | labels = [ln.ULabel(name=name) for name in df["feat3"].unique()] 85 | ln.save(labels) 86 | feature = ln.Feature.get(name="feat3") 87 | feature.dtype = "cat" 88 | feature.save() 89 | with pytest.raises(ValidationError) as err: 90 | artifact.labels.add(labels, feature=feature) 91 | assert ( 92 | err.exconly() 93 | == "lamindb.errors.ValidationError: Cannot manually annotate a feature measured *within* the dataset. Please use a Curator." 94 | ) 95 | extfeature = ln.Feature(name="extfeat", dtype="str").save() 96 | with pytest.raises(ValidationError) as err: 97 | artifact.labels.add(labels, feature=extfeature) 98 | assert ( 99 | err.exconly() 100 | == f"lamindb.errors.ValidationError: Feature {extfeature.name} needs dtype='cat' for label annotation, currently has dtype='str'" 101 | ) 102 | 103 | # clean up 104 | artifact.delete(permanent=True) 105 | ln.Schema.filter().all().delete() 106 | ln.ULabel.filter().all().delete() 107 | ln.Feature.filter().all().delete() 108 | -------------------------------------------------------------------------------- /tests/core/test_from_values.py: -------------------------------------------------------------------------------- 1 | import bionty as bt 2 | import lamindb as ln 3 | import pandas as pd 4 | import pytest 5 | 6 | 7 | @pytest.fixture(scope="module") 8 | def df(): 9 | return pd.DataFrame( 10 | ( 11 | ["T cell", "CL:0000084"], 12 | ["hepatocyte", "CL:0000182"], 13 | ["my new cell type", ""], 14 | ), 15 | columns=["cell_type", "cell_type_id"], 16 | ) 17 | 18 | 19 | def test_from_values_name(df): 20 | bt.CellType.filter().delete() 21 | assert df["cell_type"].tolist() == ["T cell", "hepatocyte", "my new cell type"] 22 | # create records from bionty 23 | result = bt.CellType.from_values(df.cell_type, "name") 24 | ids = [i.ontology_id for i in result] 25 | assert len(result) == 2 26 | assert set(ids) == {"CL:0000084", "CL:0000182"} 27 | assert result[0].source.entity == "bionty.CellType" 28 | 29 | # 
wrong field type 30 | with pytest.raises(TypeError): 31 | result = bt.CellType.from_values(df.cell_type, field=bt.CellType) 32 | 33 | 34 | def test_from_values_ontology_id(df): 35 | assert df["cell_type_id"].tolist() == ["CL:0000084", "CL:0000182", ""] 36 | result = bt.CellType.from_values(df.cell_type_id, "ontology_id") 37 | names = {i.name for i in result} 38 | assert len(result) == 2 39 | assert names == {"T cell", "hepatocyte"} 40 | assert result[0].source.entity == "bionty.CellType" 41 | 42 | 43 | def test_from_values_multiple_match(): 44 | records = bt.Gene.from_values(["ABC1", "PDCD1"], bt.Gene.symbol, organism="human") 45 | assert len(records) == 3 46 | 47 | 48 | def test_get_or_create_records(): 49 | names = ["ulabel" + str(i) for i in range(25)] 50 | labels = [ln.ULabel(name=name) for name in names] 51 | ln.save(labels) 52 | # more than 20 existing values 53 | labels = ln.ULabel.from_values(names, field="name") 54 | assert len(labels) == 25 55 | 56 | 57 | def test_from_values_synonyms_aware(): 58 | bt.CellType.from_source(name="T cell").save() 59 | # existing validated values 60 | records = bt.CellType.from_values(["T cell"], "name") 61 | assert len(records) == 1 62 | assert records[0].name == "T cell" 63 | assert isinstance(records[0].source, bt.Source) 64 | # existing validated values and synonyms 65 | records = bt.CellType.from_values(["T cell", "T-cell"], "name") 66 | assert len(records) == 1 67 | assert records[0].name == "T cell" 68 | assert isinstance(records[0].source, bt.Source) 69 | # bionty values and synonyms 70 | records = bt.CellType.from_values(["B-cell", "B cell"], "name") 71 | assert len(records) == 1 72 | assert records[0].name == "B cell" 73 | assert isinstance(records[0].source, bt.Source) 74 | # all possibilities of validated values 75 | records = bt.CellType.from_values( 76 | ["T cell", "T-cell", "t cell", "B cell", "B-cell"], "name" 77 | ) 78 | assert len(records) == 2 79 | names = [r.name for r in records] 80 | assert set(names) == 
{"T cell", "B cell"} 81 | assert isinstance(records[0].source, bt.Source) 82 | assert isinstance(records[1].source, bt.Source) 83 | # non-validated values 84 | records = bt.CellType.from_values(["T cell", "mycell"], "name") 85 | assert len(records) == 1 86 | assert records[0].name == "T cell" 87 | assert isinstance(records[0].source, bt.Source) 88 | assert records[0].ontology_id == "CL:0000084" 89 | bt.CellType.filter().all().delete() 90 | 91 | 92 | def test_standardize(): 93 | # only name field can be standardized 94 | results = bt.Gene.from_values( 95 | ["HES4", "TNFRSF4"], field=bt.Gene.ensembl_gene_id, organism="human" 96 | ) 97 | assert len(results) == 0 98 | 99 | results = bt.Gene.from_values( 100 | ["HES4", "TNFRSF4"], field=bt.Gene.symbol, organism="human" 101 | ) 102 | assert len(results) == 2 103 | -------------------------------------------------------------------------------- /tests/core/test_has_parents.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | from lamindb.models.has_parents import _add_emoji 3 | 4 | 5 | def test_view_parents(): 6 | label1 = ln.ULabel(name="label1") 7 | label2 = ln.ULabel(name="label2") 8 | label1.save() 9 | label2.save() 10 | label1.parents.add(label2) 11 | label1.view_parents(ln.ULabel.name, distance=1) 12 | label1.delete() 13 | label2.delete() 14 | 15 | 16 | def test_query_parents_children(): 17 | label1 = ln.ULabel(name="label1").save() 18 | label2 = ln.ULabel(name="label2").save() 19 | label3 = ln.ULabel(name="label3").save() 20 | label1.children.add(label2) 21 | label2.children.add(label3) 22 | parents = label3.query_parents() 23 | assert len(parents) == 2 24 | assert label1 in parents and label2 in parents 25 | children = label1.query_children() 26 | assert len(children) == 2 27 | assert label2 in children and label3 in children 28 | label1.delete() 29 | label2.delete() 30 | label3.delete() 31 | 32 | 33 | def test_add_emoji(): 34 | transform = 
ln.Transform(key="test-12345", type="upload") 35 | assert _add_emoji(transform, label="transform") == "🖥️ transform" 36 | transform.save() 37 | run = ln.Run(transform=transform) 38 | assert _add_emoji(run, label="run") == "🖥️ run" 39 | transform.delete() 40 | 41 | 42 | def test_view_lineage_circular(): 43 | import pandas as pd 44 | 45 | transform = ln.Transform(key="test").save() 46 | run = ln.Run(transform=transform).save() 47 | artifact = ln.Artifact.from_df( 48 | pd.DataFrame({"a": [1, 2, 3]}), description="test artifact", run=run 49 | ).save() 50 | run.input_artifacts.add(artifact) 51 | artifact.view_lineage() 52 | artifact.delete(permanent=True) 53 | run.delete() 54 | transform.delete() 55 | -------------------------------------------------------------------------------- /tests/core/test_integrity.py: -------------------------------------------------------------------------------- 1 | import lamindb_setup as ln_setup 2 | 3 | 4 | def test_migrate_check(): 5 | assert ln_setup.migrate.check() 6 | 7 | 8 | def test_system_check(): 9 | ln_setup.django("check") 10 | -------------------------------------------------------------------------------- /tests/core/test_manager.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | 4 | def test_manager_list(): 5 | label = ln.ULabel(name="manager label") 6 | label.save() 7 | label_names = [f"ULabel {i}" for i in range(3)] 8 | labels = [ln.ULabel(name=name) for name in label_names] 9 | ln.save(labels) 10 | label.parents.set(labels) 11 | assert len(label.parents.list()) == 3 12 | assert "ULabel 1" in label.parents.list("name") 13 | label.delete() 14 | for label in labels: 15 | label.delete() 16 | -------------------------------------------------------------------------------- /tests/core/test_models.py: -------------------------------------------------------------------------------- 1 | import re 2 | import textwrap 3 | 4 | import lamindb as ln 5 | import pandas as 
pd 6 | import pytest 7 | 8 | 9 | def _strip_ansi(text: str) -> str: 10 | """Remove ANSI escape sequences from a string.""" 11 | ansi_escape = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])") 12 | return ansi_escape.sub("", text) 13 | 14 | 15 | def test_registry__repr__feature(): 16 | import lamindb.models as ln 17 | 18 | feature = ln.Param 19 | expected_repr = textwrap.dedent("""\ 20 | Feature 21 | Simple fields 22 | .uid: CharField 23 | .name: CharField 24 | .dtype: CharField 25 | .is_type: BooleanField 26 | .unit: CharField 27 | .description: CharField 28 | .array_rank: SmallIntegerField 29 | .array_size: IntegerField 30 | .array_shape: JSONField 31 | .proxy_dtype: CharField 32 | .synonyms: TextField 33 | .created_at: DateTimeField 34 | .updated_at: DateTimeField 35 | Relational fields 36 | .branch: Branch 37 | .space: Space 38 | .created_by: User 39 | .run: Run 40 | .type: Feature 41 | .schemas: Schema 42 | .features: Feature 43 | .values: FeatureValue 44 | .projects: Project 45 | """).strip() 46 | 47 | actual_repr = _strip_ansi(repr(feature)) 48 | print(actual_repr) 49 | assert actual_repr.strip() == expected_repr.strip() 50 | 51 | 52 | def test_registry__repr__artifact(): 53 | import lamindb.models as ln 54 | 55 | artifact = ln.Artifact 56 | expected_repr = textwrap.dedent("""\ 57 | Artifact 58 | Simple fields 59 | .uid: CharField 60 | .key: CharField 61 | .description: CharField 62 | .suffix: CharField 63 | .kind: CharField 64 | .otype: CharField 65 | .size: BigIntegerField 66 | .hash: CharField 67 | .n_files: BigIntegerField 68 | .n_observations: BigIntegerField 69 | .version: CharField 70 | .is_latest: BooleanField 71 | .created_at: DateTimeField 72 | .updated_at: DateTimeField 73 | Relational fields 74 | .branch: Branch 75 | .space: Space 76 | .storage: Storage 77 | .run: Run 78 | .schema: Schema 79 | .created_by: User 80 | .ulabels: ULabel 81 | .input_of_runs: Run 82 | .feature_sets: Schema 83 | .collections: Collection 84 | .records: Record 85 | 
.references: Reference 86 | .projects: Project 87 | Bionty fields 88 | .organisms: bionty.Organism 89 | .genes: bionty.Gene 90 | .proteins: bionty.Protein 91 | .cell_markers: bionty.CellMarker 92 | .tissues: bionty.Tissue 93 | .cell_types: bionty.CellType 94 | .diseases: bionty.Disease 95 | .cell_lines: bionty.CellLine 96 | .phenotypes: bionty.Phenotype 97 | .pathways: bionty.Pathway 98 | .experimental_factors: bionty.ExperimentalFactor 99 | .developmental_stages: bionty.DevelopmentalStage 100 | .ethnicities: bionty.Ethnicity 101 | """).strip() 102 | 103 | actual_repr = _strip_ansi(repr(artifact)) 104 | print(actual_repr) 105 | assert actual_repr.strip() == expected_repr.strip() 106 | 107 | 108 | def test_unsaved_relationship_modification_attempts(): 109 | af = ln.Artifact.from_df( 110 | pd.DataFrame({"col1": [1, 2, 3], "col2": [4, 5, 6]}), description="testme" 111 | ) 112 | 113 | new_label = ln.ULabel(name="testlabel").save() 114 | with pytest.raises(ValueError) as excinfo: 115 | af.ulabels.add(new_label) 116 | 117 | assert ( 118 | str(excinfo.value) 119 | == "You are trying to access the many-to-many relationships of an unsaved Artifact object. Please save it first using '.save()'." 
120 | ) 121 | 122 | new_label.delete() 123 | af.delete() 124 | -------------------------------------------------------------------------------- /tests/core/test_notebooks.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | from pathlib import Path 4 | 5 | import lamindb as ln 6 | import nbproject_test 7 | 8 | notebook_dir = Path(__file__).parent / "notebooks/" 9 | notebook_dir_duplicate = Path(__file__).parent / "notebooks/duplicate/" 10 | 11 | 12 | def test_all_notebooks(): 13 | env = os.environ 14 | env["LAMIN_TESTING"] = "true" 15 | nbproject_test.execute_notebooks(notebook_dir) 16 | nbproject_test.execute_notebooks(notebook_dir_duplicate) 17 | del env["LAMIN_TESTING"] 18 | 19 | 20 | def test_run_after_rename_no_uid(): 21 | notebook_path = ( 22 | notebook_dir / "with-title-initialized-consecutive-finish-not-last-cell.ipynb" 23 | ) 24 | result = subprocess.run( # noqa: S602 25 | f"jupyter nbconvert --to notebook --inplace --execute {notebook_path}", 26 | shell=True, 27 | capture_output=True, 28 | ) 29 | print(result.stdout.decode()) 30 | print(result.stderr.decode()) 31 | assert result.returncode == 0 32 | 33 | uid = ln.Transform.get( 34 | key="with-title-initialized-consecutive-finish-not-last-cell.ipynb" 35 | ).uid 36 | 37 | # now, assume the user renames the notebook 38 | new_path = notebook_path.with_name("no-uid-renamed.ipynb") 39 | os.system(f"cp {notebook_path} {new_path}") # noqa: S605 40 | 41 | env = os.environ 42 | env["LAMIN_TESTING"] = "true" 43 | result = subprocess.run( # noqa: S602 44 | f"jupyter nbconvert --to notebook --inplace --execute {new_path}", 45 | shell=True, 46 | capture_output=True, 47 | env=env, 48 | ) 49 | print(result.stdout.decode()) 50 | print(result.stderr.decode()) 51 | assert result.returncode == 0 52 | del env["LAMIN_TESTING"] 53 | 54 | assert ln.Transform.get(key="no-uid-renamed.ipynb").uid == uid 55 | 56 | # new_path.unlink() 57 | 
--------------------------------------------------------------------------------
/tests/core/test_run.py:
--------------------------------------------------------------------------------
import lamindb as ln
import pytest


def test_run():
    """Run requires a saved transform; reference fields default to None."""
    transform = ln.Transform(key="My transform")
    with pytest.raises(ValueError) as error:
        ln.Run(transform)
    assert (
        error.exconly()
        == "ValueError: Please save transform record before creating a run"
    )
    transform.save()
    run = ln.Run(transform)
    assert run.reference is None
    assert run.reference_type is None
    run2 = ln.Run(transform, reference="test1", reference_type="test2")
    assert run2.reference == "test1"
    assert run2.reference_type == "test2"
    assert run.uid != run2.uid
    transform.delete()


def test_edge_cases():
    """Run constructor rejects extra positional args and requires a transform."""
    with pytest.raises(ValueError) as error:
        ln.Run(1, 2)
    assert error.exconly() == "ValueError: Only one non-keyword arg allowed: transform"
    with pytest.raises(TypeError) as error:
        ln.Run()
    assert error.exconly() == "TypeError: Pass transform parameter"
--------------------------------------------------------------------------------
/tests/core/test_save.py:
--------------------------------------------------------------------------------
import lamindb as ln
import pytest
from lamindb.models.save import prepare_error_message, store_artifacts


def test_bulk_save_and_update():
    """ln.save creates new records and updates modified ones in bulk."""
    label_names = [f"ULabel {i} new" for i in range(3)]
    labels = [ln.ULabel(name=name) for name in label_names]
    # test bulk creation of new records
    ln.save(labels)
    assert len(ln.ULabel.filter(name__in=label_names).distinct().all()) == 3
    labels[0].name = "ULabel 0 updated"
    # test bulk update of existing records
    ln.save(labels)
    assert len(ln.ULabel.filter(name__in=label_names).distinct().all()) == 2
    assert ln.ULabel.get(name="ULabel 0 updated")


def test_prepare_error_message():
    """Error message distinguishes committed vs. uncommitted entries."""
    ln.core.datasets.file_mini_csv()
    artifact = ln.Artifact("mini.csv", description="test")
    exception = Exception("exception")

    error = prepare_error_message([], [artifact], exception)
    assert error.startswith(
        "The following entries have been successfully uploaded and committed to the database"
    )

    error = prepare_error_message([artifact], [], exception)
    assert error.startswith("No entries were uploaded or committed to the database")


def test_save_data_object():
    """Saving an artifact uploads it to storage."""
    ln.core.datasets.file_mini_csv()
    artifact = ln.Artifact("mini.csv", description="test")
    artifact.save()
    assert artifact.path.exists()
    artifact.delete(permanent=True, storage=True)


def test_store_artifacts_acid():
    """A failing storage-clearing step surfaces as RuntimeError from save paths."""
    ln.core.datasets.file_mini_csv()
    artifact = ln.Artifact("mini.csv", description="test")
    artifact._clear_storagekey = "test.csv"
    # errors on check_and_attempt_clearing
    with pytest.raises(RuntimeError):
        artifact.save()

    with pytest.raises(RuntimeError) as error:
        store_artifacts([artifact], using_key=None)
    assert str(error.exconly()).startswith(
        "RuntimeError: The following entries have been successfully uploaded"
    )

    artifact.delete(permanent=True)


def test_save_parents():
    """Bulk-saving ontology records also saves their parent records."""
    import bionty as bt

    records = bt.CellLine.from_values(["HEPG2", "HUVEC"])
    ln.save(records)
    assert bt.CellLine.get("4ea731nb").parents.df().shape[0] == 1
    bt.CellLine.filter().delete()
--------------------------------------------------------------------------------
/tests/core/test_search.py:
--------------------------------------------------------------------------------
import bionty as bt
import lamindb as ln
import pytest


@pytest.fixture(scope="module")
def prepare_cell_type_registry():
    """Populate the CellType registry with four known records; tear down afterwards."""
    bt.CellType.filter().all().delete()
    records = [
        {
            "ontology_id": "CL:0000084",
            "name": "T cell",
            "synonyms": "T-cell|T-lymphocyte|T lymphocyte",
            "children": ["CL:0000798", "CL:0002420", "CL:0002419", "CL:0000789"],
        },
        {
            "ontology_id": "CL:0000236",
            "name": "B cell",
            "synonyms": "B-lymphocyte|B lymphocyte|B-cell",
            "children": ["CL:0009114", "CL:0001201"],
        },
        {
            "ontology_id": "CL:0000696",
            "name": "PP cell",
            "synonyms": "type F enteroendocrine cell",
            "children": ["CL:0002680"],
        },
        {
            "ontology_id": "CL:0002072",
            "name": "nodal myocyte",
            "synonyms": "P cell|myocytus nodalis|cardiac pacemaker cell",
            "children": ["CL:1000409", "CL:1000410"],
        },
    ]
    public_records = []
    for ref_record in records:
        record = bt.CellType.from_source(ontology_id=ref_record["ontology_id"])
        assert record.name == ref_record["name"]
        assert set(record.synonyms.split("|")) == set(ref_record["synonyms"].split("|"))
        public_records.append(record)
    ln.save(public_records)
    yield "prepared"
    bt.CellType.filter().all().delete()


def test_search_synonyms(prepare_cell_type_registry):
    """Search matches synonyms, not only names."""
    result = bt.CellType.search("P cell").df()
    assert set(result.name.iloc[:2]) == {"nodal myocyte", "PP cell"}


def test_search_limit(prepare_cell_type_registry):
    """limit truncates the result set."""
    result = bt.CellType.search("P cell", limit=1).df()
    assert len(result) == 1


def test_search_case_sensitive(prepare_cell_type_registry):
    """case_sensitive=False matches regardless of casing."""
    result = bt.CellType.search("b cell", case_sensitive=False).df()
    assert result.name.iloc[0] == "B cell"


def test_search_None():
    """Searching for None raises with a clear message."""
    with pytest.raises(
        ValueError, match="Cannot search for None value! Please pass a valid string."
    ):
        bt.CellType.search(None)
--------------------------------------------------------------------------------
/tests/core/test_tracked.py:
--------------------------------------------------------------------------------
import concurrent.futures

import lamindb as ln
import pandas as pd
import pytest


@ln.tracked()
def process_chunk(chunk_id: int) -> str:
    """Create a small DataFrame for this chunk and save it as an artifact; return its key."""
    # Create a simple DataFrame
    df = pd.DataFrame(
        {"id": range(chunk_id * 10, (chunk_id + 1) * 10), "value": range(10)}
    )

    # Save it as an artifact
    key = f"chunk_{chunk_id}.parquet"
    artifact = ln.Artifact.from_df(df, key=key).save()
    return artifact.key


def test_tracked_parallel():
    """Each parallel call of a @ln.tracked() function gets its own run."""
    param_type = ln.Feature(name="Script[test_tracked.py]", is_type=True).save()
    ln.Feature(name="chunk_id", dtype="int", type=param_type).save()

    with pytest.raises(RuntimeError) as err:
        process_chunk(4)
    assert (
        err.exconly()
        == "RuntimeError: Please track the global run context before using @ln.tracked(): ln.track()"
    )

    # Ensure tracking is on
    ln.track()

    # Number of parallel executions
    n_parallel = 3

    # Use ThreadPoolExecutor for parallel execution
    with concurrent.futures.ThreadPoolExecutor(max_workers=n_parallel) as executor:
        # Submit all tasks
        futures = [executor.submit(process_chunk, i) for i in range(n_parallel)]
        # Get results as they complete
        chunk_keys = [
            future.result() for future in concurrent.futures.as_completed(futures)
        ]

    # Verify results
    # Each execution should have created its own artifact with unique run
    print(f"Created artifacts with keys: {chunk_keys}")
    artifacts = [ln.Artifact.get(key=key) for key in chunk_keys]

    # Check that we got the expected number of artifacts
    assert len(artifacts) == n_parallel

    # Verify each artifact has its own unique run
    runs = [artifact.run for artifact in artifacts]
    run_ids = [run.id for run in runs]
    print(f"Run IDs: {run_ids}")
    assert len(set(run_ids)) == n_parallel  # all runs should be unique

    # Verify each run has the correct start and finish times
    for run in runs:
        print(f"Run details: {run}")
        assert run.started_at is not None
        assert run.finished_at is not None
        assert run.started_at < run.finished_at

    # Clean up test artifacts
    for artifact in artifacts:
        artifact.delete(permanent=True)

    # reset the global tracking context so later tests start clean
    ln.context._uid = None
    ln.context._run = None
    ln.context._transform = None
    ln.context._path = None


if __name__ == "__main__":
    test_tracked_parallel()
--------------------------------------------------------------------------------
/tests/core/test_ulabel.py:
--------------------------------------------------------------------------------
import re

import lamindb as ln
import pytest
from lamindb.errors import FieldValidationError


def test_ulabel():
    """ULabel constructor validation: kwargs, positional args, type-name casing."""
    with pytest.raises(
        FieldValidationError,
        match=re.escape(
            "Only name, type, is_type, description, reference, reference_type are valid keyword arguments"
        ),
    ):
        ln.ULabel(x=1)

    with pytest.raises(ValueError) as error:
        ln.ULabel(1)
    assert error.exconly() == "ValueError: Only one non-keyword arg allowed"

    with pytest.raises(
        ValueError,
        match=re.escape(
            "'my_type' should start with a capital letter given you're defining a type"
        ),
    ):
        ln.ULabel(name="my_type", is_type=True)


def test_ulabel_plural_type_warning(ccaplog):
    """A plural type name triggers a singular-naming suggestion in the log."""
    ln.ULabel(name="MyThings", is_type=True)
    assert (
        "name 'MyThings' for type ends with 's', in case you're naming with plural, consider the singular for a type name"
        in ccaplog.text
    )
--------------------------------------------------------------------------------
/tests/core/test_view.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | 4 | def test_vew(): 5 | ln.view(modules="core") 6 | ln.view() 7 | -------------------------------------------------------------------------------- /tests/core/test_visibility.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | 4 | def testbranch_id(): 5 | # create a file with default branch_id 6 | with open("./testbranch_id.txt", "w") as f: 7 | f.write("branch_id") 8 | artifact = ln.Artifact("./testbranch_id.txt", description="testbranch_id").save() 9 | assert artifact.branch_id == 1 10 | 11 | # create a collection from file 12 | collection = ln.Collection(artifact, key="testbranch_id").save() 13 | 14 | # delete a collection will put both collection but not linked artifact in trash 15 | collection.delete() 16 | assert collection.ordered_artifacts[0].branch_id == 1 17 | result = ln.Collection.filter(key="testbranch_id").all() 18 | assert len(result) == 0 19 | result = ln.Collection.filter(key="testbranch_id", branch_id=1).all() 20 | assert len(result) == 0 21 | result = ln.Collection.filter(key="testbranch_id", visibility=1).all() 22 | assert len(result) == 0 23 | result = ln.Collection.filter(key="testbranch_id", branch_id=None).all() 24 | assert len(result) == 1 25 | result = ln.Collection.filter(key="testbranch_id", visibility=None).all() 26 | assert len(result) == 1 27 | 28 | # restore 29 | collection.restore() 30 | assert collection.branch_id == 1 31 | assert collection.ordered_artifacts[0].branch_id == 1 32 | 33 | # permanent delete 34 | collection.delete(permanent=True) 35 | result = ln.Artifact.filter(description="testbranch_id", branch_id=None).all() 36 | # also permanently deleted linked file 37 | assert len(result) == 1 38 | -------------------------------------------------------------------------------- /tests/curators/conftest.py: 
-------------------------------------------------------------------------------- 1 | import shutil 2 | 3 | import lamindb_setup as ln_setup 4 | import pytest 5 | 6 | 7 | def pytest_sessionstart(): 8 | ln_setup.init(storage="./testdb", modules="bionty,wetlab") 9 | 10 | 11 | def pytest_sessionfinish(session: pytest.Session): 12 | shutil.rmtree("./testdb") 13 | ln_setup.delete("testdb", force=True) 14 | 15 | 16 | @pytest.fixture 17 | def ccaplog(caplog): 18 | """Add caplog handler to our custom logger at session start.""" 19 | from lamin_utils._logger import logger 20 | 21 | # Add caplog's handler to our custom logger 22 | logger.addHandler(caplog.handler) 23 | 24 | yield caplog 25 | 26 | # Clean up at the end of the session 27 | logger.removeHandler(caplog.handler) 28 | -------------------------------------------------------------------------------- /tests/curators/test_curators_multivalue.py: -------------------------------------------------------------------------------- 1 | import bionty as bt 2 | import lamindb as ln 3 | import pandas as pd 4 | import pytest 5 | from lamindb.core.exceptions import ValidationError 6 | 7 | 8 | @pytest.fixture 9 | def df(): 10 | return pd.DataFrame( 11 | { 12 | "sample_id": [["sample1", "sample2"], ["sample2"], ["sample3"]], 13 | "dose": [[1.2, 2.3], [1.2], [2.3]], 14 | "cell_type": [["B cell", "T cell"], ["B cell"], ["T cell"]], 15 | "tissue": [["blood", "pulmo"], ["blood"], ["lung"]], 16 | } 17 | ) 18 | 19 | 20 | @pytest.fixture(scope="module") 21 | def lists_schema(): 22 | schema = ln.Schema( 23 | name="lists schema cat", 24 | features=[ 25 | ln.Feature(name="sample_id", dtype=list[str]).save(), 26 | ln.Feature(name="dose", dtype=list[float]).save(), 27 | ln.Feature(name="cell_type", dtype=list[str]).save(), 28 | ln.Feature(name="tissue", dtype=list[bt.Tissue]).save(), 29 | ], 30 | ).save() 31 | 32 | yield schema 33 | 34 | schema.delete() 35 | ln.Feature.filter().delete() 36 | bt.Tissue.filter().delete() 37 | 38 | 39 | def 
test_curator_df_multivalue(df, lists_schema): 40 | curator = ln.curators.DataFrameCurator(df, lists_schema) 41 | with pytest.raises(ValidationError): 42 | curator.validate() 43 | assert curator.cat._cat_vectors.keys() == {"columns", "tissue"} 44 | assert curator.cat._cat_vectors["tissue"]._validated == ["blood", "lung"] 45 | assert curator.cat._cat_vectors["tissue"]._non_validated == ["pulmo"] 46 | assert curator.cat._cat_vectors["tissue"]._synonyms == {"pulmo": "lung"} 47 | 48 | curator.cat.standardize("tissue") 49 | assert curator.cat._cat_vectors["tissue"]._non_validated == [] 50 | assert df["tissue"].tolist() == [["blood", "lung"], ["blood"], ["lung"]] 51 | 52 | assert curator.validate() is None 53 | -------------------------------------------------------------------------------- /tests/curators/test_cxg_curator.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | import numpy as np 3 | 4 | 5 | def test_cxg_curator(): 6 | schema_version = "5.2.0" 7 | adata = ln.core.datasets.small_dataset3_cellxgene() 8 | curator = ln.curators._legacy.CellxGeneAnnDataCatManager( 9 | adata, schema_version=schema_version 10 | ) 11 | 12 | adata.obs.rename(columns={"donor": "donor_id"}, inplace=True) 13 | curator = ln.curators._legacy.CellxGeneAnnDataCatManager( 14 | adata, 15 | defaults=ln.curators._legacy.CellxGeneAnnDataCatManager.cxg_categoricals_defaults, 16 | schema_version=schema_version, 17 | ) 18 | assert not curator.validate() 19 | 20 | adata = adata[:, ~adata.var.index.isin(curator.non_validated["var_index"])] 21 | adata.obs["tissue"] = adata.obs["tissue"].cat.rename_categories({"lungg": "lung"}) 22 | curator = ln.curators._legacy.CellxGeneAnnDataCatManager( 23 | adata, schema_version=schema_version 24 | ) 25 | assert curator.validate() 26 | 27 | artifact = curator.save_artifact( 28 | key=f"examples/dataset-curated-against-cxg-{curator.schema_version}.h5ad" 29 | ) 30 | title = "Cross-tissue immune cell analysis 
reveals tissue-specific features in humans (for test demo only)" 31 | 32 | adata.obsm["X_umap"] = np.zeros((adata.shape[0], 2)) 33 | adata_cxg = curator.to_cellxgene_anndata(is_primary_data=True, title=title) 34 | assert "cell_type_ontology_term_id" in adata_cxg.obs.columns 35 | 36 | artifact.delete(permanent=True) 37 | -------------------------------------------------------------------------------- /tests/curators/test_pert_curator.py: -------------------------------------------------------------------------------- 1 | # Here we use `PertCurator` to curate perturbation related columns in a subsetted `AnnData` object of [McFarland et al. 2020](https://www.nature.com/articles/s41467-020-17440-w). 2 | 3 | import bionty as bt 4 | import lamindb as ln 5 | import pandas as pd 6 | import wetlab as wl 7 | 8 | 9 | def test_pert_curator(): 10 | ln.settings.verbosity = "hint" 11 | adata = ( 12 | ln.Artifact.using("laminlabs/lamindata") 13 | .get(key="scrna/micro-macfarland2020.h5ad") 14 | .load() 15 | ) 16 | 17 | # ## Curate and register perturbations 18 | # 19 | # Required columns: 20 | # - Either "pert_target" or "pert_name" and "pert_type" ("pert_type" allows: "genetic", "drug", "biologic", "physical") 21 | # - If pert_dose = True (default), requires "pert_dose" in form of number+unit. E.g. 10.0nM 22 | # - If pert_time = True (default), requires "pert_time" in form of number+unit. E.g. 
10.0h 23 | 24 | # + 25 | # rename the columns to match the expected format 26 | adata.obs["pert_time"] = adata.obs["time"].apply( 27 | lambda x: str(x).split(", ")[-1] + "h" if pd.notna(x) else x 28 | ) # we only take the last timepoint 29 | adata.obs["pert_dose"] = adata.obs["dose_value"].map( 30 | lambda x: f"{x}{adata.obs['dose_unit'].iloc[0]}" if pd.notna(x) else None 31 | ) 32 | adata.obs.rename( 33 | columns={"perturbation": "pert_name", "perturbation_type": "pert_type"}, 34 | inplace=True, 35 | ) 36 | # fix the perturbation type as suggested by the curator 37 | adata.obs["pert_type"] = adata.obs["pert_type"].cat.rename_categories( 38 | {"CRISPR": "genetic", "drug": "compound"} 39 | ) 40 | 41 | adata.obs["tissue_type"] = "cell culture" 42 | 43 | curator = ln.curators._legacy.PertAnnDataCatManager(adata) 44 | 45 | assert curator.validate() is not True 46 | 47 | # ### Genetic perturbations 48 | 49 | # register genetic perturbations with their target genes 50 | pert_target_map = { 51 | "sggpx4-1": "GPX4", 52 | "sggpx4-2": "GPX4", 53 | "sgor2j2": "OR2J2", # cutting control 54 | } 55 | 56 | ln.settings.creation.search_names = False 57 | for sg_name, gene_symbol in pert_target_map.items(): 58 | pert = wl.GeneticPerturbation.filter( 59 | system="CRISPR-Cas9", name=sg_name 60 | ).one_or_none() 61 | if pert is None: 62 | pert = wl.GeneticPerturbation( 63 | system="CRISPR-Cas9", 64 | name=sg_name, 65 | description="cutting control" if sg_name == "sgor2j2" else None, 66 | ).save() 67 | target = wl.PerturbationTarget.filter(name=gene_symbol).one_or_none() 68 | if target is None: 69 | target = wl.PerturbationTarget(name=gene_symbol).save() 70 | pert.targets.add(target) 71 | genes = bt.Gene.filter(symbol=gene_symbol).all() 72 | if len(genes) == 0: 73 | genes = bt.Gene.from_values( 74 | [gene_symbol], field=bt.Gene.symbol, organism="human" 75 | ).save() 76 | target.genes.add(*genes) 77 | ln.settings.creation.search_names = True 78 | 79 | adata.obs["pert_target"] = 
adata.obs["pert_genetic"].map(pert_target_map) 80 | 81 | # register the negative control without targets: Non-cutting control 82 | wl.GeneticPerturbation( 83 | name="sglacz", system="CRISPR-Cas9", description="non-cutting control" 84 | ).save() 85 | 86 | # ### Compounds 87 | 88 | # the remaining compounds are not in CHEBI and we create records for them 89 | curator.add_new_from("pert_compound") 90 | 91 | # manually fix sex and set assay 92 | adata.obs["sex"] = adata.obs["sex"].astype(str).str.lower() 93 | adata.obs["assay"] = "10x 3' v3" 94 | 95 | # subset the adata to only include the validated genes 96 | if "var_index" in curator.non_validated: 97 | adata = adata[ 98 | :, ~adata.var_names.isin(curator.non_validated["var_index"]) 99 | ].copy() 100 | 101 | # standardize disease and sex as suggested 102 | curator.standardize("disease") 103 | 104 | curator = wl.PertCurator(adata) 105 | curator.validate() 106 | curator.standardize("all") 107 | curator.add_new_from("all") 108 | 109 | assert curator.validate() is True 110 | -------------------------------------------------------------------------------- /tests/permissions/conftest.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | from subprocess import DEVNULL, run 3 | from time import perf_counter 4 | 5 | import lamindb_setup as ln_setup 6 | import pytest 7 | from lamin_utils import logger 8 | 9 | 10 | def pytest_sessionstart(): 11 | t_execute_start = perf_counter() 12 | 13 | ln_setup.settings.auto_connect = True 14 | # these are called in separate scripts because can't change connection 15 | # within the same python process due to django 16 | # init instance and setup RLS 17 | run( # noqa: S602 18 | "python ./tests/permissions/scripts/setup_instance.py", 19 | shell=True, 20 | capture_output=False, 21 | ) 22 | # populate permissions and models via the admin connection 23 | run( # noqa: S602 24 | "python ./tests/permissions/scripts/setup_access.py", 25 | 
shell=True, 26 | capture_output=False, 27 | ) 28 | 29 | total_time_elapsed = perf_counter() - t_execute_start 30 | print(f"Time to setup the instance: {total_time_elapsed:.3f}s") 31 | 32 | 33 | def pytest_sessionfinish(session: pytest.Session): 34 | logger.set_verbosity(1) 35 | shutil.rmtree("./default_storage_permissions") 36 | ln_setup.delete("lamindb-test-permissions", force=True) 37 | run("docker stop pgtest && docker rm pgtest", shell=True, stdout=DEVNULL) # noqa: S602 38 | -------------------------------------------------------------------------------- /tests/permissions/jwt_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import psycopg2 4 | 5 | 6 | def sign_jwt(db_url, payload: dict) -> str: 7 | with psycopg2.connect(db_url) as conn, conn.cursor() as cur: 8 | cur.execute( 9 | """ 10 | SELECT sign( 11 | %s::json, 12 | (SELECT security.get_secret('jwt_secret')), 13 | %s 14 | ) 15 | """, 16 | (json.dumps(payload), "HS256"), 17 | ) 18 | token = cur.fetchone()[0] 19 | if not token: 20 | msg = "Failed to generate JWT" 21 | raise ValueError(msg) 22 | return token 23 | -------------------------------------------------------------------------------- /tests/permissions/scripts/check_lamin_dev.py: -------------------------------------------------------------------------------- 1 | import lamindb_setup as ln_setup 2 | 3 | ln_setup.settings.auto_connect = False 4 | 5 | import lamindb as ln 6 | 7 | assert ln.setup.settings.user.handle == "testuser1" 8 | 9 | ln.connect("laminlabs/lamin-dev") 10 | 11 | assert ln.setup.settings.instance.slug == "laminlabs/lamin-dev" 12 | 13 | space_name = "Our test space for CI" 14 | ln.track(space=space_name) 15 | 16 | assert ln.context.space.name == space_name 17 | ulabel = ln.ULabel(name="My test ulabel in test space").save() 18 | assert ulabel.space.name == "All" # ulabel should end up in common space 19 | ulabel.delete() # delete silently passes in case another worker 
deleted the ulabel 20 | assert ( 21 | ln.context.transform.space.name == space_name 22 | ) # transform and run in restricted space 23 | assert ln.context.run.space.name == space_name # transform and run in restricted space 24 | ln.context.transform.delete() 25 | -------------------------------------------------------------------------------- /tests/permissions/scripts/clean_lamin_dev.py: -------------------------------------------------------------------------------- 1 | import lamindb_setup as ln_setup 2 | 3 | ln_setup.settings.auto_connect = False 4 | 5 | import lamindb as ln 6 | 7 | assert ln.setup.settings.user.handle == "testuser1" 8 | 9 | ln.connect("laminlabs/lamin-dev") 10 | 11 | assert ln.setup.settings.instance.slug == "laminlabs/lamin-dev" 12 | 13 | artifact = ln.Artifact.get(key="mytest") 14 | assert artifact.space.name == "Our test space for CI" 15 | artifact.delete() 16 | -------------------------------------------------------------------------------- /tests/permissions/scripts/setup_access.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln # noqa 2 | import hubmodule.models as hm 3 | from uuid import uuid4 4 | from hubmodule._setup import _install_db_module 5 | from laminhub_rest.core.postgres import DbRoleHandler 6 | 7 | # create a db connection url that works with RLS 8 | JWT_ROLE_NAME = "permissions_jwt" 9 | 10 | 11 | def create_jwt_user(dsn_admin: str, jwt_role_name: str): 12 | db_role_handler = DbRoleHandler(dsn_admin) 13 | jwt_db_url = db_role_handler.create( 14 | jwt_role_name, expires_in=None, alter_if_exists=True 15 | ) 16 | db_role_handler.permission.grant_write_jwt(jwt_role_name) 17 | return jwt_db_url 18 | 19 | 20 | pgurl = "postgresql://postgres:pwd@0.0.0.0:5432/pgtest" # admin db connection url 21 | jwt_db_url = create_jwt_user(pgurl, jwt_role_name=JWT_ROLE_NAME) 22 | _install_db_module(pgurl, jwt_role_name=JWT_ROLE_NAME) 23 | 24 | print("Created jwt db connection") 25 | 26 | # 
create models 27 | 28 | full_access = ln.Space(name="full access", uid="00000001").save() # type: ignore 29 | select_access = ln.Space(name="select access", uid="00000002").save() # type: ignore 30 | no_access = ln.Space(name="no access", uid="00000003").save() # type: ignore 31 | # set read role for the default space 32 | account = hm.Account( 33 | id=ln.setup.settings.user._uuid.hex, uid="accntid1", role="read" 34 | ).save() 35 | 36 | # no access space 37 | ulabel = ln.ULabel(name="no_access_ulabel") 38 | ulabel.space = no_access 39 | ulabel.save() 40 | 41 | project = ln.Project(name="No_access_project") # type: ignore 42 | project.space = no_access 43 | project.save() 44 | 45 | # setup write access space 46 | hm.AccessSpace(account=account, space=full_access, role="write").save() 47 | 48 | ulabel = ln.ULabel(name="full_access_ulabel") 49 | ulabel.space = full_access 50 | ulabel.save() 51 | # setup read access space 52 | hm.AccessSpace(account=account, space=select_access, role="read").save() 53 | 54 | ulabel = ln.ULabel(name="select_ulabel") 55 | ulabel.space = select_access 56 | ulabel.save() 57 | # artificial but better to test 58 | # create a link table referencing rows in different spaces 59 | ulabel.projects.add(project) 60 | 61 | # default space, only select access by default 62 | ulabel = ln.ULabel(name="default_space_ulabel").save() 63 | ulabel.projects.add(project) 64 | 65 | project = ln.Project(name="default_space_project").save() 66 | ulabel.projects.add(project) 67 | 68 | # create a link table referencing ulabel from the default space and project from select space 69 | project = ln.Project(name="select_project") 70 | project.space = select_access 71 | project.save() 72 | 73 | ulabel.projects.add(project) 74 | 75 | # setup team and relevent models 76 | team_access = ln.Space(name="team access", uid="00000004").save() # type: ignore 77 | team = hm.Team(id=uuid4().hex, uid="teamuiduid11", name="test_team", role="read").save() 78 | 
hm.AccountTeam(account=account, team=team).save() 79 | hm.AccessSpace(team=team, space=team_access, role="read").save() 80 | 81 | feature = ln.Feature(name="team_access_feature", dtype=float) 82 | feature.space = team_access 83 | feature.save() 84 | 85 | print("Created models") 86 | 87 | # save jwt db connection 88 | 89 | ln.setup.settings.instance._db = jwt_db_url 90 | ln.setup.settings.instance._persist() 91 | -------------------------------------------------------------------------------- /tests/permissions/scripts/setup_instance.py: -------------------------------------------------------------------------------- 1 | import lamindb_setup as ln_setup 2 | from laminci.db import setup_local_test_postgres 3 | 4 | pgurl = setup_local_test_postgres() 5 | 6 | ln_setup.init( 7 | storage="./default_storage_permissions", 8 | name="lamindb-test-permissions", 9 | db=pgurl, 10 | ) 11 | 12 | # can't add this app in the init because don't want t trigger the initial migration 13 | # that conflicts with _install_db_module 14 | ln_setup.settings.instance._schema_str = "hubmodule" 15 | ln_setup.settings.instance._persist() 16 | -------------------------------------------------------------------------------- /tests/storage/conftest.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | from subprocess import DEVNULL, run 3 | from time import perf_counter 4 | 5 | import lamindb_setup as ln_setup 6 | import pytest 7 | from lamin_utils import logger 8 | from laminci.db import setup_local_test_postgres 9 | 10 | AUTO_CONNECT = ln_setup.settings.auto_connect 11 | ln_setup.settings.auto_connect = False 12 | 13 | import lamindb as ln 14 | 15 | 16 | def pytest_sessionstart(): 17 | t_execute_start = perf_counter() 18 | 19 | ln_setup._TESTING = True 20 | pgurl = setup_local_test_postgres() 21 | ln.setup.init( 22 | storage="./default_storage_unit_storage", 23 | modules="bionty", 24 | name="lamindb-unit-tests-storage", 25 | db=pgurl, 26 | ) 27 
| ln.setup.register() # temporarily 28 | ln.setup.settings.auto_connect = True 29 | ln.settings.creation.artifact_silence_missing_run_warning = True 30 | ln.settings.storage = ( 31 | "s3://lamindb-ci/test-data" # register as valid storage location 32 | ) 33 | ln.settings.storage = "./default_storage_unit_storage" 34 | total_time_elapsed = perf_counter() - t_execute_start 35 | print(f"Time to setup the instance: {total_time_elapsed:.3f}s") 36 | 37 | 38 | def pytest_sessionfinish(session: pytest.Session): 39 | logger.set_verbosity(1) 40 | shutil.rmtree("./default_storage_unit_storage") 41 | # handle below better in the future 42 | if ln.UPath("s3://lamindb-test/storage/.lamindb").exists(): 43 | ln.UPath("s3://lamindb-test/storage/.lamindb").rmdir() 44 | another_storage = ln.UPath("s3://lamindb-ci/lamindb-unit-tests-cloud/.lamindb") 45 | if another_storage.exists(): 46 | another_storage.rmdir() 47 | ln.setup.delete("lamindb-unit-tests-storage", force=True) 48 | run("docker stop pgtest && docker rm pgtest", shell=True, stdout=DEVNULL) # noqa: S602 49 | ln.setup.settings.auto_connect = AUTO_CONNECT 50 | -------------------------------------------------------------------------------- /tests/storage/test_artifact_zarr.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | from pathlib import Path 3 | 4 | import anndata as ad 5 | import lamindb as ln 6 | import numpy as np 7 | import pandas as pd 8 | import pytest 9 | from lamindb.core.storage._zarr import identify_zarr_type 10 | from lamindb_setup.core.upath import ( 11 | CloudPath, 12 | ) 13 | 14 | 15 | @pytest.fixture(scope="session") 16 | def get_small_adata(): 17 | return ad.AnnData( 18 | X=np.array([[1, 2, 3], [4, 5, 6]]), 19 | obs={"feat1": ["A", "B"]}, 20 | var=pd.DataFrame(index=["MYC", "TCF7", "GATA1"]), 21 | obsm={"X_pca": np.array([[1, 2], [3, 4]])}, 22 | ) 23 | 24 | 25 | def test_zarr_upload_cache(get_small_adata): 26 | previous_storage = 
ln.setup.settings.storage.root_as_str 27 | ln.settings.storage = "s3://lamindb-test/core" 28 | 29 | zarr_path = Path("./test_adata.zarr") 30 | get_small_adata.write_zarr(zarr_path) 31 | 32 | artifact = ln.Artifact(zarr_path, key="test_adata.zarr") 33 | assert artifact.otype == "AnnData" 34 | assert artifact.n_files >= 1 35 | artifact.save() 36 | 37 | assert isinstance(artifact.path, CloudPath) 38 | assert artifact.path.exists() 39 | assert identify_zarr_type(artifact.path) == "anndata" 40 | 41 | shutil.rmtree(artifact.cache()) 42 | 43 | cache_path = artifact._cache_path 44 | assert isinstance(artifact.load(), ad.AnnData) 45 | assert cache_path.is_dir() 46 | 47 | shutil.rmtree(cache_path) 48 | assert not cache_path.exists() 49 | artifact.cache() 50 | assert cache_path.is_dir() 51 | 52 | artifact.delete(permanent=True, storage=True) 53 | shutil.rmtree(zarr_path) 54 | 55 | # test zarr from memory 56 | artifact = ln.Artifact(get_small_adata, key="test_adata.anndata.zarr") 57 | assert artifact._local_filepath.is_dir() 58 | assert artifact.otype == "AnnData" 59 | assert artifact.suffix == ".anndata.zarr" 60 | assert artifact.n_files >= 1 61 | 62 | artifact.save() 63 | assert isinstance(artifact.path, CloudPath) 64 | assert artifact.path.exists() 65 | cache_path = artifact._cache_path 66 | assert cache_path.is_dir() 67 | 68 | shutil.rmtree(cache_path) 69 | assert not cache_path.exists() 70 | 71 | artifact._memory_rep = None 72 | 73 | assert isinstance(artifact.load(), ad.AnnData) 74 | assert cache_path.is_dir() 75 | 76 | artifact.delete(permanent=True, storage=True) 77 | 78 | ln.settings.storage = previous_storage 79 | -------------------------------------------------------------------------------- /tests/storage/test_switch_storage.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import lamindb as ln 4 | 5 | 6 | def test_settings_switch_storage(): 7 | ln.settings.storage = 
"./default_storage_unit_storage" 8 | assert ( 9 | ln.settings.storage.root.resolve() 10 | == Path("./default_storage_unit_storage").resolve() 11 | ) 12 | new_storage_location = "s3://lamindb-ci/test-settings-switch-storage" 13 | ln.settings.storage = new_storage_location 14 | assert ln.setup.settings.storage.type_is_cloud 15 | assert ln.setup.settings.storage.root_as_str == new_storage_location 16 | # root.fs contains the underlying fsspec filesystem 17 | # the following is set by lamindb to True for s3 by default 18 | assert ln.setup.settings.storage.root.fs.cache_regions 19 | ln.settings.storage = new_storage_location, {"cache_regions": False} 20 | assert not ln.setup.settings.storage.root.fs.cache_regions 21 | assert ln.Storage.filter(root=new_storage_location).one_or_none() is not None 22 | # switch back to default storage 23 | ln.settings.storage = "./default_storage_unit_storage" 24 | --------------------------------------------------------------------------------