├── lamindb ├── py.typed ├── migrations │ ├── __init__.py │ ├── 0134_run_params.py │ ├── 0124_page_artifact_page_collection_page_feature_page_and_more.py │ ├── 0110_rename_values_artifacts_record_linked_artifacts.py │ ├── 0093_alter_schemacomponent_unique_together.py │ ├── 0144_alter_transform_is_flow.py │ ├── 0131_record_unique_name_type_space.py │ ├── 0105_record_unique_name.py │ ├── 0081_revert_textfield_collection.py │ ├── 0082_alter_feature_dtype.py │ ├── 0128_artifact__real_key.py │ ├── 0099_alter_writelog_seqno.py │ ├── 0150_rename_params_record_extra_data_and_more.py │ ├── 0114_alter_run__status_code.py │ ├── 0148_artifact_params_record_params.py │ ├── 0091_alter_featurevalue_options_alter_space_options_and_more.py │ ├── 0097_remove_schemaparam_param_remove_paramvalue_param_and_more.py │ ├── 0119_rename_records_project_linked_in_records.py │ ├── 0106_transfer_data_migration.py │ ├── 0135_alter_collection_hash_and_more.py │ ├── 0108_remove_record_sheet_remove_sheetproject_sheet_and_more.py │ ├── 0138_remove_record_linked_users_user_linked_in_records.py │ ├── 0127_alter_run_status_code_feature_dtype.py │ ├── 0079_alter_rundata_value_json_and_more.py │ ├── 0084_alter_schemafeature_feature_and_more.py │ ├── 0117_fix_artifact_storage_hash_unique_constraints.py │ ├── 0143_remove_transform_entrypoint_transform_config_and_more.py │ ├── 0087_rename__schemas_m2m_artifact_feature_sets_and_more.py │ ├── 0103_remove_writelog_migration_state_and_more.py │ ├── 0115_alter_space_uid.py │ ├── 0116_remove_artifact_unique_artifact_storage_key_hash_and_more.py │ ├── 0113_lower_case_branch_and_space_names.py │ ├── 0120_add_record_fk_constraint.py │ ├── 0132_record_parents_record_reference_and_more.py │ ├── 0121_recorduser.py │ ├── 0085_alter_feature_is_type_alter_flextable_is_type_and_more.py │ ├── 0151_feature_update_feature_on_name_change.py │ ├── 0107_add_schema_to_record.py │ ├── 0102_remove_writelog_branch_code_and_more.py │ ├── 0070_lamindbv1_migrate_data.py │ ├── 
0090_runproject_project_runs.py │ ├── 0092_alter_artifactfeaturevalue_artifact_and_more.py │ ├── 0139_alter_reference_text.py │ ├── 0125_artifact_is_locked_collection_is_locked_and_more.py │ ├── 0083_alter_feature_is_type_alter_flextable_is_type_and_more.py │ ├── 0123_alter_artifact_description_alter_branch_description_and_more.py │ └── 0086_various.py ├── examples │ ├── fixtures │ │ └── __init__.py │ ├── schemas │ │ ├── define_valid_features.py │ │ ├── __init__.py │ │ ├── define_schema_anndata_ensembl_gene_ids_and_valid_features_in_obs.py │ │ ├── _simple.py │ │ └── _anndata.py │ ├── __init__.py │ ├── cellxgene │ │ ├── __init__.py │ │ └── cellxgene_schema_versions.csv │ ├── datasets │ │ ├── define_mini_immuno_schema_flexible.py │ │ ├── define_mini_immuno_features_labels.py │ │ ├── _fake.py │ │ ├── save_mini_immuno_datasets.py │ │ └── __init__.py │ ├── mlflow │ │ └── __init__.py │ ├── wandb │ │ └── __init__.py │ └── croissant │ │ ├── __init__.py │ │ └── mini_immuno.anndata.zarr_metadata.json ├── base │ ├── ids.py │ ├── __init__.py │ ├── users.py │ ├── utils.py │ └── uids.py ├── core │ ├── exceptions.py │ ├── subsettings │ │ ├── __init__.py │ │ ├── _annotation_settings.py │ │ └── _creation_settings.py │ ├── types.py │ ├── storage │ │ ├── _valid_suffixes.py │ │ ├── __init__.py │ │ ├── _pyarrow_dataset.py │ │ └── _spatialdata_accessor.py │ ├── __init__.py │ ├── _track_environment.py │ └── _compat.py ├── setup │ ├── core │ │ └── __init__.py │ ├── types │ │ └── __init__.py │ ├── errors │ │ └── __init__.py │ ├── _switch.py │ └── __init__.py ├── integrations │ ├── __init__.py │ └── lightning.py └── curators │ └── __init__.py ├── docs ├── changelog.md ├── bionty.md ├── wetlab.md ├── lamindb.md ├── query-search.md ├── scripts │ ├── synced_with_git.py │ ├── run_track_and_finish.py │ ├── define_schema_anndata_uns.py │ ├── define_schema_df_metadata.py │ ├── curate_dataframe_flexible.py │ ├── curate_anndata_uns.py │ ├── curate_anndata_flexible.py │ ├── 
curate_dataframe_minimal_errors.py │ ├── run_track_with_features_and_params.py │ ├── curate_dataframe_attrs.py │ ├── curate_spatialdata.py │ ├── run_track_with_params.py │ ├── curate_dataframe_external_features.py │ ├── run_workflow.py │ ├── curate_soma_experiment.py │ ├── define_schema_spatialdata.py │ └── curate_mudata.py ├── storage │ ├── test-files │ │ ├── iris.data │ │ ├── iris.csv │ │ └── new_iris.csv │ ├── test_notebooks.py │ └── prepare-transfer-local-to-cloud.ipynb ├── index.md ├── storage.md ├── api.md ├── faq │ ├── test_notebooks.py │ ├── trash-archive.md │ ├── validate-fields.ipynb │ ├── import-modules.ipynb │ └── reference-field.ipynb ├── faq.md ├── guide.md ├── test_notebooks.py └── includes │ └── installation.md ├── tests ├── core │ ├── scripts │ │ ├── duplicate4 │ │ │ └── script-to-test-versioning.py │ │ ├── script-to-test-filename-change.py │ │ ├── script-to-test-versioning.py │ │ ├── duplicate1 │ │ │ └── script-to-test-versioning.py │ │ ├── duplicate3 │ │ │ └── script-to-test-versioning.py │ │ ├── duplicate5 │ │ │ └── script-to-test-versioning.py │ │ └── duplicate2 │ │ │ └── script-to-test-versioning.py │ ├── test_view.py │ ├── test_integrity.py │ ├── test_nbconvert.py │ ├── test_db.py │ ├── test_settings.py │ ├── test_manager.py │ ├── test_artifact_parquet.py │ ├── notebooks │ │ ├── duplicate │ │ │ └── with-title-initialized-consecutive-finish.ipynb │ │ ├── load_schema.ipynb │ │ ├── no-title.ipynb │ │ ├── with-title-initialized-consecutive-finish-not-last-cell.ipynb │ │ ├── with-title-initialized-consecutive-finish.ipynb │ │ ├── basic-r-notebook.Rmd.cleaned.html │ │ └── basic-r-notebook.Rmd.html │ ├── test_storage.py │ ├── test_artifact_anndata_with_curation.py │ ├── test_branches.py │ ├── test_run.py │ ├── test_delete.py │ ├── test_notebooks.py │ ├── test_has_parents.py │ ├── test_search.py │ ├── _dataset_fixtures.py │ ├── test_label_mutations.py │ ├── test_querydb.py │ └── test_rename_features_labels.py ├── curators │ ├── 
test_curators_remote.py │ ├── conftest.py │ └── test_curate_from_croissant.py ├── conftest.py ├── permissions │ ├── scripts │ │ └── setup_instance.py │ ├── jwt_utils.py │ └── conftest.py ├── no_instance │ └── test_no_default_instance.py ├── integrations │ └── conftest.py └── storage │ ├── test_connect_reconnect.py │ ├── test_artifact_zarr.py │ └── conftest.py ├── .github ├── ISSUE_TEMPLATE │ ├── enhancement.yml │ ├── config.yml │ ├── usage_question.yml │ └── bug_report.yml └── workflows │ └── doc-changes.yml ├── .gitmodules └── .pre-commit-config.yaml /lamindb/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lamindb/migrations/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lamindb/examples/fixtures/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lamindb/base/ids.py: -------------------------------------------------------------------------------- 1 | from .uids import * # noqa: F403 2 | -------------------------------------------------------------------------------- /docs/changelog.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | Actual content in lamin-docs. 
4 | -------------------------------------------------------------------------------- /lamindb/core/exceptions.py: -------------------------------------------------------------------------------- 1 | from ..errors import * # noqa: F403 backward compat 2 | -------------------------------------------------------------------------------- /docs/bionty.md: -------------------------------------------------------------------------------- 1 | # `bionty` 2 | 3 | ```{eval-rst} 4 | .. automodule:: bionty 5 | ``` 6 | -------------------------------------------------------------------------------- /docs/wetlab.md: -------------------------------------------------------------------------------- 1 | # `wetlab` 2 | 3 | ```{eval-rst} 4 | .. automodule:: wetlab 5 | ``` 6 | -------------------------------------------------------------------------------- /docs/lamindb.md: -------------------------------------------------------------------------------- 1 | # `lamindb` 2 | 3 | ```{eval-rst} 4 | .. automodule:: lamindb 5 | ``` 6 | -------------------------------------------------------------------------------- /tests/core/scripts/duplicate4/script-to-test-versioning.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | ln.track() 4 | -------------------------------------------------------------------------------- /tests/core/scripts/script-to-test-filename-change.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | ln.track("Ro1gl7n8YrdH0001") 4 | -------------------------------------------------------------------------------- /docs/query-search.md: -------------------------------------------------------------------------------- 1 | # Query, search & stream 2 | 3 | ```{toctree} 4 | :maxdepth: 1 5 | 6 | registries 7 | arrays 8 | ``` 9 | -------------------------------------------------------------------------------- /tests/core/test_view.py: 
-------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | 4 | def test_view(): 5 | ln.view(modules="core") 6 | ln.view() 7 | -------------------------------------------------------------------------------- /tests/core/scripts/script-to-test-versioning.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | ln.context.version = "1" 4 | ln.track("Ro1gl7n8YrdH0000") 5 | -------------------------------------------------------------------------------- /lamindb/examples/schemas/define_valid_features.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | schema = ln.Schema(name="valid_features", itype=ln.Feature).save() 4 | -------------------------------------------------------------------------------- /tests/core/scripts/duplicate1/script-to-test-versioning.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | ln.context.version = "1" 4 | ln.track("Ro1gl7n8YrdH0001") 5 | -------------------------------------------------------------------------------- /tests/core/scripts/duplicate3/script-to-test-versioning.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | ln.context.version = "3" 4 | ln.track("Ro1gl7n8YrdH0002") 5 | -------------------------------------------------------------------------------- /docs/scripts/synced_with_git.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | ln.settings.sync_git_repo = "https://github.com/..." 
4 | ln.track() 5 | # your code 6 | ln.finish() 7 | -------------------------------------------------------------------------------- /tests/core/scripts/duplicate5/script-to-test-versioning.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | # different from the one in duplicate4 4 | ln.track() 5 | 6 | ln.finish() 7 | -------------------------------------------------------------------------------- /tests/core/scripts/duplicate2/script-to-test-versioning.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | ln.context.version = "2" 4 | ln.track("Ro1gl7n8YrdH0002") 5 | 6 | assert ln.context.transform.version == "2" 7 | -------------------------------------------------------------------------------- /lamindb/setup/core/__init__.py: -------------------------------------------------------------------------------- 1 | import lamindb_setup as _lamindb_setup 2 | from lamindb_setup.core import * # noqa: F403 3 | 4 | __doc__ = _lamindb_setup.core.__doc__.replace("lamindb_setup", "lamindb.setup") 5 | -------------------------------------------------------------------------------- /lamindb/setup/types/__init__.py: -------------------------------------------------------------------------------- 1 | import lamindb_setup as _lamindb_setup 2 | from lamindb_setup.types import * # noqa: F403 3 | 4 | __doc__ = _lamindb_setup.types.__doc__.replace("lamindb_setup", "lamindb.setup") 5 | -------------------------------------------------------------------------------- /lamindb/setup/errors/__init__.py: -------------------------------------------------------------------------------- 1 | import lamindb_setup as _lamindb_setup 2 | from lamindb_setup.errors import * # noqa: F403 3 | 4 | __doc__ = _lamindb_setup.errors.__doc__.replace("lamindb_setup", "lamindb.setup") 5 | -------------------------------------------------------------------------------- 
/tests/core/test_integrity.py: -------------------------------------------------------------------------------- 1 | import lamindb_setup as ln_setup 2 | 3 | 4 | def test_migrate_check(): 5 | assert ln_setup.migrate.check() 6 | 7 | 8 | def test_system_check(): 9 | ln_setup.django("check") 10 | -------------------------------------------------------------------------------- /docs/storage/test-files/iris.data: -------------------------------------------------------------------------------- 1 | 5.1,3.5,1.4,0.2,Iris-setosa 2 | 4.9,3.0,1.4,0.2,Iris-setosa 3 | 7.0,3.2,4.7,1.4,Iris-versicolor 4 | 6.4,3.2,4.5,1.5,Iris-versicolor 5 | 6.2,3.4,5.4,2.3,Iris-virginica 6 | 5.9,3.0,5.1,1.8,Iris-virginica 7 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | ```{include} ../README.md 2 | :start-line: 0 3 | :end-line: 5 4 | ``` 5 | 6 | 7 | 8 | ```{toctree} 9 | :maxdepth: 1 10 | :hidden: 11 | 12 | guide 13 | api 14 | changelog 15 | ``` 16 | -------------------------------------------------------------------------------- /docs/storage.md: -------------------------------------------------------------------------------- 1 | # Storage 2 | 3 | ```{toctree} 4 | :maxdepth: 1 5 | 6 | storage/upload 7 | storage/add-replace-cache 8 | storage/anndata-accessor 9 | storage/prepare-transfer-local-to-cloud 10 | storage/transfer-local-to-cloud 11 | storage/vitessce 12 | ``` 13 | -------------------------------------------------------------------------------- /tests/core/test_nbconvert.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def test_nbconvert(): 5 | exit_code = os.system( # noqa: S605 6 | "jupyter nbconvert --to notebook --inplace --execute ./tests/core/notebooks/load_schema.ipynb" 7 | ) 8 | assert exit_code == 0 9 | -------------------------------------------------------------------------------- 
/docs/scripts/run_track_and_finish.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | ln.track() # initiate a tracked notebook/script run 4 | 5 | # your code automatically tracks inputs & outputs 6 | 7 | ln.finish() # mark run as finished, save execution report, source code & environment 8 | -------------------------------------------------------------------------------- /lamindb/core/subsettings/__init__.py: -------------------------------------------------------------------------------- 1 | """Sub settings. 2 | 3 | .. autoclass:: CreationSettings 4 | .. autoclass:: AnnotationSettings 5 | 6 | """ 7 | 8 | from ._annotation_settings import AnnotationSettings 9 | from ._creation_settings import CreationSettings 10 | -------------------------------------------------------------------------------- /docs/storage/test-files/iris.csv: -------------------------------------------------------------------------------- 1 | "sepal.length","sepal.width","petal.length","petal.width","variety" 2 | 5.1,3.5,1.4,.2,"Setosa" 3 | 4.9,3,1.4,.2,"Setosa" 4 | 7,3.2,4.7,1.4,"Versicolor" 5 | 6.4,3.2,4.5,1.5,"Versicolor" 6 | 6.3,3.3,6,2.5,"Virginica" 7 | 5.8,2.7,5.1,1.9,"Virginica" 8 | -------------------------------------------------------------------------------- /docs/storage/test-files/new_iris.csv: -------------------------------------------------------------------------------- 1 | ;sepal.length;sepal.width;petal.length;petal.width;variety 2 | 0;5.1;3.5;1.4;0.2;Setosa 3 | 1;4.9;3.0;1.4;0.2;Setosa 4 | 50;7.0;3.2;4.7;1.4;Versicolor 5 | 51;6.4;3.2;4.5;1.5;Versicolor 6 | 100;6.3;3.3;6.0;2.5;Virginica 7 | 101;5.8;2.7;5.1;1.9;Virginica 8 | -------------------------------------------------------------------------------- /docs/storage/test_notebooks.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import nbproject_test as test 4 | 5 | import lamindb as ln 6 
| 7 | 8 | def test_notebooks(): 9 | nbdir = Path(__file__).parent 10 | ln.setup.login("testuser1") 11 | test.execute_notebooks(nbdir, write=True) 12 | -------------------------------------------------------------------------------- /docs/scripts/define_schema_anndata_uns.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | from define_schema_df_metadata import study_metadata_schema 4 | 5 | anndata_uns_schema = ln.Schema( 6 | otype="AnnData", 7 | slots={ 8 | "uns:study_metadata": study_metadata_schema, 9 | }, 10 | ).save() 11 | -------------------------------------------------------------------------------- /lamindb/examples/__init__.py: -------------------------------------------------------------------------------- 1 | """Examples. 2 | 3 | .. autosummary:: 4 | :toctree: . 5 | 6 | schemas 7 | datasets 8 | cellxgene 9 | croissant 10 | mlflow 11 | wandb 12 | 13 | """ 14 | 15 | from . import croissant, datasets, mlflow, schemas, wandb 16 | from .cellxgene import _cellxgene 17 | -------------------------------------------------------------------------------- /lamindb/examples/schemas/__init__.py: -------------------------------------------------------------------------------- 1 | """Example schemas. 2 | 3 | .. autofunction:: valid_features 4 | .. 
autofunction:: anndata_ensembl_gene_ids_and_valid_features_in_obs 5 | 6 | """ 7 | 8 | from ._anndata import anndata_ensembl_gene_ids_and_valid_features_in_obs 9 | from ._simple import valid_features 10 | -------------------------------------------------------------------------------- /tests/core/test_db.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | 4 | def test_create_to_load(): 5 | transform = ln.Transform(version="0", key="test", type="pipeline") 6 | transform.save() 7 | run = ln.Run(transform=transform) 8 | run.save() 9 | ln.Storage.get(root=str(ln.setup.settings.storage.root)) 10 | -------------------------------------------------------------------------------- /docs/scripts/define_schema_df_metadata.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | study_metadata_schema = ln.Schema( 4 | name="Study metadata schema", 5 | features=[ 6 | ln.Feature(name="temperature", dtype=float).save(), 7 | ln.Feature(name="experiment", dtype=str).save(), 8 | ], 9 | ).save() 10 | -------------------------------------------------------------------------------- /docs/api.md: -------------------------------------------------------------------------------- 1 | # API Reference 2 | 3 | 4 | 5 | ```{toctree} 6 | :maxdepth: 1 7 | :caption: CLI & lamindb 8 | :hidden: 9 | 10 | cli 11 | lamindb 12 | ``` 13 | 14 | ```{toctree} 15 | :maxdepth: 1 16 | :caption: Modules 17 | :hidden: 18 | 19 | bionty 20 | wetlab 21 | ``` 22 | -------------------------------------------------------------------------------- /docs/faq/test_notebooks.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import nbproject_test as test 4 | 5 | import lamindb as ln 6 | 7 | 8 | def test_notebooks(): 9 | nbdir = Path(__file__).parent 10 | ln.setup.login("testuser1") 11 | ln.setup.init(storage=nbdir / "mydata") 12 | 
test.execute_notebooks(nbdir, write=True) 13 | -------------------------------------------------------------------------------- /lamindb/examples/cellxgene/__init__.py: -------------------------------------------------------------------------------- 1 | """CELLxGENE utilities. 2 | 3 | .. autofunction:: save_cellxgene_defaults 4 | .. autofunction:: create_cellxgene_schema 5 | 6 | """ 7 | 8 | from ._cellxgene import ( 9 | create_cellxgene_schema, 10 | get_cxg_schema, 11 | save_cellxgene_defaults, 12 | save_cxg_defaults, 13 | ) 14 | -------------------------------------------------------------------------------- /docs/scripts/curate_dataframe_flexible.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | ln.examples.datasets.mini_immuno.define_features_labels() 4 | df = ln.examples.datasets.mini_immuno.get_dataset1(otype="DataFrame") 5 | artifact = ln.Artifact.from_dataframe( 6 | df, key="examples/dataset1.parquet", schema="valid_features" 7 | ).save() 8 | artifact.describe() 9 | -------------------------------------------------------------------------------- /docs/faq.md: -------------------------------------------------------------------------------- 1 | # FAQ 2 | 3 | ```{toctree} 4 | :maxdepth: 1 5 | 6 | faq/pydantic-pandera 7 | faq/idempotency 8 | faq/acid 9 | faq/track-run-inputs 10 | faq/curate-any 11 | faq/import-modules 12 | faq/reference-field 13 | faq/trash-archive 14 | faq/delete 15 | faq/keep-artifacts-local 16 | faq/validate-fields 17 | faq/symbol-mapping 18 | faq/search 19 | ``` 20 | -------------------------------------------------------------------------------- /docs/guide.md: -------------------------------------------------------------------------------- 1 | # Guide 2 | 3 | ```{toctree} 4 | :hidden: 5 | :caption: "Overview" 6 | 7 | README 8 | ``` 9 | 10 | ```{toctree} 11 | :hidden: 12 | :caption: "How to" 13 | 14 | query-search 15 | track 16 | curate 17 | manage-ontologies 18 | transfer 
19 | ``` 20 | 21 | ```{toctree} 22 | :hidden: 23 | :caption: Other topics 24 | 25 | faq 26 | storage 27 | ``` 28 | -------------------------------------------------------------------------------- /lamindb/base/__init__.py: -------------------------------------------------------------------------------- 1 | """Base library. 2 | 3 | Is available also when no instance is setup. 4 | 5 | Modules 6 | ------- 7 | 8 | .. autosummary:: 9 | :toctree: . 10 | 11 | uids 12 | types 13 | fields 14 | dtypes 15 | utils 16 | 17 | """ 18 | 19 | from . import dtypes, fields, types, uids, utils 20 | from .utils import deprecated, doc_args 21 | -------------------------------------------------------------------------------- /tests/curators/test_curators_remote.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | 4 | def test_curator_remote(): 5 | lamindata_artifacts = ln.Artifact.connect("laminlabs/lamindata") 6 | curator = ln.curators.DataFrameCurator( 7 | lamindata_artifacts.get("Ywz5JiVNHOWSJDiK"), 8 | schema=ln.examples.schemas.valid_features(), 9 | ) 10 | curator.validate() 11 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/enhancement.yml: -------------------------------------------------------------------------------- 1 | name: Propose an enhancement 2 | description: Propose an enhancement. 3 | body: 4 | - type: textarea 5 | id: description 6 | attributes: 7 | label: Add a description 8 | placeholder: | 9 | This is a public repository! 10 | Do not reveal any internal information. 
11 | validations: 12 | required: true 13 | -------------------------------------------------------------------------------- /docs/scripts/curate_anndata_uns.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | ln.examples.datasets.mini_immuno.define_features_labels() 4 | adata = ln.examples.datasets.mini_immuno.get_dataset1(otype="AnnData") 5 | schema = ln.Schema.get(name="Study metadata schema") 6 | artifact = ln.Artifact.from_anndata( 7 | adata, schema=schema, key="examples/mini_immuno_uns.h5ad" 8 | ) 9 | artifact.describe() 10 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | contact_links: 3 | - name: LaminHub issues 4 | url: https://github.com/laminlabs/laminhub-public 5 | about: If you have issues with the GUI/web app at lamin.ai, please report them here. 6 | - name: Enterprise support 7 | url: https://lamin.ai/contact 8 | about: If you have other questions, contact us directly. 
9 | -------------------------------------------------------------------------------- /docs/scripts/curate_anndata_flexible.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | ln.examples.datasets.mini_immuno.define_features_labels() 4 | adata = ln.examples.datasets.mini_immuno.get_dataset1(otype="AnnData") 5 | artifact = ln.Artifact.from_anndata( 6 | adata, 7 | key="examples/mini_immuno.h5ad", 8 | schema="ensembl_gene_ids_and_valid_features_in_obs", 9 | ).save() 10 | artifact.describe() 11 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | from pathlib import Path 3 | 4 | import pytest 5 | 6 | 7 | @pytest.fixture(scope="function") 8 | def clean_soma_files(request): 9 | path = request.param if hasattr(request, "param") else "small_dataset.tiledbsoma" 10 | if Path(path).exists(): 11 | shutil.rmtree(path) 12 | 13 | yield path 14 | 15 | if Path(path).exists(): 16 | shutil.rmtree(path) 17 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/usage_question.yml: -------------------------------------------------------------------------------- 1 | name: Ask a usage question 2 | description: Ask a usage question. 3 | labels: 4 | - "usage question" 5 | body: 6 | - type: textarea 7 | id: description 8 | attributes: 9 | label: Add a description 10 | placeholder: | 11 | This is a public repository! 12 | Do not reveal any internal information. 
13 | validations: 14 | required: true 15 | -------------------------------------------------------------------------------- /lamindb/core/subsettings/_annotation_settings.py: -------------------------------------------------------------------------------- 1 | class AnnotationSettings: 2 | n_max_records: int = 1000 3 | """Maximal number of records to annotate with during automated annotation. 4 | 5 | If the number of records to annotate exceeds this limit, print a warning and do not annotate. 6 | 7 | The number is calculated per feature for labels, and per schema for features. 8 | """ 9 | 10 | 11 | annotation_settings = AnnotationSettings() 12 | -------------------------------------------------------------------------------- /lamindb/core/types.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, TypeVar 4 | 5 | from anndata import AnnData 6 | from lamindb_setup.types import UPathStr 7 | 8 | from lamindb.base.types import ( 9 | Dtype, 10 | FieldAttr, 11 | ListLike, 12 | StrField, 13 | TransformType, 14 | ) 15 | 16 | MuData = TypeVar("MuData") 17 | SpatialData = TypeVar("SpatialData") 18 | 19 | ScverseDataStructures = AnnData | MuData | SpatialData 20 | -------------------------------------------------------------------------------- /lamindb/migrations/0134_run_params.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-10-13 07:42 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("lamindb", "0133_artifactuser_artifact_users"), 9 | ] 10 | 11 | operations = [ 12 | migrations.AddField( 13 | model_name="run", 14 | name="params", 15 | field=models.JSONField(null=True), 16 | ), 17 | ] 18 | -------------------------------------------------------------------------------- 
/lamindb/examples/schemas/define_schema_anndata_ensembl_gene_ids_and_valid_features_in_obs.py: -------------------------------------------------------------------------------- 1 | import bionty as bt 2 | 3 | import lamindb as ln 4 | 5 | obs_schema = ln.examples.schemas.valid_features() 6 | varT_schema = ln.Schema( 7 | name="valid_ensembl_gene_ids", itype=bt.Gene.ensembl_gene_id 8 | ).save() 9 | schema = ln.Schema( 10 | name="anndata_ensembl_gene_ids_and_valid_features_in_obs", 11 | otype="AnnData", 12 | slots={"obs": obs_schema, "var.T": varT_schema}, 13 | ).save() 14 | -------------------------------------------------------------------------------- /docs/scripts/curate_dataframe_minimal_errors.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | schema = ln.examples.datasets.mini_immuno.define_mini_immuno_schema_flexible() 4 | df = ln.examples.datasets.mini_immuno.get_dataset1(otype="DataFrame") 5 | df.pop("donor") # remove donor column to trigger validation error 6 | try: 7 | artifact = ln.Artifact.from_dataframe( 8 | df, key="examples/dataset1.parquet", schema=schema 9 | ).save() 10 | except ln.errors.ValidationError as error: 11 | print(error) 12 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | name: Report a bug 2 | description: Report a bug. 3 | labels: 4 | - ":bug: bug" 5 | body: 6 | - type: textarea 7 | id: report 8 | attributes: 9 | label: Add a description 10 | placeholder: | 11 | Describe and consider providing version information. Please ensure you're on the latest version of lamindb. 12 | This is a public repository! 13 | Do not reveal any internal information. 
14 | validations: 15 | required: true 16 | -------------------------------------------------------------------------------- /lamindb/migrations/0124_page_artifact_page_collection_page_feature_page_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.1.12 on 2025-09-28 23:37 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ( 9 | "lamindb", 10 | "0123_alter_artifact_description_alter_branch_description_and_more", 11 | ), 12 | ] 13 | 14 | # this migration was later reverted, so we do not need to run it 15 | operations = [] # type: ignore 16 | -------------------------------------------------------------------------------- /lamindb/setup/_switch.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | from lamindb_setup import settings 6 | 7 | if TYPE_CHECKING: 8 | from lamindb.models import Branch, Space 9 | 10 | 11 | def switch(*, branch: str | Branch | None = None, space: str | Space | None = None): 12 | """Switch to a branch or space, create if not exists.""" 13 | if branch is not None: 14 | settings.branch = branch 15 | if space is not None: 16 | settings.space = space 17 | -------------------------------------------------------------------------------- /tests/core/test_settings.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | 4 | def test_settings_repr(): 5 | repr_str = repr(ln.settings) 6 | 7 | lines = repr_str.split("\n") 8 | assert "Settings" in lines[0] 9 | assert all(line.startswith(" ") for line in lines[1:]) 10 | 11 | content = "\n".join(lines[1:]) 12 | assert content.find("instance:") < content.find("storage:") 13 | assert content.find("storage:") < content.find("verbosity:") 14 | assert content.find("verbosity:") < 
content.find("track_run_inputs:") 15 | -------------------------------------------------------------------------------- /docs/scripts/run_track_with_features_and_params.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import lamindb as ln 3 | 4 | 5 | if __name__ == "__main__": 6 | p = argparse.ArgumentParser() 7 | p.add_argument("--s3-folder", type=str) 8 | p.add_argument("--experiment", type=str) 9 | args = p.parse_args() 10 | features = { 11 | "s3_folder": args.s3_folder, 12 | "experiment": args.experiment, 13 | } 14 | ln.track(features=features, params={"example_param": 42}) 15 | 16 | # your code 17 | 18 | ln.finish() 19 | -------------------------------------------------------------------------------- /lamindb/migrations/0110_rename_values_artifacts_record_linked_artifacts.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-07-04 18:10 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("lamindb", "0109_record_input_of_runs_alter_record_run_and_more"), 9 | ] 10 | 11 | operations = [ 12 | migrations.RenameField( 13 | model_name="record", 14 | old_name="values_artifacts", 15 | new_name="linked_artifacts", 16 | ), 17 | ] 18 | -------------------------------------------------------------------------------- /lamindb/setup/__init__.py: -------------------------------------------------------------------------------- 1 | import lamindb_setup as _lamindb_setup 2 | from lamindb_setup import * # noqa: F403 3 | from lamindb_setup import ( 4 | connect, 5 | delete, 6 | init, 7 | settings, 8 | ) 9 | 10 | from . 
import core, errors, types 11 | from ._switch import switch # noqa: F401 12 | 13 | del connect # we have this at the root level, hence, we don't want it here 14 | __doc__ = _lamindb_setup.__doc__.replace("lamindb_setup", "lamindb.setup") 15 | settings.__doc__ = settings.__doc__.replace("lamindb_setup", "lamindb.setup") 16 | -------------------------------------------------------------------------------- /tests/core/test_manager.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | 4 | def test_manager_list(): 5 | label = ln.Record(name="manager label") 6 | label.save() 7 | label_names = [f"Record {i}" for i in range(3)] 8 | labels = [ln.Record(name=name) for name in label_names] 9 | ln.save(labels) 10 | label.parents.set(labels) 11 | assert len(label.parents.to_list()) == 3 12 | assert "Record 1" in label.parents.to_list("name") 13 | label.delete(permanent=True) 14 | for label in labels: 15 | label.delete(permanent=True) 16 | -------------------------------------------------------------------------------- /lamindb/migrations/0093_alter_schemacomponent_unique_together.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-05-07 12:16 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("lamindb", "0092_alter_artifactfeaturevalue_artifact_and_more"), 9 | ] 10 | 11 | operations = [ 12 | migrations.AlterUniqueTogether( 13 | name="schemacomponent", 14 | unique_together={("composite", "slot"), ("composite", "slot", "component")}, 15 | ), 16 | ] 17 | -------------------------------------------------------------------------------- /tests/permissions/scripts/setup_instance.py: -------------------------------------------------------------------------------- 1 | import lamindb_setup as ln_setup 2 | from laminci.db import setup_local_test_postgres 3 | 4 | pgurl = 
setup_local_test_postgres() 5 | 6 | ln_setup.init( 7 | storage="./default_storage_permissions", 8 | name="lamindb-test-permissions", 9 | db=pgurl, 10 | ) 11 | 12 | # can't add this app in the init because don't want t trigger the initial migration 13 | # that conflicts with _install_db_module 14 | ln_setup.settings.instance._schema_str = "hubmodule" 15 | ln_setup.settings.instance._persist() 16 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "sub/lamindb-setup"] 2 | path = sub/lamindb-setup 3 | url = https://github.com/laminlabs/lamindb-setup 4 | [submodule "sub/lamin-cli"] 5 | path = sub/lamin-cli 6 | url = https://github.com/laminlabs/lamin-cli 7 | [submodule "sub/bionty"] 8 | path = sub/bionty 9 | url = https://github.com/laminlabs/bionty 10 | [submodule "sub/wetlab"] 11 | path = sub/wetlab 12 | url = https://github.com/laminlabs/wetlab 13 | [submodule "sub/cellxgene-lamin"] 14 | path = sub/cellxgene-lamin 15 | url = https://github.com/laminlabs/cellxgene-lamin.git 16 | -------------------------------------------------------------------------------- /lamindb/examples/datasets/define_mini_immuno_schema_flexible.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | schema = ln.Schema( 4 | name="Mini immuno schema", 5 | features=[ 6 | ln.Feature.get(name="perturbation"), 7 | ln.Feature.get(name="cell_type_by_model"), 8 | ln.Feature.get(name="assay_oid"), 9 | ln.Feature.get(name="donor"), 10 | ln.Feature.get(name="concentration"), 11 | ln.Feature.get(name="treatment_time_h"), 12 | ], 13 | flexible=True, # _additional_ columns in a dataframe are validated & annotated 14 | ).save() 15 | -------------------------------------------------------------------------------- /docs/scripts/curate_dataframe_attrs.py: 
-------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | from .define_schema_df_metadata import study_metadata_schema 4 | 5 | df = ln.examples.datasets.mini_immuno.get_dataset1(otype="DataFrame") 6 | schema = ln.Schema( 7 | features=[ln.Feature(name="perturbation", dtype="str").save()], 8 | slots={"attrs": study_metadata_schema}, 9 | otype="DataFrame", 10 | ).save() 11 | curator = ln.curators.DataFrameCurator(df, schema=schema) 12 | curator.validate() 13 | artifact = curator.save_artifact(key="examples/df_with_attrs.parquet") 14 | artifact.describe() 15 | -------------------------------------------------------------------------------- /lamindb/migrations/0144_alter_transform_is_flow.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-11-16 13:46 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("lamindb", "0143_remove_transform_entrypoint_transform_config_and_more"), 9 | ] 10 | 11 | operations = [ 12 | migrations.AlterField( 13 | model_name="transform", 14 | name="is_flow", 15 | field=models.BooleanField(db_default=False, db_index=True, default=False), 16 | ), 17 | ] 18 | -------------------------------------------------------------------------------- /lamindb/migrations/0131_record_unique_name_type_space.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.1.12 on 2025-10-02 15:17 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("lamindb", "0130_branch_space_alter_artifactblock_artifact_and_more"), 9 | ] 10 | 11 | operations = [ 12 | migrations.AddConstraint( 13 | model_name="record", 14 | constraint=models.UniqueConstraint( 15 | fields=("name", "type", "space"), name="unique_name_type_space" 16 | ), 17 | ), 18 | ] 19 | 
-------------------------------------------------------------------------------- /lamindb/migrations/0105_record_unique_name.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-06-03 19:37 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("lamindb", "0104_alter_branch_uid"), 9 | ] 10 | 11 | operations = [ 12 | migrations.AddConstraint( 13 | model_name="record", 14 | constraint=models.UniqueConstraint( 15 | condition=models.Q(("is_type", True)), 16 | fields=("name",), 17 | name="unique_name", 18 | ), 19 | ), 20 | ] 21 | -------------------------------------------------------------------------------- /lamindb/core/storage/_valid_suffixes.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from lamindb_setup.core.upath import VALID_COMPOSITE_SUFFIXES, VALID_SIMPLE_SUFFIXES 4 | 5 | # add new composite suffixes like so 6 | VALID_COMPOSITE_SUFFIXES.update( 7 | { 8 | ".vitessce.json", 9 | ".ome.zarr", 10 | } 11 | ) 12 | # can do the same for simple valid suffixes 13 | 14 | 15 | class VALID_SUFFIXES: 16 | """Valid suffixes.""" 17 | 18 | SIMPLE: set[str] = VALID_SIMPLE_SUFFIXES 19 | """Simple suffixes.""" 20 | COMPOSITE: set[str] = VALID_COMPOSITE_SUFFIXES 21 | """Composite suffixes.""" 22 | -------------------------------------------------------------------------------- /lamindb/migrations/0081_revert_textfield_collection.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-01-21 17:03 2 | 3 | from django.db import migrations 4 | 5 | import lamindb.base.fields 6 | 7 | 8 | class Migration(migrations.Migration): 9 | dependencies = [ 10 | ("lamindb", "0080_polish_lamindbv1"), 11 | ] 12 | 13 | operations = [ 14 | migrations.AlterField( 15 | model_name="collection", 16 | 
name="description", 17 | field=lamindb.base.fields.TextField( 18 | blank=True, db_index=True, default=None, null=True 19 | ), 20 | ), 21 | ] 22 | -------------------------------------------------------------------------------- /lamindb/core/storage/__init__.py: -------------------------------------------------------------------------------- 1 | """Storage API. 2 | 3 | Valid suffixes. 4 | 5 | .. autodata:: VALID_SUFFIXES 6 | 7 | Array accessors. 8 | 9 | .. autoclass:: AnnDataAccessor 10 | .. autoclass:: SpatialDataAccessor 11 | .. autoclass:: BackedAccessor 12 | """ 13 | 14 | from lamindb_setup.core.upath import LocalPathClasses, UPath, infer_filesystem 15 | 16 | from ._backed_access import AnnDataAccessor, BackedAccessor, SpatialDataAccessor 17 | from ._tiledbsoma import save_tiledbsoma_experiment 18 | from ._valid_suffixes import VALID_SUFFIXES 19 | from .objects import infer_suffix, write_to_disk 20 | from .paths import delete_storage 21 | -------------------------------------------------------------------------------- /lamindb/migrations/0082_alter_feature_dtype.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-01-25 08:26 2 | 3 | from django.db import migrations 4 | 5 | import lamindb.base.fields 6 | 7 | 8 | class Migration(migrations.Migration): 9 | dependencies = [ 10 | ("lamindb", "0081_revert_textfield_collection"), 11 | ] 12 | 13 | operations = [ 14 | migrations.AlterField( 15 | model_name="feature", 16 | name="dtype", 17 | field=lamindb.base.fields.CharField( 18 | blank=True, db_index=True, default=None, max_length=255, null=True 19 | ), 20 | ), 21 | ] 22 | -------------------------------------------------------------------------------- /docs/scripts/curate_spatialdata.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | 3 | spatialdata = ln.examples.datasets.spatialdata_blobs() 4 | sdata_schema = 
ln.Schema.get(name="spatialdata_blobs_schema") 5 | curator = ln.curators.SpatialDataCurator(spatialdata, sdata_schema) 6 | try: 7 | curator.validate() 8 | except ln.errors.ValidationError: 9 | pass 10 | 11 | spatialdata.tables["table"].var.drop(index="ENSG00000999999", inplace=True) 12 | 13 | # validate again (must pass now) and save artifact 14 | artifact = ln.Artifact.from_spatialdata( 15 | spatialdata, key="examples/spatialdata1.zarr", schema=sdata_schema 16 | ).save() 17 | artifact.describe() 18 | -------------------------------------------------------------------------------- /lamindb/migrations/0128_artifact__real_key.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-10-01 09:07 2 | 3 | from django.db import migrations 4 | 5 | import lamindb.base.fields 6 | 7 | 8 | class Migration(migrations.Migration): 9 | dependencies = [ 10 | ("lamindb", "0127_alter_run_status_code_feature_dtype"), 11 | ] 12 | 13 | operations = [ 14 | migrations.AddField( 15 | model_name="artifact", 16 | name="_real_key", 17 | field=lamindb.base.fields.CharField( 18 | blank=True, db_index=True, default=None, max_length=1024, null=True 19 | ), 20 | ), 21 | ] 22 | -------------------------------------------------------------------------------- /lamindb/integrations/__init__.py: -------------------------------------------------------------------------------- 1 | """Integrations. 2 | 3 | Functions 4 | --------- 5 | 6 | .. autofunction:: save_vitessce_config 7 | .. autofunction:: save_tiledbsoma_experiment 8 | .. autofunction:: curate_from_croissant 9 | 10 | Modules 11 | ------- 12 | 13 | .. autosummary:: 14 | :toctree: . 
15 | 16 | lightning 17 | """ 18 | 19 | from lamindb.core.storage import save_tiledbsoma_experiment 20 | 21 | from ._croissant import curate_from_croissant 22 | from ._vitessce import save_vitessce_config 23 | 24 | __all__ = [ 25 | "lightning", 26 | "save_tiledbsoma_experiment", 27 | "curate_from_croissant", 28 | "save_vitessce_config", 29 | ] 30 | -------------------------------------------------------------------------------- /lamindb/curators/__init__.py: -------------------------------------------------------------------------------- 1 | """Curators. 2 | 3 | .. autosummary:: 4 | :toctree: . 5 | 6 | DataFrameCurator 7 | AnnDataCurator 8 | MuDataCurator 9 | SpatialDataCurator 10 | TiledbsomaExperimentCurator 11 | 12 | Modules. 13 | 14 | .. autosummary:: 15 | :toctree: . 16 | 17 | core 18 | 19 | """ 20 | 21 | from .core import ( 22 | AnnDataCurator, 23 | DataFrameCurator, 24 | MuDataCurator, 25 | SpatialDataCurator, 26 | TiledbsomaExperimentCurator, 27 | ) 28 | 29 | __all__ = [ 30 | "AnnDataCurator", 31 | "DataFrameCurator", 32 | "MuDataCurator", 33 | "SpatialDataCurator", 34 | "TiledbsomaExperimentCurator", 35 | ] 36 | -------------------------------------------------------------------------------- /tests/permissions/jwt_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import psycopg2 4 | 5 | 6 | def sign_jwt(db_url, payload: dict) -> str: 7 | with psycopg2.connect(db_url) as conn, conn.cursor() as cur: 8 | cur.execute( 9 | """ 10 | SELECT sign( 11 | %s::json, 12 | (SELECT security.get_secret('jwt_secret')), 13 | %s 14 | ) 15 | """, 16 | (json.dumps(payload), "HS256"), 17 | ) 18 | token = cur.fetchone()[0] 19 | if not token: 20 | msg = "Failed to generate JWT" 21 | raise ValueError(msg) 22 | return token 23 | -------------------------------------------------------------------------------- /docs/scripts/run_track_with_params.py: 
-------------------------------------------------------------------------------- 1 | import argparse 2 | import lamindb as ln 3 | 4 | if __name__ == "__main__": 5 | p = argparse.ArgumentParser() 6 | p.add_argument("--input-dir", type=str) 7 | p.add_argument("--downsample", action="store_true") 8 | p.add_argument("--learning-rate", type=float) 9 | args = p.parse_args() 10 | params = { 11 | "input_dir": args.input_dir, 12 | "learning_rate": args.learning_rate, 13 | "preprocess_params": { 14 | "downsample": args.downsample, 15 | "normalization": "the_good_one", 16 | }, 17 | } 18 | ln.track(params=params) 19 | 20 | # your code 21 | 22 | ln.finish() 23 | -------------------------------------------------------------------------------- /docs/test_notebooks.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | 4 | import nbproject_test as test 5 | 6 | sys.path[:0] = [str(Path(__file__).parent.parent)] 7 | 8 | from noxfile import GROUPS 9 | 10 | DOCS = Path(__file__).parents[1] / "docs/" 11 | 12 | 13 | def test_tutorial(): 14 | for artifactname in GROUPS["tutorial"]: 15 | test.execute_notebooks(DOCS / artifactname, write=True) 16 | 17 | 18 | def test_guide(): 19 | for artifactname in GROUPS["guide"]: 20 | test.execute_notebooks(DOCS / artifactname, write=True) 21 | 22 | 23 | def test_biology(): 24 | for artifactname in GROUPS["biology"]: 25 | test.execute_notebooks(DOCS / artifactname, write=True) 26 | -------------------------------------------------------------------------------- /lamindb/migrations/0099_alter_writelog_seqno.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.1.7 on 2025-05-23 23:20 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("lamindb", "0098_alter_feature_type_alter_project_type_and_more"), 9 | ] 10 | 11 | operations = [ 12 | 
migrations.AlterField( 13 | model_name="writelog", 14 | name="seqno", 15 | field=models.BigAutoField(primary_key=True, serialize=False), 16 | ), 17 | migrations.RenameField( 18 | model_name="writelog", 19 | old_name="seqno", 20 | new_name="id", 21 | ), 22 | ] 23 | -------------------------------------------------------------------------------- /lamindb/migrations/0150_rename_params_record_extra_data_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2.8 on 2025-12-02 05:36 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ( 9 | "lamindb", 10 | "0149_ulabel_update_feature_dtype_on_ulabel_type_name_change_and_more", 11 | ), 12 | ] 13 | 14 | operations = [ 15 | migrations.RenameField( 16 | model_name="record", 17 | old_name="params", 18 | new_name="extra_data", 19 | ), 20 | migrations.RemoveField( 21 | model_name="artifact", 22 | name="params", 23 | ), 24 | ] 25 | -------------------------------------------------------------------------------- /lamindb/migrations/0114_alter_run__status_code.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-07-06 08:47 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("lamindb", "0113_lower_case_branch_and_space_names"), 9 | ] 10 | 11 | operations = [ 12 | migrations.AlterField( 13 | model_name="run", 14 | name="_status_code", 15 | field=models.SmallIntegerField(db_index=True, default=None, null=True), 16 | ), 17 | migrations.RunSQL( 18 | sql=""" 19 | UPDATE lamindb_run 20 | SET _status_code = NULL 21 | WHERE _status_code = 0; 22 | """, 23 | ), 24 | ] 25 | -------------------------------------------------------------------------------- /lamindb/migrations/0148_artifact_params_record_params.py: 
-------------------------------------------------------------------------------- 1 | # Generated by Django 5.2.8 on 2025-11-25 21:26 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ( 9 | "lamindb", 10 | "0147_record_update_feature_dtype_on_record_type_name_change_and_more", 11 | ), 12 | ] 13 | 14 | operations = [ 15 | migrations.AddField( 16 | model_name="artifact", 17 | name="params", 18 | field=models.JSONField(null=True), 19 | ), 20 | migrations.AddField( 21 | model_name="record", 22 | name="params", 23 | field=models.JSONField(null=True), 24 | ), 25 | ] 26 | -------------------------------------------------------------------------------- /.github/workflows/doc-changes.yml: -------------------------------------------------------------------------------- 1 | name: doc-changes 2 | 3 | on: 4 | pull_request_target: 5 | branches: 6 | - main 7 | - release 8 | types: 9 | - closed 10 | 11 | jobs: 12 | doc-changes: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v4 16 | - uses: actions/setup-python@v5 17 | with: 18 | python-version: "3.11" 19 | - run: pip install "laminci[doc-changes]@git+https://x-access-token:${{ secrets.LAMIN_BUILD_DOCS }}@github.com/laminlabs/laminci" 20 | - run: laminci doc-changes 21 | env: 22 | repo_token: ${{ secrets.GITHUB_TOKEN }} 23 | docs_token: ${{ secrets.LAMIN_BUILD_DOCS }} 24 | changelog_file: lamin-docs/docs/changelog/soon/lamindb.md 25 | -------------------------------------------------------------------------------- /lamindb/migrations/0091_alter_featurevalue_options_alter_space_options_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.1.4 on 2025-04-30 09:11 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("lamindb", "0090_runproject_project_runs"), 9 | ] 10 | 11 | operations = [ 12 | 
migrations.AlterModelOptions( 13 | name="featurevalue", 14 | options={"base_manager_name": "objects"}, 15 | ), 16 | migrations.AlterModelOptions( 17 | name="space", 18 | options={"base_manager_name": "objects"}, 19 | ), 20 | migrations.AlterModelOptions( 21 | name="user", 22 | options={"base_manager_name": "objects"}, 23 | ), 24 | ] 25 | -------------------------------------------------------------------------------- /lamindb/migrations/0097_remove_schemaparam_param_remove_paramvalue_param_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-05-11 18:54 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("lamindb", "0096_remove_artifact__param_values_and_more"), 9 | ] 10 | 11 | operations = [ 12 | migrations.DeleteModel( 13 | name="ArtifactParamValue", 14 | ), 15 | migrations.DeleteModel( 16 | name="SchemaParam", 17 | ), 18 | migrations.DeleteModel( 19 | name="Param", 20 | ), 21 | migrations.DeleteModel( 22 | name="ParamValue", 23 | ), 24 | migrations.DeleteModel( 25 | name="RunParamValue", 26 | ), 27 | ] 28 | -------------------------------------------------------------------------------- /docs/includes/installation.md: -------------------------------------------------------------------------------- 1 | ![pyversions](https://img.shields.io/pypi/pyversions/lamindb) 2 | 3 | ```shell 4 | pip install lamindb 5 | ``` 6 | 7 | You can configure the installation using `extras`, e.g., 8 | 9 | ```shell 10 | pip install 'lamindb[gcp]' 11 | ``` 12 | 13 | Supported `extras` are: 14 | 15 | ```yaml 16 | # cloud backends (AWS is assumed) 17 | gcp # Google Cloud (gcfs, etc.) 
18 | # biological artifact formats 19 | fcs # FCS artifacts (flow cytometry) 20 | # storage backends 21 | zarr # store & stream arrays with zarr 22 | ``` 23 | 24 | If you'd like to install from GitHub, see [here](https://github.com/laminlabs/lamindb/blob/main/README.md). 25 | 26 | If you'd like a docker container, here is a way: [github.com/laminlabs/lamindb-docker](https://github.com/laminlabs/lamindb-docker). 27 | -------------------------------------------------------------------------------- /lamindb/core/__init__.py: -------------------------------------------------------------------------------- 1 | """Core library. 2 | 3 | Settings & context: 4 | 5 | .. autosummary:: 6 | :toctree: . 7 | 8 | Settings 9 | subsettings 10 | Context 11 | 12 | Artifact loaders: 13 | 14 | .. autosummary:: 15 | :toctree: . 16 | 17 | loaders 18 | 19 | Data loaders: 20 | 21 | .. autosummary:: 22 | :toctree: . 23 | 24 | MappedCollection 25 | 26 | Modules: 27 | 28 | .. autosummary:: 29 | :toctree: . 30 | 31 | storage 32 | logger 33 | 34 | """ 35 | 36 | from lamin_utils import logger 37 | from lamin_utils._inspect import InspectResult 38 | 39 | from .. import errors as exceptions 40 | from ..examples import datasets # backward compat 41 | from . 
from __future__ import annotations

import importlib
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from ... import Schema


def valid_features() -> Schema:
    """A `DataFrame` schema that validates that columns map on existing features.

    .. literalinclude:: scripts/define_valid_features.py
        :language: python

    Returns the `Schema` record named ``"valid_features"``, creating it on
    first use by running the definition script in this package.
    """
    from ... import Schema

    try:
        # fast path: the schema was already created in the current instance
        return Schema.get(name="valid_features")
    except Schema.DoesNotExist:
        try:
            # importing the module executes the script that defines & saves the schema
            from . import define_valid_features  # noqa

            return Schema.get(name="valid_features")
        except Schema.DoesNotExist:
            # the module was already imported earlier in this process but the
            # schema has since been deleted; reload re-runs the definition script
            importlib.reload(define_valid_features)
            return Schema.get(name="valid_features")
import shutil
from time import perf_counter

import lamindb_setup as ln_setup
import pytest


def pytest_sessionstart():
    # initialize a throwaway instance with local storage for this test session
    t_execute_start = perf_counter()
    ln_setup.init(storage="./testdb-integrations")
    total_time_elapsed = perf_counter() - t_execute_start
    print(f"time to setup the instance: {total_time_elapsed:.1f}s")


def pytest_sessionfinish(session: pytest.Session):
    # remove the storage directory and delete the instance registration
    shutil.rmtree("./testdb-integrations")
    ln_setup.delete("testdb-integrations", force=True)


@pytest.fixture
def ccaplog(caplog):
    """Add caplog handler to our custom logger at session start."""
    from lamin_utils._logger import logger

    # route lamin_utils' custom logger into pytest's caplog handler so tests
    # can assert on its log output via this fixture
    logger.addHandler(caplog.handler)

    yield caplog

    logger.removeHandler(caplog.handler)
import lamindb as ln
import pytest


def test_connect_reconnect():
    """Switching instances with `ln.connect` changes what queries and imports see."""
    # testuser2 needs write access lamin-site-assets because of a fluke
    # in the legacy collaborator management, it seems
    assert ln.setup.settings.user.handle == "testuser2"
    ln.connect("lamindb-unit-tests-storage")  # this is not changing anything
    count1 = ln.Artifact.filter().count()
    # a public instance that does not have bionty configured
    ln.connect("laminlabs/lamin-site-assets")
    count2 = ln.Artifact.filter().count()
    # the two instances hold different numbers of artifacts
    assert count1 != count2
    # importing bionty must fail while connected to an instance without it
    with pytest.raises(ln.setup.errors.ModuleWasntConfigured):
        import bionty as bt
    # reconnecting to an instance with bionty makes the import work again
    ln.connect("lamindb-unit-tests-storage")
    import bionty as bt

    count3 = bt.Gene.filter().count()
    assert count2 != count3
name="unique_collection_key_hash_not_null" 25 | ), 26 | ), 27 | ] 28 | -------------------------------------------------------------------------------- /lamindb/migrations/0108_remove_record_sheet_remove_sheetproject_sheet_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-06-30 12:54 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("lamindb", "0107_add_schema_to_record"), 9 | ] 10 | 11 | operations = [ 12 | migrations.RemoveField( 13 | model_name="record", 14 | name="sheet", 15 | ), 16 | migrations.RemoveField( 17 | model_name="project", 18 | name="sheets", 19 | ), 20 | migrations.AlterUniqueTogether( 21 | name="sheetproject", 22 | unique_together=None, 23 | ), 24 | migrations.DeleteModel( 25 | name="Sheet", 26 | ), 27 | migrations.DeleteModel( 28 | name="SheetProject", 29 | ), 30 | ] 31 | -------------------------------------------------------------------------------- /lamindb/migrations/0138_remove_record_linked_users_user_linked_in_records.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.1.8 on 2025-10-24 12:47 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ( 9 | "lamindb", 10 | "0137_remove_recordrecord__aux_remove_recordrecord_branch_and_more", 11 | ), 12 | ] 13 | 14 | operations = [ 15 | migrations.RemoveField( 16 | model_name="record", 17 | name="linked_users", 18 | ), 19 | migrations.AddField( 20 | model_name="user", 21 | name="linked_in_records", 22 | field=models.ManyToManyField( 23 | related_name="linked_users", 24 | through="lamindb.RecordUser", 25 | to="lamindb.record", 26 | ), 27 | ), 28 | ] 29 | -------------------------------------------------------------------------------- /docs/scripts/curate_dataframe_external_features.py: 
import lamindb as ln
import pandas as pd
import pyarrow.parquet as pq


def test_parquet_kwargs():
    """Extra parquet writer kwargs are forwarded to pyarrow when saving a DataFrame."""
    raw = pd.DataFrame(
        {
            "a": [3, 1, 4, 2],
            "b": ["c", "a", "d", "b"],
            "c": [3.3, 1.1, 4.4, 2.2],
        }
    )
    ordered = raw.sort_values(by=["a", "b"])
    expected_sorting = [
        pq.SortingColumn(0, descending=False, nulls_first=False),
        pq.SortingColumn(1, descending=False, nulls_first=False),
    ]
    artifact = ln.Artifact.from_dataframe(
        ordered,
        key="df_sorted.parquet",
        parquet_kwargs={"sorting_columns": expected_sorting},
    ).save()
    # the sorting metadata must round-trip into the written parquet file
    dataset = artifact.open()
    first_fragment = next(dataset.get_fragments())
    written_sorting = list(first_fragment.metadata.row_group(0).sorting_columns)
    assert written_sorting == expected_sorting
# per-instance cache of the current user's database id, keyed by instance slug
user_id_cache = {}


def current_user_id() -> int:
    """Return the database id of the current user, registering them if needed."""
    import lamindb_setup as ln_setup
    from lamindb_setup import settings
    from lamindb_setup._init_instance import register_user

    from lamindb.models import User

    def _lookup() -> int:
        # during migrations the users table may not be in a queryable state;
        # fall back to the primary user id 1
        if ln_setup.core.django.IS_MIGRATING:
            return 1
        try:
            return User.objects.get(uid=settings.user.uid).id
        except User.DoesNotExist:
            # first time this user touches the instance: register, then re-query
            register_user(settings.user)
            return User.objects.get(uid=settings.user.uid).id

    if not settings._instance_exists:
        # no configured instance to cache against; query directly
        return _lookup()
    slug = settings.instance.slug
    if slug not in user_id_cache:
        user_id_cache[slug] = _lookup()
    return user_id_cache[slug]
class CreationSettings:
    """Settings that control how records are created."""

    search_names: bool = True
    """Switch off to speed up creating records (default `True`).

    If `True`, search for alternative names and avoids duplicates.

    FAQ: :doc:`/faq/idempotency`
    """
    artifact_skip_size_hash: bool = False
    """To speed up registering high numbers of files (default `False`).

    This bypasses queries for size and hash to AWS & GCP.

    It speeds up file creation by about a factor 100.
    """
    artifact_silence_missing_run_warning: bool = False
    """Silence warning about missing run & transform during artifact creation (default `False`)."""
    _artifact_use_virtual_keys: bool = True
    """Treat `key` parameter in :class:`~lamindb.Artifact` as virtual.

    If `True`, the `key` is **not** used to construct file paths, but file paths are
    based on the `uid` of artifact.
    """


# module-level singleton through which these settings are accessed
creation_settings = CreationSettings()
class class_and_instance_method:
    """Decorator to define a method that works both as class and instance method.

    When the attribute is accessed on the class, the class is passed as the
    first argument; when accessed on an instance, the instance is passed.
    """

    def __init__(self, func):
        # keep the wrapped callable and mirror its metadata onto the descriptor
        self.func = func
        wraps(func)(self)

    def __get__(self, instance, owner):
        # bind to the instance when present, otherwise to the owning class
        target = owner if instance is None else instance

        @wraps(self.func)
        def bound(*args, **kwargs):
            return self.func(target, *args, **kwargs)

        return bound
from __future__ import annotations


def fake_bio_notebook_titles(n=100) -> list[str]:
    """A fake collection of study titles.

    Args:
        n: Number of titles to generate (default 100).

    Returns:
        A list of `n` generated sentences built from a biology-flavored
        vocabulary (organs, cell types, antibody isotypes plus generic
        research words).
    """
    # imports stay local: faker/faker_biology are optional example dependencies
    from faker import Faker

    fake = Faker()

    from faker_biology.mol_biol import Antibody
    from faker_biology.physiology import CellType, Organ, Organelle

    fake.add_provider(CellType)
    fake.add_provider(Organ)
    fake.add_provider(Organelle)
    fake.add_provider(Antibody)

    my_words = [
        "study",
        "investigate",
        "research",
        "result",
        "cluster",
        "rank",
        "candidate",
        "visualize",
        "efficiency",
        "classify",
    ]
    # loop variables are unused — `_` makes that explicit (idiom fix)
    my_words += [fake.organ() for _ in range(5)] + ["intestine", "intestinal"]
    my_words += [fake.celltype() for _ in range(10)]
    my_words += [fake.antibody_isotype() for _ in range(20)]

    return [fake.sentence(ext_word_list=my_words) for _ in range(n)]
from __future__ import annotations

import importlib
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from ... import Schema


def anndata_ensembl_gene_ids_and_valid_features_in_obs() -> Schema:
    """An `AnnData` schema validating Ensembl gene IDs and valid features in obs.

    .. literalinclude:: scripts/define_schema_anndata_ensembl_gene_ids_and_valid_features_in_obs.py
        :language: python

    Returns the `Schema` record of that name, creating it on first use by
    running the definition script in this package.
    """
    from ... import Schema

    try:
        # fast path: the schema already exists in the current instance
        return Schema.get(name="anndata_ensembl_gene_ids_and_valid_features_in_obs")
    except Schema.DoesNotExist:
        # importing the module executes the script that defines & saves the schema
        from . import define_schema_anndata_ensembl_gene_ids_and_valid_features_in_obs  # noqa

        try:
            return Schema.get(name="anndata_ensembl_gene_ids_and_valid_features_in_obs")
        except Schema.DoesNotExist:
            # the module was imported earlier in this process but the schema was
            # deleted since; reload re-runs the definition script
            importlib.reload(
                define_schema_anndata_ensembl_gene_ids_and_valid_features_in_obs
            )
            return Schema.get(name="anndata_ensembl_gene_ids_and_valid_features_in_obs")
name="schema", 28 | field=lamindb.base.fields.ForeignKey( 29 | blank=True, 30 | null=True, 31 | on_delete=django.db.models.deletion.SET_NULL, 32 | related_name="_tidytables", 33 | to="lamindb.schema", 34 | ), 35 | ), 36 | ] 37 | -------------------------------------------------------------------------------- /lamindb/migrations/0084_alter_schemafeature_feature_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-01-27 07:22 2 | 3 | import django.db.models.deletion 4 | from django.db import migrations 5 | 6 | import lamindb.base.fields 7 | 8 | 9 | class Migration(migrations.Migration): 10 | dependencies = [ 11 | ("lamindb", "0083_alter_feature_is_type_alter_flextable_is_type_and_more"), 12 | ] 13 | 14 | operations = [ 15 | migrations.AlterField( 16 | model_name="schemafeature", 17 | name="feature", 18 | field=lamindb.base.fields.ForeignKey( 19 | blank=True, 20 | on_delete=django.db.models.deletion.PROTECT, 21 | related_name="links_schema", 22 | to="lamindb.feature", 23 | ), 24 | ), 25 | migrations.AlterField( 26 | model_name="schemafeature", 27 | name="schema", 28 | field=lamindb.base.fields.ForeignKey( 29 | blank=True, 30 | on_delete=django.db.models.deletion.CASCADE, 31 | related_name="links_feature", 32 | to="lamindb.schema", 33 | ), 34 | ), 35 | ] 36 | -------------------------------------------------------------------------------- /tests/core/notebooks/duplicate/with-title-initialized-consecutive-finish.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# My duplicated test notebook (consecutive) with `ln.finish()`" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "This has actually different content than the original one in the `notebooks/` folder." 
15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import lamindb as ln\n", 24 | "\n", 25 | "ln.track()" 26 | ] 27 | } 28 | ], 29 | "metadata": { 30 | "kernelspec": { 31 | "display_name": "py310", 32 | "language": "python", 33 | "name": "python3" 34 | }, 35 | "language_info": { 36 | "codemirror_mode": { 37 | "name": "ipython", 38 | "version": 3 39 | }, 40 | "file_extension": ".py", 41 | "mimetype": "text/x-python", 42 | "name": "python", 43 | "nbconvert_exporter": "python", 44 | "pygments_lexer": "ipython3", 45 | "version": "3.12.8" 46 | } 47 | }, 48 | "nbformat": 4, 49 | "nbformat_minor": 2 50 | } 51 | -------------------------------------------------------------------------------- /lamindb/migrations/0117_fix_artifact_storage_hash_unique_constraints.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-07-26 18:50 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("lamindb", "0116_remove_artifact_unique_artifact_storage_key_hash_and_more"), 9 | ] 10 | 11 | operations = [ 12 | migrations.RemoveConstraint( 13 | model_name="artifact", 14 | name="unique_artifact_storage_key_hash", 15 | ), 16 | migrations.AddConstraint( 17 | model_name="artifact", 18 | constraint=models.UniqueConstraint( 19 | condition=models.Q(("key__isnull", False)), 20 | fields=("storage", "key", "hash"), 21 | name="unique_artifact_storage_key_hash_not_null", 22 | ), 23 | ), 24 | migrations.AddConstraint( 25 | model_name="artifact", 26 | constraint=models.UniqueConstraint( 27 | condition=models.Q(("key__isnull", True)), 28 | fields=("storage", "hash"), 29 | name="unique_artifact_storage_hash_null_key", 30 | ), 31 | ), 32 | ] 33 | -------------------------------------------------------------------------------- /tests/core/notebooks/load_schema.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "0", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import lamindb as ln" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "id": "1", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "# this is a test case because we had an issue with path resolution at some point: https://github.com/laminlabs/lamindb/pull/3211\n", 21 | "valid_features = ln.examples.schemas.valid_features()" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "id": "2", 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "valid_features.delete(permanent=True)" 32 | ] 33 | } 34 | ], 35 | "metadata": { 36 | "language_info": { 37 | "codemirror_mode": { 38 | "name": "ipython", 39 | "version": 3 40 | }, 41 | "file_extension": ".py", 42 | "mimetype": "text/x-python", 43 | "name": "python", 44 | "nbconvert_exporter": "python", 45 | "pygments_lexer": "ipython3", 46 | "version": "3.12.8" 47 | } 48 | }, 49 | "nbformat": 4, 50 | "nbformat_minor": 5 51 | } 52 | -------------------------------------------------------------------------------- /lamindb/migrations/0143_remove_transform_entrypoint_transform_config_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-11-16 13:36 2 | 3 | import django.db.models.deletion 4 | from django.db import migrations, models 5 | 6 | 7 | class Migration(migrations.Migration): 8 | dependencies = [ 9 | ("lamindb", "0142_alter_transform_environment_transformtransform_and_more"), 10 | ] 11 | 12 | operations = [ 13 | migrations.RemoveField( 14 | model_name="transform", 15 | name="entrypoint", 16 | ), 17 | migrations.AddField( 18 | model_name="transform", 19 | name="config", 20 | field=models.JSONField(null=True), 21 | ), 22 | 
import concurrent.futures

import lamindb as ln


# we need this test both in the core and the storage/cloud tests
# because the internal logic that retrieves information about other instances
# depends on whether the current instance is managed on the hub
def test_reference_storage_location(ccaplog):
    """Referencing a storage location managed by another instance logs a warning."""
    ln.Artifact("s3://lamindata/iris_studies/study0_raw_images")
    assert ln.Storage.get(root="s3://lamindata").instance_uid == "4XIuR0tvaiXM"
    assert (
        "referenced read-only storage location at s3://lamindata, is managed by instance with uid 4XIuR0tvaiXM"
        in ccaplog.text
    )


def test_create_storage_locations_parallel():
    """Concurrent creation of the same storage location yields a single record."""
    root: str = "nonregistered_storage"

    def create_storage() -> str:
        # idempotent save of the same root from multiple threads
        ln.Storage(root=root).save()  # type: ignore
        return root

    n_parallel = 3
    with concurrent.futures.ThreadPoolExecutor(max_workers=n_parallel) as executor:
        # `_` for the unused loop variable (idiom fix); result() re-raises any
        # exception from the worker threads
        futures = [executor.submit(create_storage) for _ in range(n_parallel)]
        _ = [future.result() for future in concurrent.futures.as_completed(futures)]

    storage = ln.Storage.get(root__endswith=root)
    storage.delete()
import lamindb as ln


def test_create_anndata_with_curation():
    """Re-ingesting the same AnnData with a schema is idempotent and picks up new features."""
    adata = ln.examples.datasets.mini_immuno.get_dataset1(otype="AnnData")
    feature1 = ln.Feature(name="sample_note", dtype=str).save()

    # ingest the first time
    artifact = ln.Artifact.from_anndata(
        adata,
        key="examples/mini_immuno1.h5ad",
        schema="ensembl_gene_ids_and_valid_features_in_obs",
    ).save()
    # capture the obs_schema because we'll overwrite it
    obs_schema = artifact.features.slots["obs"]

    # define another feature so that upon re-ingestion, we track more than before
    # (this also tests non-trivial idempotency)
    feature2 = ln.Feature(name="treatment_time_h", dtype=int).save()
    artifact = ln.Artifact.from_anndata(
        adata,
        key="examples/mini_immuno1.h5ad",
        schema="ensembl_gene_ids_and_valid_features_in_obs",
    ).save()

    # clean up every record this test created
    schemas = artifact.features.slots
    artifact.delete(permanent=True)
    for schema in schemas.values():
        schema.delete(permanent=True)
    obs_schema.delete(permanent=True)
    feature1.delete(permanent=True)
    feature2.delete(permanent=True)
# Generated by Django 5.2 on 2025-05-29 12:02

from django.db import migrations


def fix_artifact_kind(apps, schema_editor):
    """Rename the artifact kind marker `__lamindb__` to `__lamindb_run__`."""
    Artifact = apps.get_model("lamindb", "Artifact")
    Artifact.objects.filter(kind="__lamindb__").update(kind="__lamindb_run__")


class Migration(migrations.Migration):
    # Data fix for Artifact.kind, then removal of the write-log machinery
    # (WriteLog and its auxiliary models are dropped entirely).

    dependencies = [
        ("lamindb", "0102_remove_writelog_branch_code_and_more"),
    ]

    operations = [
        # run the data fix before any schema changes
        migrations.RunPython(fix_artifact_kind),
        # drop WriteLog's FK fields first so the referenced models
        # can be deleted cleanly below
        migrations.RemoveField(
            model_name="writelog",
            name="migration_state",
        ),
        migrations.RemoveField(
            model_name="writelog",
            name="table",
        ),
        migrations.RemoveField(
            model_name="writelog",
            name="branch",
        ),
        migrations.RemoveField(
            model_name="writelog",
            name="space",
        ),
        migrations.DeleteModel(
            name="WriteLogLock",
        ),
        migrations.DeleteModel(
            name="MigrationState",
        ),
        migrations.DeleteModel(
            name="TableState",
        ),
        migrations.DeleteModel(
            name="WriteLog",
        ),
    ]
"""Examples and utilities for Weights & Biases.

.. autofunction:: save_wandb_features
"""

import lamindb as ln


def save_wandb_features():
    """Saves all Weights & Biases project and run related features.

    Saves the following features:

    - wandb_run_id
    - wandb_run_name
    - wandb_run_entity
    - wandb_project
    - wandb_state
    - wandb_url
    - wandb_tags
    - wandb_group
    - wandb_job_type
    - wandb_timestamp
    - wandb_runtime
    """
    # parent feature type grouping all W&B-related features
    wandb_type = ln.Feature(name="Weights & Biases", is_type=True).save()
    # string-valued run/project metadata
    for name in (
        "wandb_run_id",
        "wandb_run_name",
        "wandb_run_entity",
        "wandb_project",
        "wandb_state",
        "wandb_url",
        "wandb_tags",
        "wandb_group",
        "wandb_job_type",
    ):
        ln.Feature(name=name, dtype=str, type=wandb_type).save()
    # numeric timing metadata
    ln.Feature(name="wandb_timestamp", dtype=float, type=wandb_type).save()
    ln.Feature(name="wandb_runtime", dtype=float, type=wandb_type).save()
# Generated by Django 5.2 on 2025-07-06 23:09

from django.db import migrations

import lamindb.base.fields


def extenddefault_values(apps, schema_editor):
    """Extend the 1-char uids of the default Space and Branch records to 12 chars."""
    Space = apps.get_model("lamindb", "Space")
    Branch = apps.get_model("lamindb", "Branch")

    # default space: "a" -> "aaaaaaaaaaaa"
    space = Space.objects.get(uid="a")
    space.uid = 12 * "a"
    space.save()

    trash_branch = Branch.objects.get(uid="t")
    trash_branch.uid = 12 * "t"
    trash_branch.save()

    archive_branch = Branch.objects.get(uid="a")
    archive_branch.uid = 12 * "a"
    archive_branch.save()

    main_branch = Branch.objects.get(uid="m")
    main_branch.uid = 12 * "m"
    main_branch.save()


class Migration(migrations.Migration):
    dependencies = [
        ("lamindb", "0114_alter_run__status_code"),
    ]

    operations = [
        # widen Space.uid to a fixed 12-character identifier
        migrations.AlterField(
            model_name="space",
            name="uid",
            field=lamindb.base.fields.CharField(
                blank=True,
                db_default="aaaaaaaaaaaa",
                db_index=True,
                # fixed: was "aaaaaaaaaaaaa" (13 chars), which exceeds
                # max_length=12 and disagreed with db_default
                default="aaaaaaaaaaaa",
                editable=False,
                max_length=12,
                unique=True,
            ),
        ),
        # migrate existing default rows to the new 12-char uids
        migrations.RunPython(
            extenddefault_values,
        ),
    ]
from __future__ import annotations

from typing import TYPE_CHECKING

import pyarrow.dataset
from lamindb_setup.core.upath import LocalPathClasses

if TYPE_CHECKING:
    from pyarrow.dataset import Dataset as PyArrowDataset
    from upath import UPath


# file suffixes that pyarrow.dataset can read
PYARROW_SUFFIXES = (".parquet", ".csv", ".json", ".orc", ".arrow", ".feather", ".ipc")


def _open_pyarrow_dataset(paths: UPath | list[UPath], **kwargs) -> PyArrowDataset:
    """Open a pyarrow dataset from a single path or a list of paths.

    Args:
        paths: A single (possibly remote) path — which may be a directory —
            or a list of file paths. All paths in a list are assumed to live
            on the same filesystem as the first one.
        **kwargs: Forwarded to :func:`pyarrow.dataset.dataset`.

    Returns:
        The opened pyarrow dataset.
    """
    if isinstance(paths, list):
        # a single path can be a directory, but a list of paths
        # has to be a flat list of files
        paths_str: list[str] = []
        path0 = paths[0]
        # PEP 8: named helpers instead of lambda assignment (E731)
        if isinstance(path0, LocalPathClasses):
            filesystem = None

            def path_to_str(p: UPath) -> str:
                return p.as_posix()
        else:
            filesystem = path0.fs

            def path_to_str(p: UPath) -> str:
                return p.path

        for path in paths:
            if (
                getattr(path, "protocol", None) not in {"http", "https"}
                and path.is_dir()
            ):
                # expand a directory into the files it contains
                # (entries without a suffix are assumed to be directories)
                paths_str += [path_to_str(p) for p in path.rglob("*") if p.suffix != ""]
            else:
                paths_str.append(path_to_str(path))
    elif isinstance(paths, LocalPathClasses):
        paths_str, filesystem = paths.as_posix(), None
    else:
        paths_str, filesystem = paths.path, paths.fs

    return pyarrow.dataset.dataset(paths_str, filesystem=filesystem, **kwargs)
2 | 3 | Any object in LaminDB has the following 3 levels of visibility through 3 default branches: 4 | 5 | - `main`: visible 6 | - `archive`: excluded from query & search 7 | - `trash`: excluded from query & search, scheduled for deletion 8 | 9 | Let's look at an example for an `Artifact` object while noting that the same applies to any other `SQLRecord`. 10 | 11 | ```python 12 | import lamindb as ln 13 | import pandas as pd 14 | 15 | df = pd.DataFrame({"a": [1, 2], "b": [3, 4]}) 16 | artifact = ln.Artifact.from_dataframe(df, key="dataset.parquet").save() 17 | ``` 18 | 19 | An artifact is by default created on the `main` branch. 20 | 21 | ```python 22 | assert artifact.branch.name == "main" 23 | ln.Artifact.filter(key="dataset.parquet").to_dataframe() 24 | # the artifact shows up 25 | ``` 26 | 27 | If you delete an artifact, it gets moved into the `trash` branch. 28 | 29 | ```python 30 | artifact.delete() 31 | assert artifact.branch.name == "trash" 32 | ``` 33 | 34 | Artifacts in trash won't show up in queries with default arguments: 35 | 36 | ```python 37 | ln.Artifact.filter(key="dataset.parquet").to_dataframe() 38 | # the artifact does not show up 39 | ``` 40 | 41 | You can query for them by adding the `trash` branch to the filter. 42 | 43 | ```python 44 | ln.Artifact.filter(key="dataset.parquet", branch__name="trash").to_dataframe() 45 | # the artifact shows up 46 | ``` 47 | 48 | You can restore an artifact from trash: 49 | 50 | ```python 51 | artifact.restore() 52 | ln.Artifact.filter(key="dataset.parquet").to_dataframe() 53 | # the artifact shows up 54 | ``` 55 | -------------------------------------------------------------------------------- /tests/core/notebooks/no-title.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "0", 6 | "metadata": {}, 7 | "source": [ 8 | "A notebook without title." 
import lamindb as ln
import pytest


def test_run():
    # creating a run for an unsaved transform must fail with a clear message
    transform = ln.Transform(key="My transform")
    with pytest.raises(ValueError) as error:
        ln.Run(transform)
    assert (
        error.exconly()
        == "ValueError: Please save transform record before creating a run"
    )
    transform.save()
    # a freshly created run starts as "scheduled" without reference metadata
    run = ln.Run(transform).save()
    assert run.status == "scheduled"
    assert run.reference is None
    assert run.reference_type is None
    # reference metadata can be passed at construction time
    run2 = ln.Run(transform, reference="test1", reference_type="test2").save()
    assert run2.reference == "test1"
    assert run2.reference_type == "test2"
    assert run.uid != run2.uid

    # attach report and environment artifacts to the second run
    report_artifact = ln.Artifact("README.md", description="report of run2").save()
    run2.report = report_artifact
    environment = ln.Artifact("CONTRIBUTING.md", description="env of run2").save()
    run2.environment = environment

    run2.delete(permanent=True)

    # test deletion of run including attached artifacts
    assert ln.Artifact.objects.filter(uid=report_artifact.uid).exists() is False
    assert ln.Artifact.objects.filter(uid=environment.uid).exists() is False

    transform.delete(permanent=True)

    # deleting the transform removes its remaining run as well
    assert ln.Run.filter(uid=run.uid).count() == 0


def test_edge_cases():
    # only a single positional argument (the transform) is accepted
    with pytest.raises(ValueError) as error:
        ln.Run(1, 2)
    assert error.exconly() == "ValueError: Only one non-keyword arg allowed: transform"
    # the transform argument is required
    with pytest.raises(TypeError) as error:
        ln.Run()
    assert error.exconly() == "TypeError: Pass transform parameter"
9 | 10 | Small querysets delete individually, large ones trigger bulk delete.""" 11 | ln.settings.creation.search_names = False 12 | labels = [ln.Record(name=f"label_{i}") for i in range(3)] 13 | ln.settings.creation.search_names = True 14 | ln.save(labels) 15 | ln.Record.filter(name__startswith="label_").delete(permanent=permanent) 16 | assert ln.Record.filter(name__startswith="label_", branch_id=-1).count() == ( 17 | 0 if permanent else 3 18 | ) 19 | assert ln.ULabel.filter(name__startswith="label_").count() == 0 20 | 21 | 22 | def test_recreate_soft_deleted_record(): 23 | # testing soft delete and recreate with postgres (sqlite is tested in curators/test_records.py) 24 | # soft delete a record, then recreate it with some changes 25 | record = bt.Ethnicity.from_source(ontology_id="HANCESTRO:0006").save() 26 | assert record.branch_id == 1 27 | record.delete() 28 | assert record.branch_id == -1 29 | # now recreate the same record from ontology_id with a different description 30 | # there's a unique constraint on ontology_id, so this should recover the trashed record 31 | record = bt.Ethnicity.from_source(ontology_id="HANCESTRO:0006") 32 | record.description = "new description" 33 | record.save() 34 | # now this record is recovered from the trash with the new description 35 | assert record.branch_id == 1 36 | assert record.description == "new description" 37 | bt.Ethnicity.objects.filter().delete() 38 | -------------------------------------------------------------------------------- /tests/core/notebooks/with-title-initialized-consecutive-finish-not-last-cell.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# My test notebook (consecutive) with `ln.finish()` not in last cell" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import lamindb as 
# Generated by Django 5.2 on 2025-07-26 15:55

from django.db import migrations, models

import lamindb.base.fields


class Migration(migrations.Migration):
    # Recreates the (storage, key, hash) uniqueness constraint on Artifact
    # and relaxes several char/text fields to nullable & indexed.

    dependencies = [
        ("lamindb", "0115_alter_space_uid"),
    ]

    operations = [
        # drop the old constraint so it can be re-added in its new form below
        migrations.RemoveConstraint(
            model_name="artifact",
            name="unique_artifact_storage_key_hash",
        ),
        migrations.AlterField(
            model_name="record",
            name="description",
            field=lamindb.base.fields.CharField(
                blank=True, db_index=True, default=None, max_length=255, null=True
            ),
        ),
        migrations.AlterField(
            model_name="reference",
            name="text",
            field=lamindb.base.fields.TextField(
                blank=True, db_index=True, default=None, null=True
            ),
        ),
        migrations.AlterField(
            model_name="reference",
            name="url",
            field=lamindb.base.fields.URLField(blank=True, db_index=True, null=True),
        ),
        migrations.AlterField(
            model_name="run",
            name="name",
            field=lamindb.base.fields.CharField(
                blank=True, db_index=True, default=None, max_length=150, null=True
            ),
        ),
        # re-add the uniqueness constraint over (storage, key, hash)
        migrations.AddConstraint(
            model_name="artifact",
            constraint=models.UniqueConstraint(
                fields=("storage", "key", "hash"),
                name="unique_artifact_storage_key_hash",
            ),
        ),
    ]
ln.Transform.get( 34 | key="with-title-initialized-consecutive-finish-not-last-cell.ipynb" 35 | ).uid 36 | 37 | # now, assume the user renames the notebook 38 | new_path = notebook_path.with_name("no-uid-renamed.ipynb") 39 | os.system(f"cp {notebook_path} {new_path}") # noqa: S605 40 | 41 | env = os.environ 42 | env["LAMIN_TESTING"] = "true" 43 | result = subprocess.run( # noqa: S602 44 | f"jupyter nbconvert --to notebook --inplace --execute {new_path}", 45 | shell=True, 46 | capture_output=True, 47 | env=env, 48 | ) 49 | print(result.stdout.decode()) 50 | print(result.stderr.decode()) 51 | assert result.returncode == 0 52 | del env["LAMIN_TESTING"] 53 | 54 | assert ln.Transform.get(key="no-uid-renamed.ipynb").uid == uid 55 | 56 | # new_path.unlink() 57 | -------------------------------------------------------------------------------- /docs/faq/validate-fields.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Django field validation\n", 8 | "\n", 9 | "[Django field validation](https://docs.djangoproject.com/en/5.1/ref/validators/) are enabled for models that inherit the `ValidateFields` class." 
10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "# pip install lamindb\n", 19 | "!lamin init --storage ./test-django-validation" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import lamindb as ln\n", 29 | "from lamindb.core.exceptions import FieldValidationError" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "try:\n", 39 | " ln.Reference(name=\"my ref\", doi=\"abc.ef\", url=\"myurl.com\")\n", 40 | "except FieldValidationError as e:\n", 41 | " print(e)" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "!lamin delete --force test-django-validation" 51 | ] 52 | } 53 | ], 54 | "metadata": { 55 | "kernelspec": { 56 | "display_name": "py310", 57 | "language": "python", 58 | "name": "python3" 59 | }, 60 | "language_info": { 61 | "codemirror_mode": { 62 | "name": "ipython", 63 | "version": 3 64 | }, 65 | "file_extension": ".py", 66 | "mimetype": "text/x-python", 67 | "name": "python", 68 | "nbconvert_exporter": "python", 69 | "pygments_lexer": "ipython3", 70 | "version": "3.10.13" 71 | } 72 | }, 73 | "nbformat": 4, 74 | "nbformat_minor": 2 75 | } 76 | -------------------------------------------------------------------------------- /lamindb/examples/datasets/save_mini_immuno_datasets.py: -------------------------------------------------------------------------------- 1 | import bionty as bt 2 | 3 | import lamindb as ln 4 | 5 | ## define valid labels 6 | ln.Record.from_values(["DMSO", "IFNG"], create=True).save() 7 | ln.Record.from_values(["Experiment 1", "Experiment 2"], create=True).save() 8 | bt.CellType.from_values(["B cell", "T cell"]).save() 9 | 10 | # observation-level metadata 11 | 
from __future__ import annotations

from functools import cached_property
from typing import TYPE_CHECKING

from ._anndata_accessor import AnnDataAccessor

if TYPE_CHECKING:
    from zarr import Group

    from lamindb import Artifact


class _TablesAccessor:
    """Dict-like access to the `tables` group of a SpatialData store."""

    def __init__(self, tables: Group, artifact: Artifact | None = None):
        self._tables = tables

        self._artifact = artifact

    def __getitem__(self, key: str) -> AnnDataAccessor:
        """Return an AnnData accessor for the table stored under `key`."""
        table_group = self._tables[key]
        return AnnDataAccessor(
            connection=None,
            storage=table_group,
            filename=key,
            artifact=self._artifact,
        )

    def keys(self) -> list[str]:
        """Names of all available tables."""
        return [*self._tables.keys()]

    def __repr__(self) -> str:
        """Description of the _TablesAccessor object."""
        return f"Accessor for the SpatialData attribute tables\n with keys: {self.keys()}"


class SpatialDataAccessor:
    """Cloud-backed SpatialData.

    For now only allows to access `tables`.
    """

    def __init__(self, storage: Group, name: str, artifact: Artifact | None = None):
        self.storage = storage
        self._name = name

        self._artifact = artifact

    @cached_property
    def tables(self) -> _TablesAccessor:
        """tables of the underlying SpatialData object."""
        tables_group = self.storage["tables"]
        return _TablesAccessor(tables_group, self._artifact)

    def __repr__(self):
        """Description of the SpatialDataAccessor object."""
        lines = [
            "SpatialDataAccessor object",
            f" constructed for the SpatialData object {self._name}",
            f" with tables: {self.tables.keys()}",
        ]
        return "\n".join(lines)
apps.get_model("lamindb", "Branch") 9 | 10 | space = Space.objects.get(uid="A") 11 | space.uid = "a" 12 | space.name = "all" 13 | space.save() 14 | 15 | trash_branch = Branch.objects.get(uid="T") 16 | trash_branch.uid = "t" 17 | trash_branch.name = "trash" 18 | trash_branch.save() 19 | 20 | archive_branch = Branch.objects.get(uid="A") 21 | archive_branch.uid = "a" 22 | archive_branch.name = "archive" 23 | archive_branch.save() 24 | 25 | main_branch = Branch.objects.get(uid="M") 26 | main_branch.uid = "m" 27 | main_branch.name = "main" 28 | main_branch.save() 29 | 30 | 31 | class Migration(migrations.Migration): 32 | dependencies = [ 33 | ("lamindb", "0112_alter_recordartifact_feature_and_more"), 34 | ] 35 | 36 | operations = [ 37 | migrations.RunPython( 38 | lowercase_default_values, 39 | ), 40 | migrations.AlterModelOptions( 41 | name="branch", 42 | options={}, 43 | ), 44 | migrations.AlterModelOptions( 45 | name="space", 46 | options={}, 47 | ), 48 | migrations.AddConstraint( 49 | model_name="branch", 50 | constraint=models.UniqueConstraint( 51 | django.db.models.functions.text.Lower("name"), 52 | name="unique_branch_name_lower", 53 | ), 54 | ), 55 | migrations.AddConstraint( 56 | model_name="space", 57 | constraint=models.UniqueConstraint( 58 | django.db.models.functions.text.Lower("name"), 59 | name="unique_space_name_lower", 60 | ), 61 | ), 62 | ] 63 | -------------------------------------------------------------------------------- /docs/faq/import-modules.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# What happens if I import a schema module without lamindb?" 
9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "# !pip install 'lamindb[bionty]'\n", 18 | "!lamin init --storage testmodule --modules bionty" 19 | ] 20 | }, 21 | { 22 | "attachments": {}, 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "Upon `import`, nothing yet happens:" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import bionty as bt" 36 | ] 37 | }, 38 | { 39 | "attachments": {}, 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "If you try to access an attribute (other than `model`), you'll load the instance in the same way as calling `import lamindb`.\n", 44 | "\n", 45 | "Under the hood, `lamindb` is imported!" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "assert bt.Organism(name=\"human\") is not None" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "!lamin delete --force testmodule" 64 | ] 65 | } 66 | ], 67 | "metadata": { 68 | "kernelspec": { 69 | "display_name": "py39", 70 | "language": "python", 71 | "name": "python3" 72 | }, 73 | "language_info": { 74 | "artifact_extension": ".py", 75 | "codemirror_mode": { 76 | "name": "ipython", 77 | "version": 3 78 | }, 79 | "mimetype": "text/x-python", 80 | "name": "python", 81 | "nbconvert_exporter": "python", 82 | "pygments_lexer": "ipython3", 83 | "version": "3.9.16" 84 | } 85 | }, 86 | "nbformat": 4, 87 | "nbformat_minor": 2 88 | } 89 | -------------------------------------------------------------------------------- /lamindb/migrations/0120_add_record_fk_constraint.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-08-07 
18:52 2 | 3 | from django.db import migrations 4 | 5 | CREATE_FUNCTION_SQL = """ 6 | CREATE OR REPLACE FUNCTION is_valid_record_type(record_type_id INTEGER, record_is_type BOOLEAN) 7 | RETURNS BOOLEAN AS $$ 8 | BEGIN 9 | -- Record with no type is valid 10 | IF record_type_id IS NULL THEN 11 | RETURN TRUE; 12 | END IF; 13 | 14 | -- If current record is a type, it can only reference schema-less types 15 | IF record_is_type THEN 16 | RETURN EXISTS ( 17 | SELECT 1 FROM lamindb_record r 18 | WHERE r.id = record_type_id AND r.is_type AND r.schema_id IS NULL 19 | ); 20 | END IF; 21 | 22 | -- Regular records can reference any type 23 | RETURN EXISTS ( 24 | SELECT 1 FROM lamindb_record r 25 | WHERE r.id = record_type_id AND r.is_type 26 | ); 27 | END; 28 | $$ LANGUAGE plpgsql; 29 | """ 30 | 31 | ADD_CONSTRAINT_SQL = """ 32 | ALTER TABLE lamindb_record 33 | ADD CONSTRAINT record_type_is_valid_fk 34 | CHECK (is_valid_record_type(type_id, is_type)); 35 | """ 36 | 37 | DROP_CONSTRAINT_SQL = ( 38 | "ALTER TABLE lamindb_record DROP CONSTRAINT IF EXISTS record_type_is_valid_fk;" 39 | ) 40 | DROP_FUNCTION_SQL = "DROP FUNCTION IF EXISTS is_valid_record_type(INTEGER, BOOLEAN);" 41 | 42 | 43 | def apply_postgres_constraint(apps, schema_editor): 44 | if schema_editor.connection.vendor == "postgresql": 45 | schema_editor.execute(CREATE_FUNCTION_SQL) 46 | schema_editor.execute(ADD_CONSTRAINT_SQL) 47 | 48 | 49 | def revert_postgres_constraint(apps, schema_editor): 50 | if schema_editor.connection.vendor == "postgresql": 51 | schema_editor.execute(DROP_CONSTRAINT_SQL) 52 | schema_editor.execute(DROP_FUNCTION_SQL) 53 | 54 | 55 | class Migration(migrations.Migration): 56 | dependencies = [ 57 | ("lamindb", "0119_rename_records_project_linked_in_records"), 58 | ] 59 | 60 | operations = [ 61 | migrations.RunPython( 62 | apply_postgres_constraint, reverse_code=revert_postgres_constraint 63 | ), 64 | ] 65 | -------------------------------------------------------------------------------- 
/lamindb/migrations/0132_record_parents_record_reference_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.1.12 on 2025-10-04 10:57 2 | 3 | import django.db.models.deletion 4 | from django.db import migrations, models 5 | 6 | import lamindb.base.fields 7 | 8 | 9 | class Migration(migrations.Migration): 10 | dependencies = [ 11 | ("lamindb", "0131_record_unique_name_type_space"), 12 | ] 13 | 14 | operations = [ 15 | migrations.AddField( 16 | model_name="record", 17 | name="parents", 18 | field=models.ManyToManyField(related_name="children", to="lamindb.record"), 19 | ), 20 | migrations.AddField( 21 | model_name="record", 22 | name="reference", 23 | field=lamindb.base.fields.CharField( 24 | blank=True, db_index=True, default=None, max_length=255, null=True 25 | ), 26 | ), 27 | migrations.AddField( 28 | model_name="record", 29 | name="reference_type", 30 | field=lamindb.base.fields.CharField( 31 | blank=True, db_index=True, default=None, max_length=25, null=True 32 | ), 33 | ), 34 | migrations.RenameField( 35 | model_name="record", 36 | old_name="ulabels", 37 | new_name="linked_ulabels", 38 | ), 39 | migrations.AlterField( 40 | model_name="record", 41 | name="linked_ulabels", 42 | field=models.ManyToManyField( 43 | related_name="linked_in_records", 44 | through="lamindb.RecordULabel", 45 | to="lamindb.ulabel", 46 | ), 47 | ), 48 | migrations.AlterField( 49 | model_name="record", 50 | name="run", 51 | field=lamindb.base.fields.ForeignKey( 52 | blank=True, 53 | default=lamindb.models.run.current_run, 54 | editable=False, 55 | null=True, 56 | on_delete=django.db.models.deletion.PROTECT, 57 | related_name="output_records", 58 | to="lamindb.run", 59 | ), 60 | ), 61 | ] 62 | -------------------------------------------------------------------------------- /lamindb/migrations/0121_recorduser.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 
on 2025-09-05 12:25 2 | 3 | import django.db.models.deletion 4 | from django.db import migrations, models 5 | 6 | import lamindb.base.fields 7 | import lamindb.models.sqlrecord 8 | 9 | 10 | class Migration(migrations.Migration): 11 | dependencies = [ 12 | ("lamindb", "0120_add_record_fk_constraint"), 13 | ] 14 | 15 | operations = [ 16 | migrations.CreateModel( 17 | name="RecordUser", 18 | fields=[ 19 | ("id", models.BigAutoField(primary_key=True, serialize=False)), 20 | ( 21 | "feature", 22 | lamindb.base.fields.ForeignKey( 23 | blank=True, 24 | on_delete=django.db.models.deletion.PROTECT, 25 | related_name="links_recorduser", 26 | to="lamindb.feature", 27 | ), 28 | ), 29 | ( 30 | "record", 31 | lamindb.base.fields.ForeignKey( 32 | blank=True, 33 | on_delete=django.db.models.deletion.CASCADE, 34 | related_name="values_user", 35 | to="lamindb.record", 36 | ), 37 | ), 38 | ( 39 | "value", 40 | lamindb.base.fields.ForeignKey( 41 | blank=True, 42 | on_delete=django.db.models.deletion.PROTECT, 43 | related_name="links_record", 44 | to="lamindb.user", 45 | ), 46 | ), 47 | ], 48 | options={ 49 | "unique_together": {("record", "feature", "value")}, 50 | }, 51 | bases=(models.Model, lamindb.models.sqlrecord.IsLink), 52 | ), 53 | migrations.AddField( 54 | model_name="record", 55 | name="linked_users", 56 | field=models.ManyToManyField( 57 | related_name="records", through="lamindb.RecordUser", to="lamindb.user" 58 | ), 59 | ), 60 | ] 61 | -------------------------------------------------------------------------------- /tests/core/notebooks/with-title-initialized-consecutive-finish.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# My test notebook (consecutive) with `ln.finish()`" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import lamindb as 
ln\n", 17 | "import pytest" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "with pytest.raises(ln.errors.InvalidArgument) as error:\n", 27 | " ln.track(\"ujPaFZ\")\n", 28 | "print(error.exconly())\n", 29 | "assert error.exconly().startswith(\n", 30 | " 'lamindb.errors.InvalidArgument: Please pass an auto-generated uid instead of \"ujPaFZ\". Resolve by running:'\n", 31 | ")" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "# with uid passed\n", 41 | "ln.track(\"ujPaFZatnMLG0000\")" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "print(\"my consecutive cell\")" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "print(\"my consecutive cell\")" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "ln.finish()" 69 | ] 70 | } 71 | ], 72 | "metadata": { 73 | "kernelspec": { 74 | "display_name": "py312", 75 | "language": "python", 76 | "name": "python3" 77 | }, 78 | "language_info": { 79 | "codemirror_mode": { 80 | "name": "ipython", 81 | "version": 3 82 | }, 83 | "file_extension": ".py", 84 | "mimetype": "text/x-python", 85 | "name": "python", 86 | "nbconvert_exporter": "python", 87 | "pygments_lexer": "ipython3", 88 | "version": "3.12.8" 89 | } 90 | }, 91 | "nbformat": 4, 92 | "nbformat_minor": 2 93 | } 94 | -------------------------------------------------------------------------------- /lamindb/migrations/0085_alter_feature_is_type_alter_flextable_is_type_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-01-27 13:48 2 | 3 | from 
django.db import migrations 4 | 5 | import lamindb.base.fields 6 | 7 | 8 | class Migration(migrations.Migration): 9 | dependencies = [ 10 | ("lamindb", "0084_alter_schemafeature_feature_and_more"), 11 | ] 12 | 13 | operations = [ 14 | migrations.AlterField( 15 | model_name="feature", 16 | name="is_type", 17 | field=lamindb.base.fields.BooleanField( 18 | blank=True, db_index=True, default=False, null=True 19 | ), 20 | ), 21 | migrations.AlterField( 22 | model_name="flextable", 23 | name="is_type", 24 | field=lamindb.base.fields.BooleanField( 25 | blank=True, db_index=True, default=False, null=True 26 | ), 27 | ), 28 | migrations.AlterField( 29 | model_name="param", 30 | name="is_type", 31 | field=lamindb.base.fields.BooleanField( 32 | blank=True, db_index=True, default=False, null=True 33 | ), 34 | ), 35 | migrations.AlterField( 36 | model_name="project", 37 | name="is_type", 38 | field=lamindb.base.fields.BooleanField( 39 | blank=True, db_index=True, default=False, null=True 40 | ), 41 | ), 42 | migrations.AlterField( 43 | model_name="reference", 44 | name="is_type", 45 | field=lamindb.base.fields.BooleanField( 46 | blank=True, db_index=True, default=False, null=True 47 | ), 48 | ), 49 | migrations.AlterField( 50 | model_name="schema", 51 | name="is_type", 52 | field=lamindb.base.fields.BooleanField( 53 | blank=True, db_index=True, default=False, null=True 54 | ), 55 | ), 56 | migrations.AlterField( 57 | model_name="ulabel", 58 | name="is_type", 59 | field=lamindb.base.fields.BooleanField( 60 | blank=True, db_index=True, default=False, null=True 61 | ), 62 | ), 63 | ] 64 | -------------------------------------------------------------------------------- /lamindb/core/_compat.py: -------------------------------------------------------------------------------- 1 | import importlib.util 2 | from typing import Any, Callable, TypeVar 3 | 4 | T = TypeVar("T") 5 | 6 | 7 | def is_package_installed(package_name: str) -> bool: 8 | spec = importlib.util.find_spec(package_name) 
9 | return spec is not None 10 | 11 | 12 | def with_package(package_name: str, operation: Callable[[Any], T]) -> T: 13 | """Execute an operation that requires a specific package. 14 | 15 | Args: 16 | package_name: Package name (e.g., "mudata") 17 | operation: Function that takes the imported module and returns a result 18 | 19 | Examples: 20 | # For direct package functions 21 | result = with_package("mudata", lambda mod: mod.read_zarr(path)) 22 | """ 23 | try: 24 | module = importlib.import_module(package_name) 25 | return operation(module) 26 | except ImportError: 27 | raise ImportError( 28 | f"Package '{package_name}' is required but not installed. " 29 | f"Please install with: pip install {package_name}" 30 | ) from None 31 | 32 | 33 | def with_package_obj( 34 | obj: Any, class_name: str, package_name: str, operation: Callable[[Any], T] 35 | ) -> tuple[bool, T | None]: 36 | """Handle operations on objects that require specific packages. 37 | 38 | Args: 39 | obj: The object to operate on 40 | class_name: Expected class name (e.g., "MuData") 41 | package_name: Package that provides the class (e.g., "mudata") 42 | operation: Function to call with the object if package is available. 43 | 44 | Examples: 45 | # For instance methods 46 | handled, res = apply_class_func(dmem, "MuData", "mudata", 47 | lambda obj: obj.write(filepath)) 48 | """ 49 | if obj.__class__.__name__ == class_name: 50 | try: 51 | importlib.import_module(package_name) 52 | result = operation(obj) 53 | return True, result 54 | except ImportError: 55 | raise ImportError( 56 | f"Object appears to be {class_name} but '{package_name}' package is not installed. 
" 57 | f"Please install with: pip install {package_name}" 58 | ) from None 59 | 60 | return False, None 61 | -------------------------------------------------------------------------------- /tests/core/test_has_parents.py: -------------------------------------------------------------------------------- 1 | import bionty as bt 2 | import lamindb as ln 3 | 4 | 5 | def test_view_parents(): 6 | label1 = ln.Record(name="label1") 7 | label2 = ln.Record(name="label2") 8 | label1.save() 9 | label2.save() 10 | label1.parents.add(label2) 11 | label1.view_parents(ln.Record.name, distance=1) 12 | label1.delete(permanent=True) 13 | label2.delete(permanent=True) 14 | 15 | 16 | def test_query_parents_children(): 17 | label1 = ln.Record(name="label1").save() 18 | label2 = ln.Record(name="label2").save() 19 | label3 = ln.Record(name="label3").save() 20 | label1.children.add(label2) 21 | label2.children.add(label3) 22 | parents = label3.query_parents() 23 | assert len(parents) == 2 24 | assert label1 in parents and label2 in parents 25 | children = label1.query_children() 26 | assert len(children) == 2 27 | assert label2 in children and label3 in children 28 | label1.delete(permanent=True) 29 | label2.delete(permanent=True) 30 | label3.delete(permanent=True) 31 | 32 | 33 | def test_view_lineage_circular(): 34 | import pandas as pd 35 | 36 | transform = ln.Transform(key="test").save() 37 | run = ln.Run(transform=transform).save() 38 | artifact = ln.Artifact.from_dataframe( 39 | pd.DataFrame({"a": [1, 2, 3]}), description="test artifact", run=run 40 | ).save() 41 | run.input_artifacts.add(artifact) 42 | artifact.view_lineage() 43 | artifact.delete(permanent=True) 44 | transform.delete(permanent=True) 45 | 46 | 47 | def test_view_parents_connected_instance(): 48 | ct = bt.CellType.connect("laminlabs/cellxgene").first() 49 | 50 | if ct and hasattr(ct, "parents"): 51 | ct.view_parents(distance=2, with_children=True) 52 | 53 | 54 | def test_query_relatives_connected_instance(): 55 | ct = 
bt.CellType.connect("laminlabs/cellxgene").filter(name="T cell").first() 56 | 57 | if ct: 58 | parents = ct.query_parents() 59 | assert parents.db == "laminlabs/cellxgene" 60 | 61 | children = ct.query_children() 62 | assert children.db == "laminlabs/cellxgene" 63 | 64 | 65 | def test_view_lineage_connected_instance(): 66 | af = ln.Artifact.connect("laminlabs/cellxgene").first() 67 | 68 | if af and af.run: 69 | af.view_lineage() 70 | -------------------------------------------------------------------------------- /lamindb/migrations/0151_feature_update_feature_on_name_change.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2.8 on 2025-12-07 09:53 2 | 3 | import pgtrigger.compiler 4 | import pgtrigger.migrations 5 | from django.db import connection, migrations 6 | 7 | 8 | class Migration(migrations.Migration): 9 | dependencies = [ 10 | ("lamindb", "0150_rename_params_record_extra_data_and_more"), 11 | ] 12 | 13 | operations = [] # type: ignore 14 | 15 | 16 | if connection.vendor == "postgresql": 17 | Migration.operations += [ 18 | pgtrigger.migrations.AddTrigger( 19 | model_name="feature", 20 | trigger=pgtrigger.compiler.Trigger( 21 | name="update_feature_on_name_change", 22 | sql=pgtrigger.compiler.UpsertTriggerSql( 23 | condition="WHEN (OLD.name IS DISTINCT FROM NEW.name)", 24 | func="DECLARE\n old_renamed JSONB;\n new_renamed JSONB;\n ts TEXT;\nBEGIN\n -- Only proceed if name actually changed\n IF OLD.name IS DISTINCT FROM NEW.name THEN\n -- Update synonyms\n IF NEW.synonyms IS NULL OR NEW.synonyms = '' THEN\n NEW.synonyms := OLD.name;\n ELSIF position(OLD.name in NEW.synonyms) = 0 THEN\n NEW.synonyms := NEW.synonyms || '|' || OLD.name;\n END IF;\n\n -- Update _aux with rename history\n ts := TO_CHAR(NOW() AT TIME ZONE 'UTC', 'YYYY-MM-DD\"T\"HH24:MI:SS\"Z\"');\n\n -- Get existing renamed history or initialize empty object\n old_renamed := COALESCE((OLD._aux->>'renamed')::JSONB, '{}'::JSONB);\n\n -- 
Add old name with timestamp\n new_renamed := old_renamed || jsonb_build_object(ts, OLD.name);\n\n -- Update _aux with new renamed history\n IF NEW._aux IS NULL THEN\n NEW._aux := jsonb_build_object('renamed', new_renamed);\n ELSE\n NEW._aux := NEW._aux || jsonb_build_object('renamed', new_renamed);\n END IF;\n END IF;\n\n RETURN NEW;\nEND;\n", 25 | hash="5f2e7a65e42c34b0455f0840def52f078726e401", 26 | operation="UPDATE", 27 | pgid="pgtrigger_update_feature_on_name_change_6c32d", 28 | table="lamindb_feature", 29 | when="BEFORE", 30 | ), 31 | ), 32 | ), 33 | ] 34 | -------------------------------------------------------------------------------- /docs/faq/reference-field.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Where to store external links and IDs?" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "When registering data in LaminDB, you might want to store a reference link or ID to indicate the source of the collection.\n", 15 | "\n", 16 | "We have `reference` and `reference_type` fields for this purpose, they are available for {class}`~lamindb.Collection`, {class}`~lamindb.Transform`, {class}`~lamindb.Run` and {class}`~lamindb.Record`." 
17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "# !pip install lamindb\n", 26 | "!lamin init --storage testreference" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import lamindb as ln" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "Let's say we have a few donor samples that came form Vendor X, in order to chase back the orders, I'd like to keep track the donor ids provided by the vendor:" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "ln.Record(\n", 52 | " name=\"donor 001\", reference=\"VX984545\", reference_type=\"Donor ID from Vendor X\"\n", 53 | ")" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "!lamin delete --force testreference" 63 | ] 64 | } 65 | ], 66 | "metadata": { 67 | "kernelspec": { 68 | "display_name": "py39", 69 | "language": "python", 70 | "name": "python3" 71 | }, 72 | "language_info": { 73 | "artifact_extension": ".py", 74 | "codemirror_mode": { 75 | "name": "ipython", 76 | "version": 3 77 | }, 78 | "mimetype": "text/x-python", 79 | "name": "python", 80 | "nbconvert_exporter": "python", 81 | "pygments_lexer": "ipython3", 82 | "version": "3.9.16" 83 | } 84 | }, 85 | "nbformat": 4, 86 | "nbformat_minor": 2 87 | } 88 | -------------------------------------------------------------------------------- /tests/core/test_search.py: -------------------------------------------------------------------------------- 1 | import bionty as bt 2 | import lamindb as ln 3 | import pytest 4 | 5 | 6 | @pytest.fixture(scope="module") 7 | def prepare_cell_type_registry(): 8 | bt.CellType.filter().delete(permanent=True) 9 | records = [ 10 | { 11 | 
"ontology_id": "CL:0000084", 12 | "name": "T cell", 13 | "synonyms": "T-cell|T-lymphocyte|T lymphocyte", 14 | "children": ["CL:0000798", "CL:0002420", "CL:0002419", "CL:0000789"], 15 | }, 16 | { 17 | "ontology_id": "CL:0000236", 18 | "name": "B cell", 19 | "synonyms": "B-lymphocyte|B lymphocyte|B-cell", 20 | "children": ["CL:0009114", "CL:0001201"], 21 | }, 22 | { 23 | "ontology_id": "CL:0000696", 24 | "name": "PP cell", 25 | "synonyms": "type F enteroendocrine cell", 26 | "children": ["CL:0002680"], 27 | }, 28 | { 29 | "ontology_id": "CL:0002072", 30 | "name": "nodal myocyte", 31 | "synonyms": "P cell|myocytus nodalis|cardiac pacemaker cell", 32 | "children": ["CL:1000409", "CL:1000410"], 33 | }, 34 | ] 35 | public_records = [] 36 | for ref_record in records: 37 | record = bt.CellType.from_source(ontology_id=ref_record["ontology_id"]) 38 | assert record.name == ref_record["name"] 39 | assert set(record.synonyms.split("|")) == set(ref_record["synonyms"].split("|")) 40 | public_records.append(record) 41 | ln.save(public_records) 42 | yield "prepared" 43 | bt.CellType.filter().delete(permanent=True) 44 | 45 | 46 | def test_search_synonyms(prepare_cell_type_registry): 47 | result = bt.CellType.search("P cell").to_dataframe() 48 | assert set(result.name.iloc[:2]) == {"nodal myocyte", "PP cell"} 49 | 50 | 51 | def test_search_limit(prepare_cell_type_registry): 52 | result = bt.CellType.search("P cell", limit=1).to_dataframe() 53 | assert len(result) == 1 54 | 55 | 56 | def test_search_case_sensitive(prepare_cell_type_registry): 57 | result = bt.CellType.search("b cell", case_sensitive=False).to_dataframe() 58 | assert result.name.iloc[0] == "B cell" 59 | 60 | 61 | def test_search_None(): 62 | with pytest.raises( 63 | ValueError, match="Cannot search for None value! Please pass a valid string." 
64 | ): 65 | bt.CellType.search(None) 66 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | fail_fast: false 2 | default_language_version: 3 | python: python3 4 | default_stages: 5 | - pre-commit 6 | - pre-push 7 | minimum_pre_commit_version: 2.16.0 8 | repos: 9 | - repo: https://github.com/rbubley/mirrors-prettier 10 | rev: v3.5.1 11 | hooks: 12 | - id: prettier 13 | exclude: | 14 | (?x)( 15 | docs/changelog.md|.github/ISSUE_TEMPLATE/config.yml|tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html|README.md 16 | ) 17 | - repo: https://github.com/kynan/nbstripout 18 | rev: 0.8.1 19 | hooks: 20 | - id: nbstripout 21 | exclude: | 22 | (?x)( 23 | docs/examples/| 24 | docs/notes/ 25 | ) 26 | - repo: https://github.com/astral-sh/ruff-pre-commit 27 | rev: v0.9.10 28 | hooks: 29 | - id: ruff 30 | args: [--fix, --exit-non-zero-on-fix, --unsafe-fixes] 31 | - id: ruff-format 32 | - repo: https://github.com/pre-commit/pre-commit-hooks 33 | rev: v4.5.0 34 | hooks: 35 | - id: detect-private-key 36 | - id: check-ast 37 | - id: end-of-file-fixer 38 | exclude: | 39 | (?x)( 40 | .github/workflows/latest-changes.jinja2 41 | ) 42 | - id: mixed-line-ending 43 | args: [--fix=lf] 44 | - id: trailing-whitespace 45 | exclude: | 46 | (?x)( 47 | tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html 48 | ) 49 | - id: check-case-conflict 50 | - repo: https://github.com/pre-commit/mirrors-mypy 51 | rev: v1.14.1 52 | hooks: 53 | - id: mypy 54 | args: 55 | [ 56 | --no-strict-optional, 57 | --ignore-missing-imports, 58 | --disable-error-code=annotation-unchecked, 59 | ] 60 | additional_dependencies: ["types-requests", "types-attrs"] 61 | exclude: | 62 | (?x)( 63 | test_notebooks.py| 64 | script-to-test-versioning.py| 65 | tests/storage/conftest.py| 66 | tests/curators/conftest.py| 67 | tests/permissions/conftest.py| 68 | tests/writelog/conftest.py| 69 | 
tests/writelog_sqlite/conftest.py| 70 | tests/curators/test_curators_examples.py| 71 | tests/core/conftest.py| 72 | docs/scripts/ 73 | ) 74 | -------------------------------------------------------------------------------- /lamindb/migrations/0107_add_schema_to_record.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-06-30 12:42 2 | 3 | import django.db.models.deletion 4 | from django.db import migrations 5 | 6 | import lamindb.base.fields 7 | 8 | 9 | def migrate_sheets_to_records(apps, schema_editor): 10 | """Migrate Sheet records to Record table as type records.""" 11 | with schema_editor.connection.cursor() as cursor: 12 | # Insert sheets as records with is_type=True 13 | cursor.execute(""" 14 | INSERT INTO lamindb_record (uid, name, description, schema_id, is_type, created_by_id, created_at, updated_at, run_id) 15 | SELECT uid, name, description, schema_id, TRUE, created_by_id, created_at, updated_at, run_id 16 | FROM lamindb_sheet; 17 | """) 18 | 19 | # Update records that were linked to sheets to link to the new record types 20 | cursor.execute(""" 21 | UPDATE lamindb_record 22 | SET type_id = ( 23 | SELECT r.id 24 | FROM lamindb_record r 25 | JOIN lamindb_sheet s ON r.uid = s.uid 26 | WHERE s.id = lamindb_record.sheet_id 27 | ) 28 | WHERE sheet_id IS NOT NULL; 29 | """) 30 | 31 | 32 | class Migration(migrations.Migration): 33 | dependencies = [ 34 | ("lamindb", "0106_transfer_data_migration"), 35 | ] 36 | 37 | operations = [ 38 | migrations.RemoveConstraint( 39 | model_name="record", 40 | name="unique_name", 41 | ), 42 | migrations.AddField( 43 | model_name="record", 44 | name="schema", 45 | field=lamindb.base.fields.ForeignKey( 46 | blank=True, 47 | null=True, 48 | on_delete=django.db.models.deletion.CASCADE, 49 | related_name="records", 50 | to="lamindb.schema", 51 | ), 52 | ), 53 | migrations.AlterField( 54 | model_name="record", 55 | name="is_type", 56 | 
field=lamindb.base.fields.BooleanField( 57 | blank=True, db_index=True, default=False 58 | ), 59 | ), 60 | migrations.RunPython( 61 | migrate_sheets_to_records, 62 | ), 63 | migrations.AddField( 64 | model_name="record", 65 | name="_sort_order", 66 | field=django.db.models.FloatField(null=True, default=None), 67 | ), 68 | ] 69 | -------------------------------------------------------------------------------- /tests/core/_dataset_fixtures.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | from pathlib import Path 3 | from typing import Generator 4 | 5 | import anndata as ad 6 | import lamindb as ln 7 | import mudata as md 8 | import numpy as np 9 | import pandas as pd 10 | import pytest 11 | import spatialdata as sd 12 | import tiledbsoma 13 | import tiledbsoma.io 14 | from scipy.sparse import csr_matrix 15 | 16 | 17 | @pytest.fixture(scope="session") 18 | def get_small_adata(): 19 | return ad.AnnData( 20 | X=np.array([[1, 2, 3], [4, 5, 6]]), 21 | obs={"feat1": ["A", "B"]}, 22 | var=pd.DataFrame(index=["MYC", "TCF7", "GATA1"]), 23 | obsm={"X_pca": np.array([[1, 2], [3, 4]])}, 24 | ) 25 | 26 | 27 | @pytest.fixture(scope="session") 28 | def get_small_mdata(): 29 | adata1 = ad.AnnData( 30 | X=np.array([[1, 2, 3], [4, 5, 6]]), 31 | obs={"feat1": ["A", "B"]}, 32 | var=pd.DataFrame(index=["MYC", "TCF7", "GATA1"]), 33 | obsm={"X_pca": np.array([[1, 2], [3, 4]])}, 34 | ) 35 | 36 | adata2 = ad.AnnData( 37 | X=np.array([[7, 8], [9, 10]]), 38 | obs={"feat2": ["C", "D"]}, 39 | var=pd.DataFrame(index=["FOXP3", "CD8A"]), 40 | obsm={"X_umap": np.array([[5, 6], [7, 8]])}, 41 | ) 42 | 43 | return md.MuData({"rna": adata1, "protein": adata2}) 44 | 45 | 46 | @pytest.fixture(scope="session") 47 | def get_small_sdata(): 48 | adata = ad.AnnData( 49 | X=csr_matrix(np.array([[0.1, 0.2], [0.3, 0.4]])), 50 | obs=pd.DataFrame(index=["cell1", "cell2"]), 51 | var=pd.DataFrame(index=["gene1", "gene2"]), 52 | ) 53 | 54 | { 55 | "region1": 
np.array([[[0, 0], [0, 1], [1, 1], [1, 0]]]), 56 | "region2": np.array([[[2, 2], [2, 3], [3, 3], [3, 2]]]), 57 | } 58 | 59 | sdata_obj = sd.SpatialData( 60 | tables={"gene_expression": adata}, 61 | ) 62 | 63 | return sdata_obj 64 | 65 | 66 | @pytest.fixture(scope="session") 67 | def get_small_soma_experiment(): 68 | adata = ln.examples.datasets.mini_immuno.get_dataset1(otype="AnnData") 69 | tiledbsoma.io.from_anndata("test.tiledbsoma", adata, measurement_name="RNA") 70 | 71 | exp = tiledbsoma.Experiment.open("test.tiledbsoma") 72 | yield exp 73 | 74 | shutil.rmtree("test.tiledbsoma") 75 | 76 | 77 | @pytest.fixture(scope="session") 78 | def get_mini_csv() -> Generator[Path, None, None]: 79 | csv_path = ln.examples.datasets.file_mini_csv() 80 | yield csv_path 81 | 82 | Path("mini.csv").unlink(missing_ok=True) 83 | -------------------------------------------------------------------------------- /docs/storage/prepare-transfer-local-to-cloud.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Prepare transfer artifacts from a local instance to a cloud instance" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "!lamin disconnect" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "import lamindb as ln\n", 26 | "import bionty as bt\n", 27 | "import wetlab as wl\n", 28 | "import pandas as pd" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "ln.setup.init(storage=\"./test-transfer-to-cloud\", modules=\"bionty,wetlab\")" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "artifact = 
ln.Artifact.from_dataframe(\n", 47 | " pd.DataFrame({\"a\": [1, 2, 3]}), description=\"test-transfer-to-cloud\"\n", 48 | ").save()\n", 49 | "features = bt.CellMarker.from_values(\n", 50 | " [\"PD1\", \"CD21\"], field=bt.CellMarker.name, organism=\"human\"\n", 51 | ").save()\n", 52 | "artifact.features._add_schema(ln.Schema(features), slot=\"var\")\n", 53 | "organism = bt.Organism.from_source(name=\"human\").save()\n", 54 | "artifact.labels.add(organism)\n", 55 | "experiment = wl.Experiment(name=\"experiment-test-transfer-to-cloud\").save()\n", 56 | "artifact.experiments.add(experiment)\n", 57 | "artifact.describe()" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "assert artifact.features.slots[\"var\"].members.count() == 2" 67 | ] 68 | } 69 | ], 70 | "metadata": { 71 | "kernelspec": { 72 | "display_name": "py312", 73 | "language": "python", 74 | "name": "python3" 75 | }, 76 | "language_info": { 77 | "codemirror_mode": { 78 | "name": "ipython", 79 | "version": 3 80 | }, 81 | "file_extension": ".py", 82 | "mimetype": "text/x-python", 83 | "name": "python", 84 | "nbconvert_exporter": "python", 85 | "pygments_lexer": "ipython3", 86 | "version": "3.12.8" 87 | } 88 | }, 89 | "nbformat": 4, 90 | "nbformat_minor": 2 91 | } 92 | -------------------------------------------------------------------------------- /lamindb/migrations/0102_remove_writelog_branch_code_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-05-27 11:29 2 | 3 | import django.db.models.deletion 4 | from django.db import migrations, models 5 | 6 | 7 | class Migration(migrations.Migration): 8 | dependencies = [ 9 | ("lamindb", "0101_alter_artifact_hash_alter_feature_name_and_more"), 10 | ] 11 | 12 | operations = [ 13 | migrations.RemoveField( 14 | model_name="writelog", 15 | name="branch_code", 16 | ), 17 | migrations.RemoveField( 18 | 
model_name="writelog", 19 | name="space_uid", 20 | ), 21 | migrations.AddField( 22 | model_name="writelog", 23 | name="branch", 24 | field=models.ForeignKey( 25 | default=1, 26 | on_delete=django.db.models.deletion.PROTECT, 27 | to="lamindb.branch", 28 | ), 29 | ), 30 | migrations.AddField( 31 | model_name="writelog", 32 | name="space", 33 | field=models.ForeignKey( 34 | default=1, 35 | on_delete=django.db.models.deletion.PROTECT, 36 | to="lamindb.space", 37 | ), 38 | ), 39 | migrations.AlterField( 40 | model_name="writelog", 41 | name="run_uid", 42 | field=models.CharField(default="0000000000000000", max_length=20), 43 | ), 44 | migrations.AlterField( 45 | model_name="writelog", 46 | name="record_uid", 47 | field=models.JSONField(db_index=True, default=0), 48 | preserve_default=False, 49 | ), 50 | migrations.AlterModelOptions( 51 | name="migrationstate", 52 | options={"base_manager_name": "objects"}, 53 | ), 54 | migrations.AlterModelOptions( 55 | name="tablestate", 56 | options={"base_manager_name": "objects"}, 57 | ), 58 | migrations.AlterField( 59 | model_name="writelog", 60 | name="migration_state", 61 | field=models.ForeignKey( 62 | on_delete=django.db.models.deletion.PROTECT, to="lamindb.migrationstate" 63 | ), 64 | ), 65 | migrations.AlterField( 66 | model_name="writelog", 67 | name="table", 68 | field=models.ForeignKey( 69 | on_delete=django.db.models.deletion.PROTECT, to="lamindb.tablestate" 70 | ), 71 | ), 72 | ] 73 | -------------------------------------------------------------------------------- /docs/scripts/curate_mudata.py: -------------------------------------------------------------------------------- 1 | import lamindb as ln 2 | import bionty as bt 3 | 4 | from docs.scripts.define_schema_df_metadata import study_metadata_schema 5 | 6 | # define labels 7 | perturbation = ln.Record(name="Perturbation", is_type=True).save() 8 | ln.Record(name="Perturbed", type=perturbation).save() 9 | ln.Record(name="NT", type=perturbation).save() 10 | 11 | 
replicate = ln.Record(name="Replicate", is_type=True).save() 12 | ln.Record(name="rep1", type=replicate).save() 13 | ln.Record(name="rep2", type=replicate).save() 14 | ln.Record(name="rep3", type=replicate).save() 15 | 16 | # define the global obs schema 17 | obs_schema = ln.Schema( 18 | name="mudata_papalexi21_subset_obs_schema", 19 | features=[ 20 | ln.Feature(name="perturbation", dtype="cat[Record[Perturbation]]").save(), 21 | ln.Feature(name="replicate", dtype="cat[Record[Replicate]]").save(), 22 | ], 23 | ).save() 24 | 25 | # define the ['rna'].obs schema 26 | obs_schema_rna = ln.Schema( 27 | name="mudata_papalexi21_subset_rna_obs_schema", 28 | features=[ 29 | ln.Feature(name="nCount_RNA", dtype=int).save(), 30 | ln.Feature(name="nFeature_RNA", dtype=int).save(), 31 | ln.Feature(name="percent.mito", dtype=float).save(), 32 | ], 33 | ).save() 34 | 35 | # define the ['hto'].obs schema 36 | obs_schema_hto = ln.Schema( 37 | name="mudata_papalexi21_subset_hto_obs_schema", 38 | features=[ 39 | ln.Feature(name="nCount_HTO", dtype=float).save(), 40 | ln.Feature(name="nFeature_HTO", dtype=int).save(), 41 | ln.Feature(name="technique", dtype=bt.ExperimentalFactor).save(), 42 | ], 43 | ).save() 44 | 45 | # define ['rna'].var schema 46 | var_schema_rna = ln.Schema( 47 | name="mudata_papalexi21_subset_rna_var_schema", 48 | itype=bt.Gene.symbol, 49 | dtype=float, 50 | ).save() 51 | 52 | # define composite schema 53 | mudata_schema = ln.Schema( 54 | name="mudata_papalexi21_subset_mudata_schema", 55 | otype="MuData", 56 | slots={ 57 | "obs": obs_schema, 58 | "rna:obs": obs_schema_rna, 59 | "hto:obs": obs_schema_hto, 60 | "rna:var": var_schema_rna, 61 | "uns:study_metadata": study_metadata_schema, 62 | }, 63 | ).save() 64 | 65 | # curate a MuData 66 | mdata = ln.examples.datasets.mudata_papalexi21_subset(with_uns=True) 67 | bt.settings.organism = "human" # set the organism to map gene symbols 68 | curator = ln.curators.MuDataCurator(mdata, mudata_schema) 69 | artifact = 
curator.save_artifact(key="examples/mudata_papalexi21_subset.h5mu") 70 | assert artifact.schema == mudata_schema 71 | -------------------------------------------------------------------------------- /lamindb/examples/croissant/__init__.py: -------------------------------------------------------------------------------- 1 | """Examples for MLCommons Croissant files, which are used to store metadata about datasets. 2 | 3 | .. autofunction:: mini_immuno 4 | 5 | """ 6 | 7 | import json 8 | from pathlib import Path 9 | 10 | 11 | def mini_immuno( 12 | n_files: int = 1, filepath_prefix: str = "", strip_version: bool = False 13 | ) -> list[Path]: 14 | """Return paths to the mini immuno dataset and its metadata as a Croissant file. 15 | 16 | Args: 17 | n_files: Number of files inside the croissant file. 18 | filepath_prefix: Move the dataset and references to it in a specific directory. 19 | 20 | Example 21 | 22 | :: 23 | 24 | croissant_path, dataset1_path = ln.examples.croissant.mini_immuno() 25 | croissant_path, dataset1_path, dataset2_path = ln.examples.croissant.mini_immuno(n_files=2) 26 | """ 27 | from ..datasets import file_mini_csv 28 | from ..datasets.mini_immuno import get_dataset1 29 | 30 | adata = get_dataset1(otype="AnnData") 31 | if filepath_prefix: 32 | dataset1_path = Path(filepath_prefix) / "mini_immuno.anndata.zarr" 33 | else: 34 | dataset1_path = Path("mini_immuno.anndata.zarr") 35 | adata.write_zarr(dataset1_path) 36 | orig_croissant_path = ( 37 | Path(__file__).parent / "mini_immuno.anndata.zarr_metadata.json" 38 | ) 39 | with open(orig_croissant_path, encoding="utf-8") as f: 40 | data = json.load(f) 41 | if filepath_prefix: 42 | assert data["distribution"][0]["@id"] == "mini_immuno.anndata.zarr" # noqa: S101 43 | data["distribution"][0]["@id"] = str(Path(filepath_prefix) / dataset1_path.name) 44 | if strip_version: 45 | data.pop("version", None) 46 | if n_files == 2: 47 | file_mini_csv() 48 | if filepath_prefix: 49 | dataset2_path = 
Path(filepath_prefix) / "mini.csv" 50 | else: 51 | dataset2_path = Path("mini.csv") 52 | data["distribution"].append( 53 | { 54 | "@type": "sc:FileObject", 55 | "@id": dataset2_path.as_posix(), 56 | "name": "mini.csv", 57 | "encodingFormat": "text/csv", 58 | } 59 | ) 60 | croissant_path = Path("mini_immuno.anndata.zarr_metadata.json") 61 | with open(croissant_path, "w", encoding="utf-8") as f: 62 | json.dump(data, f, indent=2) 63 | 64 | result: list[Path] = [croissant_path, dataset1_path] 65 | if n_files == 1: 66 | return result 67 | result.append(dataset2_path) 68 | 69 | return result 70 | -------------------------------------------------------------------------------- /tests/core/test_label_mutations.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import lamindb as ln 4 | import pytest 5 | 6 | 7 | @pytest.mark.skipif( 8 | os.getenv("LAMINDB_TEST_DB_VENDOR") == "sqlite", reason="Postgres-only" 9 | ) 10 | @pytest.mark.parametrize( 11 | "model_class,model_name", 12 | [ 13 | (ln.Record, "Record"), 14 | (ln.ULabel, "ULabel"), 15 | ], 16 | ids=["Record", "ULabel"], 17 | ) 18 | def test_rename_and_reparent_recordtype(model_class, model_name): 19 | """Test renaming a record type and changing its parent.""" 20 | 21 | # Test simple rename first 22 | experiment = model_class(name="Experiment", is_type=True).save() 23 | feature = ln.Feature(name="experiment", dtype=experiment).save() 24 | experiment.name = "ExperimentRenamed" 25 | experiment.save() 26 | feature.refresh_from_db() 27 | assert feature.dtype == f"cat[{model_name}[ExperimentRenamed]]" 28 | 29 | # Now add a parent (move from root to under a parent) 30 | parent_type = model_class(name="ParentType", is_type=True).save() 31 | experiment.type = parent_type 32 | experiment.save() 33 | feature.refresh_from_db() 34 | assert feature.dtype == f"cat[{model_name}[ParentType[ExperimentRenamed]]]" 35 | 36 | # Change to a different parent 37 | other_parent = 
model_class(name="OtherParent", is_type=True).save() 38 | experiment.type = other_parent 39 | experiment.save() 40 | feature.refresh_from_db() 41 | assert feature.dtype == f"cat[{model_name}[OtherParent[ExperimentRenamed]]]" 42 | 43 | # Create a record under the previous parent that has the same name with a feature 44 | experiment2 = model_class( 45 | name="ExperimentRenamed", is_type=True, type=parent_type 46 | ).save() 47 | feature2 = ln.Feature(name="experiment2", dtype=experiment2).save() 48 | assert feature2.dtype == f"cat[{model_name}[ParentType[ExperimentRenamed]]]" 49 | 50 | # Test rename the new record type 51 | experiment2.name = "Experiment" 52 | experiment2.save() 53 | feature2.refresh_from_db() 54 | assert feature2.dtype == f"cat[{model_name}[ParentType[Experiment]]]" 55 | # this did not mutate the other feature that has the same name 56 | assert feature.dtype == f"cat[{model_name}[OtherParent[ExperimentRenamed]]]" 57 | 58 | # Remove parent (move back to root) 59 | experiment.type = None 60 | experiment.save() 61 | feature.refresh_from_db() 62 | assert feature.dtype == f"cat[{model_name}[ExperimentRenamed]]" 63 | 64 | experiment.delete(permanent=True) 65 | feature.delete(permanent=True) 66 | experiment2.delete(permanent=True) 67 | feature2.delete(permanent=True) 68 | parent_type.delete(permanent=True) 69 | other_parent.delete(permanent=True) 70 | -------------------------------------------------------------------------------- /lamindb/migrations/0070_lamindbv1_migrate_data.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-01-05 11:58 2 | 3 | from pathlib import Path 4 | 5 | import lamindb_setup as ln_setup 6 | import psycopg2 7 | from django.db import migrations 8 | 9 | 10 | def get_artifact_path_psycopg2(artifact_id): 11 | """Get artifact path using psycopg2.""" 12 | query = """ 13 | SELECT 14 | s.root || '/.lamindb/' || a.uid || a.suffix AS full_path 15 | FROM 16 | 
lamindb_artifact a 17 | JOIN lamindb_storage s ON a.storage_id = s.id 18 | WHERE 19 | a.id = %s 20 | """ 21 | 22 | with psycopg2.connect(ln_setup.settings.instance.db) as conn: 23 | with conn.cursor() as cur: 24 | cur.execute(query, (artifact_id,)) 25 | return cur.fetchone()[0] 26 | 27 | 28 | def transfer_source_code(apps, schema_editor): 29 | from lamindb._finish import notebook_to_script 30 | 31 | Transform = apps.get_model("lamindb", "Transform") 32 | transforms = Transform.objects.filter( 33 | _source_code_artifact__isnull=False, 34 | ).select_related("_source_code_artifact") 35 | 36 | for transform in transforms: 37 | print(f"migrating source code of transform {transform}") 38 | artifact = transform._source_code_artifact 39 | print("artifact", artifact.uid) 40 | 41 | path_str = get_artifact_path_psycopg2(artifact.id) 42 | print(ln_setup.settings.storage.root_as_str) 43 | print(path_str) 44 | if path_str.startswith(ln_setup.settings.storage.root_as_str): 45 | path = ( 46 | ln_setup.settings.storage.root 47 | / f".lamindb/{artifact.uid}{artifact.suffix}" 48 | ) 49 | else: 50 | path = ln_setup.core.upath.UPath(path_str) 51 | if path.exists(): 52 | if path_str.startswith("s3://"): 53 | local_path = Path(f"temp{path.suffix}") 54 | path.download_to(local_path) 55 | else: 56 | local_path = path 57 | 58 | if artifact.suffix == ".ipynb": 59 | transform.source_code = notebook_to_script(transform, local_path) 60 | else: 61 | transform.source_code = local_path.read_text() 62 | transform.hash = artifact.hash 63 | path.unlink() 64 | else: 65 | print(f"path did not exist: {path_str}") 66 | transform._source_code_artifact = None 67 | transform.save() 68 | artifact.delete() 69 | 70 | 71 | class Migration(migrations.Migration): 72 | dependencies = [ 73 | ("lamindb", "0069_squashed"), 74 | ] 75 | 76 | operations = [ 77 | migrations.RunPython(transfer_source_code), 78 | ] 79 | -------------------------------------------------------------------------------- 
/lamindb/migrations/0090_runproject_project_runs.py:
--------------------------------------------------------------------------------
# Generated by Django 5.2 on 2025-03-05 10:20

import django.db.models.deletion
import django.db.models.functions.datetime
from django.db import migrations, models

import lamindb.base.fields
import lamindb.base.users
import lamindb.models.sqlrecord


class Migration(migrations.Migration):
    """Create the RunProject link table and expose it on Project as ``runs``.

    Adds an explicit through model (``RunProject``) for the Run<->Project
    many-to-many relation so that per-link metadata (``created_at``,
    ``created_by``) is recorded, then wires it up via ``Project.runs``.
    """

    dependencies = [
        ("lamindb", "0089_subsequent_runs"),
    ]

    operations = [
        migrations.CreateModel(
            name="RunProject",
            fields=[
                ("id", models.BigAutoField(primary_key=True, serialize=False)),
                (
                    "created_at",
                    # db_default=Now(): the creation timestamp is stamped
                    # server-side by the database, not by application code.
                    lamindb.base.fields.DateTimeField(
                        blank=True,
                        db_default=django.db.models.functions.datetime.Now(),
                        db_index=True,
                        editable=False,
                    ),
                ),
                (
                    "created_by",
                    # PROTECT: a user referenced by link rows cannot be deleted.
                    # related_name="+" disables the reverse accessor on User.
                    lamindb.base.fields.ForeignKey(
                        blank=True,
                        default=lamindb.base.users.current_user_id,
                        editable=False,
                        on_delete=django.db.models.deletion.PROTECT,
                        related_name="+",
                        to="lamindb.user",
                    ),
                ),
                (
                    "project",
                    # PROTECT on project vs. CASCADE on run below: deleting a
                    # run removes its link rows, while a project that still has
                    # links is protected from deletion.
                    lamindb.base.fields.ForeignKey(
                        blank=True,
                        on_delete=django.db.models.deletion.PROTECT,
                        related_name="links_run",
                        to="lamindb.project",
                    ),
                ),
                (
                    "run",
                    lamindb.base.fields.ForeignKey(
                        blank=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="links_project",
                        to="lamindb.run",
                    ),
                ),
            ],
            options={
                # each (run, project) pair may be linked at most once
                "unique_together": {("run", "project")},
            },
            # IsLink mixin marks this model as a link table in lamindb's
            # model registry (see lamindb.models.sqlrecord).
            bases=(models.Model, lamindb.models.sqlrecord.IsLink),
        ),
        migrations.AddField(
            model_name="project",
            name="runs",
            # Reverse side is Run.projects; routed through RunProject above.
            field=models.ManyToManyField(
                related_name="projects", through="lamindb.RunProject", to="lamindb.run"
            ),
        ),
    ]
-------------------------------------------------------------------------------- /lamindb/base/uids.py: -------------------------------------------------------------------------------- 1 | """Universal IDs. 2 | 3 | Base generators 4 | =============== 5 | 6 | .. autofunction:: base26 7 | .. autofunction:: base62 8 | .. autofunction:: base64 9 | 10 | UID generators 11 | ================ 12 | 13 | .. autofunction:: base62_8 14 | .. autofunction:: base62_12 15 | .. autofunction:: base62_16 16 | .. autofunction:: base62_20 17 | 18 | Collision probabilities 19 | ======================= 20 | 21 | 8 base62 characters (`62**8=2e+14`): 22 | 23 | ======= =========== 24 | n p_collision 25 | ======= =========== 26 | 100k 2e-05 27 | 1M 2e-03 28 | ======= =========== 29 | 30 | 12 base62 characters (`62**12=3e+21`): 31 | 32 | ======= =========== 33 | n p_collision 34 | ======= =========== 35 | 100M 2e-06 36 | 1B 2e-04 37 | ======= =========== 38 | 39 | 16 base62 characters (`62**16=5e+28`): 40 | 41 | ======= =========== 42 | n p_collision 43 | ======= =========== 44 | 1e12 7e-05 45 | 1e13 7e-03 46 | ======= =========== 47 | 48 | 20 base62 characters (`62**20=7e+35`) roughly matches UUID (`2**122=5e+36`): 49 | 50 | ======= =========== 51 | n p_collision 52 | ======= =========== 53 | 1e16 7e-05 54 | 1e17 7e-03 55 | ======= =========== 56 | 57 | See `source `__. 
58 | 59 | """ 60 | 61 | import secrets 62 | import string 63 | 64 | 65 | def base64(n_char: int) -> str: 66 | """Random Base64 string.""" 67 | alphabet = string.digits + string.ascii_letters.swapcase() + "_" + "-" 68 | uid = "".join(secrets.choice(alphabet) for i in range(n_char)) 69 | return uid 70 | 71 | 72 | def base62(n_char: int) -> str: 73 | """Random Base62 string.""" 74 | alphabet = string.digits + string.ascii_letters.swapcase() 75 | uid = "".join(secrets.choice(alphabet) for i in range(n_char)) 76 | return uid 77 | 78 | 79 | def base26(n_char: int): 80 | """ASCII lowercase.""" 81 | alphabet = string.ascii_lowercase 82 | uid = "".join(secrets.choice(alphabet) for i in range(n_char)) 83 | return uid 84 | 85 | 86 | def base62_4() -> str: 87 | return base62(4) 88 | 89 | 90 | def base62_8() -> str: 91 | """Random Base62 string of length 8.""" 92 | return base62(8) 93 | 94 | 95 | def base62_12() -> str: 96 | """Random Base62 string of length 12.""" 97 | return base62(12) 98 | 99 | 100 | def base62_16() -> str: 101 | """Random Base62 string of length 16.""" 102 | return base62(16) 103 | 104 | 105 | def base62_20() -> str: 106 | """Random Base62 string of length 20.""" 107 | return base62(20) 108 | 109 | 110 | def base62_24() -> str: 111 | """Random Base62 string of length 24.""" 112 | return base62(24) 113 | -------------------------------------------------------------------------------- /lamindb/examples/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | """Example datasets. 2 | 3 | The mini immuno dataset 4 | ----------------------- 5 | 6 | .. autosummary:: 7 | :toctree: . 8 | 9 | mini_immuno 10 | 11 | Small in-memory datasets 12 | ------------------------ 13 | 14 | .. autofunction:: anndata_with_obs 15 | 16 | Files 17 | ----- 18 | 19 | .. autofunction:: file_fcs 20 | .. autofunction:: file_fcs_alpert19 21 | .. autofunction:: file_tsv_rnaseq_nfcore_salmon_merged_gene_counts 22 | .. 
autofunction:: file_jpg_paradisi05 23 | .. autofunction:: file_tiff_suo22 24 | .. autofunction:: file_fastq 25 | .. autofunction:: file_bam 26 | .. autofunction:: file_mini_csv 27 | 28 | Directories 29 | ----------- 30 | 31 | .. autofunction:: dir_scrnaseq_cellranger 32 | .. autofunction:: dir_iris_images 33 | 34 | Dictionary, Dataframe, AnnData, MuData, SpatialData 35 | ---------------------------------------------------- 36 | 37 | .. autofunction:: dict_cellxgene_uns 38 | .. autofunction:: df_iris 39 | .. autofunction:: df_iris_in_meter 40 | .. autofunction:: df_iris_in_meter_study1 41 | .. autofunction:: df_iris_in_meter_study2 42 | .. autofunction:: anndata_mouse_sc_lymph_node 43 | .. autofunction:: anndata_human_immune_cells 44 | .. autofunction:: anndata_pbmc68k_reduced 45 | .. autofunction:: anndata_file_pbmc68k_test 46 | .. autofunction:: anndata_pbmc3k_processed 47 | .. autofunction:: anndata_with_obs 48 | .. autofunction:: anndata_suo22_Visium10X 49 | .. autofunction:: mudata_papalexi21_subset 50 | .. autofunction:: schmidt22_crispra_gws_IFNG 51 | .. autofunction:: schmidt22_perturbseq 52 | .. autofunction:: spatialdata_blobs 53 | 54 | Other 55 | ----- 56 | 57 | .. autofunction:: fake_bio_notebook_titles 58 | """ 59 | 60 | from . 
import mini_immuno 61 | from ._core import ( 62 | anndata_file_pbmc68k_test, 63 | anndata_human_immune_cells, 64 | anndata_mouse_sc_lymph_node, 65 | anndata_pbmc3k_processed, 66 | anndata_pbmc68k_reduced, 67 | anndata_suo22_Visium10X, 68 | df_iris, 69 | df_iris_in_meter, 70 | df_iris_in_meter_study1, 71 | df_iris_in_meter_study2, 72 | dict_cellxgene_uns, 73 | dir_iris_images, 74 | dir_scrnaseq_cellranger, 75 | file_bam, 76 | file_fastq, 77 | file_fcs, 78 | file_fcs_alpert19, 79 | file_jpg_paradisi05, 80 | file_mini_csv, 81 | file_tiff_suo22, 82 | file_tsv_rnaseq_nfcore_salmon_merged_gene_counts, 83 | mudata_papalexi21_subset, 84 | schmidt22_crispra_gws_IFNG, 85 | schmidt22_perturbseq, 86 | spatialdata_blobs, 87 | ) 88 | from ._fake import fake_bio_notebook_titles 89 | from ._small import ( 90 | anndata_with_obs, 91 | small_dataset3_cellxgene, 92 | ) 93 | 94 | small_dataset1 = mini_immuno.get_dataset1 # backward compat 95 | small_dataset2 = mini_immuno.get_dataset2 # backward compat 96 | -------------------------------------------------------------------------------- /lamindb/migrations/0092_alter_artifactfeaturevalue_artifact_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-05-06 20:34 2 | 3 | import django.db.models.deletion 4 | from django.db import migrations 5 | 6 | import lamindb.base.fields 7 | 8 | 9 | class Migration(migrations.Migration): 10 | dependencies = [ 11 | ("lamindb", "0091_alter_featurevalue_options_alter_space_options_and_more"), 12 | ] 13 | 14 | operations = [ 15 | migrations.AlterField( 16 | model_name="artifactfeaturevalue", 17 | name="artifact", 18 | field=lamindb.base.fields.ForeignKey( 19 | blank=True, 20 | on_delete=django.db.models.deletion.CASCADE, 21 | related_name="links_featurevalue", 22 | to="lamindb.artifact", 23 | ), 24 | ), 25 | migrations.AlterField( 26 | model_name="artifactfeaturevalue", 27 | name="featurevalue", 28 | 
field=lamindb.base.fields.ForeignKey( 29 | blank=True, 30 | on_delete=django.db.models.deletion.PROTECT, 31 | related_name="links_artifact", 32 | to="lamindb.featurevalue", 33 | ), 34 | ), 35 | migrations.AlterField( 36 | model_name="artifactparamvalue", 37 | name="artifact", 38 | field=lamindb.base.fields.ForeignKey( 39 | blank=True, 40 | on_delete=django.db.models.deletion.CASCADE, 41 | related_name="links_paramvalue", 42 | to="lamindb.artifact", 43 | ), 44 | ), 45 | migrations.AlterField( 46 | model_name="artifactparamvalue", 47 | name="paramvalue", 48 | field=lamindb.base.fields.ForeignKey( 49 | blank=True, 50 | on_delete=django.db.models.deletion.PROTECT, 51 | related_name="links_artifact", 52 | to="lamindb.paramvalue", 53 | ), 54 | ), 55 | migrations.AlterField( 56 | model_name="runparamvalue", 57 | name="paramvalue", 58 | field=lamindb.base.fields.ForeignKey( 59 | blank=True, 60 | on_delete=django.db.models.deletion.PROTECT, 61 | related_name="links_run", 62 | to="lamindb.paramvalue", 63 | ), 64 | ), 65 | migrations.AlterField( 66 | model_name="runparamvalue", 67 | name="run", 68 | field=lamindb.base.fields.ForeignKey( 69 | blank=True, 70 | on_delete=django.db.models.deletion.CASCADE, 71 | related_name="links_paramvalue", 72 | to="lamindb.run", 73 | ), 74 | ), 75 | ] 76 | -------------------------------------------------------------------------------- /tests/storage/test_artifact_zarr.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | from pathlib import Path 3 | 4 | import anndata as ad 5 | import lamindb as ln 6 | import numpy as np 7 | import pandas as pd 8 | import pytest 9 | from lamindb.core.storage._zarr import identify_zarr_type 10 | from lamindb_setup.core.upath import ( 11 | CloudPath, 12 | ) 13 | 14 | 15 | @pytest.fixture(scope="session") 16 | def get_small_adata(): 17 | return ad.AnnData( 18 | X=np.array([[1, 2, 3], [4, 5, 6]]), 19 | obs={"feat1": ["A", "B"]}, 20 | 
var=pd.DataFrame(index=["MYC", "TCF7", "GATA1"]), 21 | obsm={"X_pca": np.array([[1, 2], [3, 4]])}, 22 | ) 23 | 24 | 25 | def test_zarr_upload_cache(get_small_adata): 26 | previous_storage = ln.setup.settings.storage.root_as_str 27 | ln.settings.storage = "s3://lamindb-test/core" 28 | 29 | zarr_path = Path("./test_adata.zarr") 30 | get_small_adata.write_zarr(zarr_path) 31 | 32 | artifact = ln.Artifact(zarr_path, key="test_adata.zarr") 33 | assert artifact._is_saved_to_storage_location is None 34 | assert artifact.otype == "AnnData" 35 | assert artifact.n_files >= 1 36 | artifact.save() 37 | 38 | assert ln.Artifact.get(path=artifact.path) == artifact 39 | 40 | assert artifact._is_saved_to_storage_location 41 | 42 | assert isinstance(artifact.path, CloudPath) 43 | assert artifact.path.exists() 44 | assert identify_zarr_type(artifact.path) == "anndata" 45 | 46 | shutil.rmtree(artifact.cache()) 47 | 48 | cache_path = artifact._cache_path 49 | assert isinstance(artifact.load(), ad.AnnData) 50 | assert cache_path.is_dir() 51 | 52 | shutil.rmtree(cache_path) 53 | assert not cache_path.exists() 54 | artifact.cache() 55 | assert cache_path.is_dir() 56 | 57 | artifact.delete(permanent=True, storage=True) 58 | shutil.rmtree(zarr_path) 59 | 60 | # test zarr from memory 61 | artifact = ln.Artifact(get_small_adata, key="test_adata.anndata.zarr") 62 | assert artifact._is_saved_to_storage_location is None 63 | assert artifact._local_filepath.is_dir() 64 | assert artifact.otype == "AnnData" 65 | assert artifact.suffix == ".anndata.zarr" 66 | assert artifact.n_files >= 1 67 | 68 | ln.save([artifact]) # use bulk save here for testing 69 | assert artifact._is_saved_to_storage_location 70 | assert isinstance(artifact.path, CloudPath) 71 | assert artifact.path.exists() 72 | cache_path = artifact._cache_path 73 | assert cache_path.is_dir() 74 | 75 | shutil.rmtree(cache_path) 76 | assert not cache_path.exists() 77 | 78 | artifact._memory_rep = None 79 | 80 | assert 
isinstance(artifact.load(), ad.AnnData) 81 | assert cache_path.is_dir() 82 | 83 | artifact.delete(permanent=True, storage=True) 84 | 85 | ln.settings.storage = previous_storage 86 | -------------------------------------------------------------------------------- /lamindb/examples/cellxgene/cellxgene_schema_versions.csv: -------------------------------------------------------------------------------- 1 | schema_version,entity,organism,source,version 2 | 4.0.0,CellType,all,cl,2023-08-24 3 | 4.0.0,ExperimentalFactor,all,efo,3.57.0 4 | 4.0.0,Ethnicity,human,hancestro,3.0 5 | 4.0.0,DevelopmentalStage,human,hsapdv,2020-03-10 6 | 4.0.0,DevelopmentalStage,mouse,mmusdv,2020-03-10 7 | 4.0.0,Disease,all,mondo,2023-08-02 8 | 4.0.0,Organism,all,ncbitaxon,2023-06-20 9 | 4.0.0,Phenotype,all,pato,2023-05-18 10 | 4.0.0,Tissue,all,uberon,2023-09-05 11 | 5.0.0,CellType,all,cl,2024-01-04 12 | 5.0.0,ExperimentalFactor,all,efo,3.62.0 13 | 5.0.0,Ethnicity,human,hancestro,3.0 14 | 5.0.0,DevelopmentalStage,human,hsapdv,2020-03-10 15 | 5.0.0,DevelopmentalStage,mouse,mmusdv,2020-03-10 16 | 5.0.0,Disease,all,mondo,2024-01-03 17 | 5.0.0,Organism,all,ncbitaxon,2023-06-20 18 | 5.0.0,Phenotype,all,pato,2023-05-18 19 | 5.0.0,Tissue,all,uberon,2024-01-18 20 | 5.0.0,Gene,human,ensembl,release-110 21 | 5.0.0,Gene,mouse,ensembl,release-110 22 | 5.1.0,CellType,all,cl,2024-04-05 23 | 5.1.0,ExperimentalFactor,all,efo,3.65.0 24 | 5.1.0,Ethnicity,human,hancestro,3.0 25 | 5.1.0,DevelopmentalStage,human,hsapdv,2020-03-10 26 | 5.1.0,DevelopmentalStage,mouse,mmusdv,2020-03-10 27 | 5.1.0,Disease,all,mondo,2024-05-08 28 | 5.1.0,Organism,all,ncbitaxon,2023-06-20 29 | 5.1.0,Phenotype,all,pato,2023-05-18 30 | 5.1.0,Tissue,all,uberon,2024-03-22 31 | 5.1.0,Gene,human,ensembl,release-110 32 | 5.1.0,Gene,mouse,ensembl,release-110 33 | 5.2.0,CellType,all,cl,2024-08-16 34 | 5.2.0,ExperimentalFactor,all,efo,3.69.0 35 | 5.2.0,Ethnicity,human,hancestro,3.0 36 | 5.2.0,DevelopmentalStage,human,hsapdv,2024-05-28 37 | 
5.2.0,DevelopmentalStage,mouse,mmusdv,2024-05-28 38 | 5.2.0,Disease,all,mondo,2024-08-06 39 | 5.2.0,Organism,all,ncbitaxon,2023-06-20 40 | 5.2.0,Phenotype,all,pato,2023-05-18 41 | 5.2.0,Tissue,all,uberon,2024-08-07 42 | 5.2.0,Gene,human,ensembl,release-110 43 | 5.2.0,Gene,mouse,ensembl,release-110 44 | 5.3.0,CellType,all,cl,2025-02-13 45 | 5.3.0,ExperimentalFactor,all,efo,3.75.0 46 | 5.3.0,Ethnicity,human,hancestro,3.0 47 | 5.3.0,DevelopmentalStage,human,hsapdv,2025-01-23 48 | 5.3.0,DevelopmentalStage,mouse,mmusdv,2025-01-23 49 | 5.3.0,Disease,all,mondo,2025-02-04 50 | 5.3.0,Organism,all,ncbitaxon,2024-11-25 51 | 5.3.0,Phenotype,all,pato,2025-02-01 52 | 5.3.0,Tissue,all,uberon,2025-01-15 53 | 5.3.0,Gene,human,ensembl,release-110 54 | 5.3.0,Gene,mouse,ensembl,release-110 55 | 6.0.0,CellType,all,cl,2025-04-10 56 | 6.0.0,ExperimentalFactor,all,efo,3.78.0 57 | 6.0.0,Ethnicity,human,hancestro,3.0 58 | 6.0.0,DevelopmentalStage,human,hsapdv,2025-01-23 59 | 6.0.0,DevelopmentalStage,mouse,mmusdv,2025-01-23 60 | 6.0.0,Disease,all,mondo,2025-05-06 61 | 6.0.0,Organism,all,ncbitaxon,2025-03-13 62 | 6.0.0,Phenotype,all,pato,2025-05-14 63 | 6.0.0,Tissue,all,uberon,2025-05-28 64 | 6.0.0,Gene,human,ensembl,release-110 65 | 6.0.0,Gene,mouse,ensembl,release-110 66 | -------------------------------------------------------------------------------- /lamindb/migrations/0139_alter_reference_text.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2.7 on 2025-10-29 15:16 2 | 3 | from django.db import migrations, models 4 | 5 | import lamindb.base.fields 6 | 7 | 8 | class Migration(migrations.Migration): 9 | dependencies = [ 10 | ("lamindb", "0138_remove_record_linked_users_user_linked_in_records"), 11 | ] 12 | 13 | operations = [ 14 | migrations.AlterField( 15 | model_name="reference", 16 | name="text", 17 | field=lamindb.base.fields.TextField(blank=True, default=None, null=True), 18 | ), 19 | migrations.RemoveField( 20 | 
import lamindb as ln
import pytest


def test_DB_multiple_instances():
    """Two DB handles pointing at different instances must coexist."""
    db_cxg = ln.DB("laminlabs/cellxgene")
    db_lamindata = ln.DB("laminlabs/lamindata")
    h5ad_qs = db_cxg.Artifact.filter(suffix=".h5ad")
    zarr_qs = db_lamindata.Artifact.filter(suffix=".zarr")
    assert h5ad_qs._db != zarr_qs._db


def test_DB_bionty():
    """A registry from the bionty module must be queryable."""
    db = ln.DB("laminlabs/cellxgene")
    genes = db.bionty.Gene.filter(symbol__startswith="TP53")
    assert len(genes) > 0


def test_DB_missing_module():
    """Accessing a registry of a module the instance lacks must raise."""
    db = ln.DB("laminlabs/lamin-site-assets")  # instance without bionty
    with pytest.raises(AttributeError) as excinfo:
        db.bionty.Gene.first()
    assert (
        "Schema 'bionty' not available in instance 'laminlabs/lamin-site-assets'."
        in str(excinfo.value)
    )


def test_DB_instantiate_class():
    """Instantiating a registry class through DB must raise."""
    db = ln.DB("laminlabs/cellxgene")
    with pytest.raises(TypeError) as excinfo:
        db.Artifact()
    assert (
        "Cannot instantiate Artifact from DB. Use Artifact.filter(), Artifact.get(), etc. to query records."
        in str(excinfo.value)
    )


@pytest.mark.parametrize(
    "attr,expected_msg",
    [
        ("artifacts", "Registry 'artifacts' not found"),
        ("foo", "Registry 'foo' not found"),
        ("celltype", "Registry 'celltype' not found"),
    ],
)
def test_DB_rejects_invalid_attributes(attr, expected_msg):
    """Unknown or lower-cased registry names must raise AttributeError."""
    db = ln.DB("laminlabs/cellxgene")
    with pytest.raises(AttributeError) as excinfo:
        getattr(db, attr)
    assert expected_msg in str(excinfo.value)


def test_DB_cache():
    """Repeated attribute access must hand back the same cached wrapper."""
    db = ln.DB("laminlabs/cellxgene")
    assert db.Artifact is db.Artifact


def test_queryset_caching():
    """Fresh `.filter()` calls must not hand back stale cached rows."""
    db = ln.DB("laminlabs/cellxgene")
    first_artifact = db.Artifact.filter().first()
    last_artifact = db.Artifact.filter().last()
    assert first_artifact != last_artifact


def test_DB_dir():
    """__dir__ must list the instance's registries and modules."""
    db = ln.DB("laminlabs/cellxgene")
    listed = dir(db)
    assert "Artifact" in listed
    assert "Collection" in listed
    assert "Gene" not in listed  # bionty registries live under db.bionty
    assert "bionty" in listed
library(laminr)
13 | 
14 | db <- connect()
15 | 16 | 17 | 18 |
→ connected lamindb: laminlabs/lamindata
19 | 20 | 21 | 22 |
db$track("lOScuxDTDE0q0000")
23 | 24 | 25 | 26 |
→ loaded Transform('lOScuxDT'), started Run('GWpaTtUg') at 2024-12-01 17:49:18 UTC
27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 |
db$finish()
35 | 36 | 37 | 38 |
MoreOUTPUT 
39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /lamindb/migrations/0125_artifact_is_locked_collection_is_locked_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.1.12 on 2025-09-29 00:46 2 | 3 | from django.db import migrations 4 | 5 | import lamindb.base.fields 6 | 7 | 8 | class Migration(migrations.Migration): 9 | dependencies = [ 10 | ("lamindb", "0124_page_artifact_page_collection_page_feature_page_and_more"), 11 | ] 12 | 13 | operations = [ 14 | migrations.AddField( 15 | model_name="artifact", 16 | name="is_locked", 17 | field=lamindb.base.fields.BooleanField(blank=True, default=False), 18 | ), 19 | migrations.AddField( 20 | model_name="collection", 21 | name="is_locked", 22 | field=lamindb.base.fields.BooleanField(blank=True, default=False), 23 | ), 24 | migrations.AddField( 25 | model_name="feature", 26 | name="is_locked", 27 | field=lamindb.base.fields.BooleanField(blank=True, default=False), 28 | ), 29 | migrations.AddField( 30 | model_name="featurevalue", 31 | name="is_locked", 32 | field=lamindb.base.fields.BooleanField(blank=True, default=False), 33 | ), 34 | migrations.AddField( 35 | model_name="project", 36 | name="is_locked", 37 | field=lamindb.base.fields.BooleanField(blank=True, default=False), 38 | ), 39 | migrations.AddField( 40 | model_name="record", 41 | name="is_locked", 42 | field=lamindb.base.fields.BooleanField(blank=True, default=False), 43 | ), 44 | migrations.AddField( 45 | model_name="recordrecord", 46 | name="is_locked", 47 | field=lamindb.base.fields.BooleanField(blank=True, default=False), 48 | ), 49 | migrations.AddField( 50 | model_name="reference", 51 | name="is_locked", 52 | field=lamindb.base.fields.BooleanField(blank=True, default=False), 53 | ), 54 | migrations.AddField( 55 | model_name="run", 56 | name="is_locked", 57 | field=lamindb.base.fields.BooleanField(blank=True, default=False), 58 | ), 59 
| migrations.AddField( 60 | model_name="schema", 61 | name="is_locked", 62 | field=lamindb.base.fields.BooleanField(blank=True, default=False), 63 | ), 64 | migrations.AddField( 65 | model_name="storage", 66 | name="is_locked", 67 | field=lamindb.base.fields.BooleanField(blank=True, default=False), 68 | ), 69 | migrations.AddField( 70 | model_name="transform", 71 | name="is_locked", 72 | field=lamindb.base.fields.BooleanField(blank=True, default=False), 73 | ), 74 | migrations.AddField( 75 | model_name="ulabel", 76 | name="is_locked", 77 | field=lamindb.base.fields.BooleanField(blank=True, default=False), 78 | ), 79 | ] 80 | -------------------------------------------------------------------------------- /tests/core/notebooks/basic-r-notebook.Rmd.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | My exemplary R analysis 5 |

My exemplary R analysis

6 | 7 | 8 | 9 | 10 | 11 | 12 |
library(laminr)
13 | 
14 | db <- connect()
15 | 16 | 17 | 18 |
→ connected lamindb: laminlabs/lamindata
19 | 20 | 21 | 22 |
db$track("lOScuxDTDE0q0000")
23 | 24 | 25 | 26 |
→ loaded Transform('lOScuxDT'), started Run('GWpaTtUg') at 2024-12-01 17:49:18 UTC
27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 |
db$finish()
35 | 36 | 37 | 38 |
MoreOUTPUT ! please hit SHORTCUT to save the notebook in your editor and re-run finish()
39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /tests/storage/conftest.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | from pathlib import Path 3 | from subprocess import DEVNULL, run 4 | from time import perf_counter 5 | 6 | import lamindb as ln 7 | import lamindb_setup as ln_setup 8 | import pytest 9 | from lamin_utils import logger 10 | from laminci.db import setup_local_test_postgres 11 | 12 | 13 | def create_test_instance(pgurl: str): 14 | ln.setup.init( 15 | storage="./default_storage_unit_storage", 16 | modules="bionty", 17 | name="lamindb-unit-tests-storage", 18 | db=pgurl, 19 | ) 20 | ln.setup.register() # temporarily 21 | ln.settings.creation.artifact_silence_missing_run_warning = True 22 | ln.settings.track_run_inputs = False 23 | ln.Storage("s3://lamindb-ci/test-data").save() 24 | ln.Storage("s3://lamindb-test/core").save() 25 | ln.Storage("s3://lamindb-test/storage").save() 26 | 27 | 28 | def pytest_sessionstart(): 29 | t_execute_start = perf_counter() 30 | 31 | ln_setup._TESTING = True 32 | try: 33 | pgurl = setup_local_test_postgres() 34 | except RuntimeError: 35 | run("docker stop pgtest && docker rm pgtest", shell=True, stdout=DEVNULL) # noqa: S602 36 | pgurl = setup_local_test_postgres() 37 | try: 38 | create_test_instance(pgurl) 39 | except Exception as e: 40 | print("failed to create test instance:", e) 41 | print("deleting the instance") 42 | delete_test_instance() 43 | # below currently fails because cannot create two instances in the same session 44 | # create_test_instance(pgurl) 45 | print("now rerun") 46 | quit() 47 | total_time_elapsed = perf_counter() - t_execute_start 48 | print(f"time to setup the instance: {total_time_elapsed:.1f}s") 49 | assert ln.Storage.filter(root="s3://lamindb-ci/test-data").one_or_none() is not None 50 | 51 | 52 | def delete_test_instance(): 53 | logger.set_verbosity(1) 54 | if 
"""PyTorch Lightning.

.. autoclass:: Callback
"""

from pathlib import Path
from typing import Any

import lightning as pl
from lightning.pytorch import LightningModule, Trainer

import lamindb as ln


class Callback(pl.Callback):
    """Saves PyTorch Lightning model checkpoints to the LaminDB instance after each training epoch.

    Creates version families of artifacts for given `key` (relative file path).

    See also: :doc:`docs:mlflow` & :doc:`docs:wandb`.

    Args:
        path: A local path to the checkpoint.
        key: The `key` for the checkpoint artifact.
        features: Features to annotate the checkpoint.

    Examples:

        Create a callback that creates artifacts for checkpoints and annotates them by the MLflow run ID::

            import lightning as pl
            from lamindb.integrations import lightning as ll

            lamindb_callback = ll.Callback(
                path=checkpoint_filename, key=artifact_key, features={"mlflow_run_id": mlflow_run.info.run_id}
            )
            trainer = pl.Trainer(callbacks=[lamindb_callback])
    """

    def __init__(
        self,
        path: str | Path,
        key: str,
        features: dict[str, Any] | None = None,
    ):
        self.path = Path(path)
        self.key = key
        # fall back to a fresh dict when no features were given
        self.features = features or {}

    def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None:
        """Validates that features exist for all specified params."""
        # every annotation key must already exist as a Feature in the instance
        missing = [
            name
            for name in self.features.keys()
            if ln.Feature.filter(name=name).one_or_none() is None
        ]
        if missing:
            s = "s" if len(missing) > 1 else ""
            raise ValueError(
                f"Feature{s} {', '.join(missing)} missing. Create {'them' if len(missing) > 1 else 'it'} first."
            )

    def on_train_epoch_end(self, trainer: Trainer, pl_module: LightningModule) -> None:
        """Saves model checkpoint artifacts at the end of each epoch and optionally annotates them."""
        trainer.save_checkpoint(self.path)
        checkpoint_artifact = ln.Artifact(self.path, key=self.key, kind="model").save()

        # start from the static feature values, then overlay dynamic ones
        values = dict(self.features)
        for name in self.features.keys():
            if hasattr(trainer, name):
                # trainer attribute (e.g. current_epoch) wins over the static value
                values[name] = getattr(trainer, name)
            elif name in trainer.callback_metrics:
                metric = trainer.callback_metrics[name]
                # tensors expose .item(); anything else is coerced to float
                values[name] = metric.item() if hasattr(metric, "item") else float(metric)

        if values:
            checkpoint_artifact.features.add_values(values)

        checkpoint_artifact.save()


__all__ = ["Callback"]
43 | """ 44 | ), 45 | migrations.AlterField( 46 | model_name="feature", 47 | name="is_type", 48 | field=lamindb.base.fields.BooleanField( 49 | blank=True, db_index=True, default=False 50 | ), 51 | ), 52 | migrations.AlterField( 53 | model_name="flextable", 54 | name="is_type", 55 | field=lamindb.base.fields.BooleanField( 56 | blank=True, db_index=True, default=False 57 | ), 58 | ), 59 | migrations.AlterField( 60 | model_name="param", 61 | name="is_type", 62 | field=lamindb.base.fields.BooleanField( 63 | blank=True, db_index=True, default=False 64 | ), 65 | ), 66 | migrations.AlterField( 67 | model_name="project", 68 | name="is_type", 69 | field=lamindb.base.fields.BooleanField( 70 | blank=True, db_index=True, default=False 71 | ), 72 | ), 73 | migrations.AlterField( 74 | model_name="reference", 75 | name="is_type", 76 | field=lamindb.base.fields.BooleanField( 77 | blank=True, db_index=True, default=False 78 | ), 79 | ), 80 | migrations.AlterField( 81 | model_name="schema", 82 | name="is_type", 83 | field=lamindb.base.fields.BooleanField( 84 | blank=True, db_index=True, default=False 85 | ), 86 | ), 87 | migrations.AlterField( 88 | model_name="ulabel", 89 | name="is_type", 90 | field=lamindb.base.fields.BooleanField( 91 | blank=True, db_index=True, default=False 92 | ), 93 | ), 94 | ] 95 | -------------------------------------------------------------------------------- /lamindb/migrations/0123_alter_artifact_description_alter_branch_description_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.1.12 on 2025-09-28 21:39 2 | 3 | from django.db import migrations 4 | 5 | import lamindb.base.fields 6 | 7 | 8 | class Migration(migrations.Migration): 9 | dependencies = [ 10 | ("lamindb", "0122_remove_personproject_person_and_more"), 11 | ] 12 | 13 | operations = [ 14 | migrations.AlterField( 15 | model_name="artifact", 16 | name="description", 17 | field=lamindb.base.fields.TextField( 18 | blank=True, 
db_index=True, default=None, null=True 19 | ), 20 | ), 21 | migrations.AlterField( 22 | model_name="branch", 23 | name="description", 24 | field=lamindb.base.fields.TextField(blank=True, default=None, null=True), 25 | ), 26 | migrations.AlterField( 27 | model_name="collection", 28 | name="description", 29 | field=lamindb.base.fields.TextField(blank=True, default=None, null=True), 30 | ), 31 | migrations.AlterField( 32 | model_name="feature", 33 | name="description", 34 | field=lamindb.base.fields.TextField(blank=True, default=None, null=True), 35 | ), 36 | migrations.AlterField( 37 | model_name="record", 38 | name="description", 39 | field=lamindb.base.fields.TextField(blank=True, default=None, null=True), 40 | ), 41 | migrations.AlterField( 42 | model_name="schema", 43 | name="description", 44 | field=lamindb.base.fields.TextField(blank=True, default=None, null=True), 45 | ), 46 | migrations.AlterField( 47 | model_name="space", 48 | name="description", 49 | field=lamindb.base.fields.TextField(blank=True, default=None, null=True), 50 | ), 51 | migrations.AlterField( 52 | model_name="storage", 53 | name="description", 54 | field=lamindb.base.fields.TextField(blank=True, default=None, null=True), 55 | ), 56 | migrations.AlterField( 57 | model_name="transform", 58 | name="description", 59 | field=lamindb.base.fields.TextField( 60 | blank=True, db_index=True, default=None, null=True 61 | ), 62 | ), 63 | migrations.AlterField( 64 | model_name="ulabel", 65 | name="description", 66 | field=lamindb.base.fields.TextField(blank=True, default=None, null=True), 67 | ), 68 | migrations.AlterField( 69 | model_name="artifact", 70 | name="key", 71 | field=lamindb.base.fields.CharField( 72 | blank=True, db_index=True, default=None, max_length=1024, null=True 73 | ), 74 | ), 75 | migrations.AlterField( 76 | model_name="transform", 77 | name="key", 78 | field=lamindb.base.fields.CharField( 79 | blank=True, db_index=True, default=None, max_length=1024, null=True 80 | ), 81 | ), 82 | ] 
83 | -------------------------------------------------------------------------------- /tests/curators/test_curate_from_croissant.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | 3 | import lamindb as ln 4 | import pytest 5 | 6 | 7 | @pytest.mark.parametrize("filepath_prefix", [None, "test-curators-db/"]) 8 | def test_curate_artifact_from_croissant(filepath_prefix: str | None): 9 | croissant_path, dataset1_path = ln.examples.croissant.mini_immuno( 10 | n_files=1, filepath_prefix=filepath_prefix 11 | ) 12 | artifact1 = ln.integrations.curate_from_croissant(croissant_path) 13 | assert ( 14 | artifact1.description 15 | == "Mini immuno dataset - A few samples from the immunology dataset" 16 | ) 17 | assert artifact1.key == "mini_immuno.anndata.zarr" 18 | assert artifact1.version == "1.0" 19 | assert ( 20 | artifact1._key_is_virtual 21 | if filepath_prefix is None 22 | else not artifact1._key_is_virtual 23 | ) 24 | license_label = artifact1.ulabels.get( 25 | name="https://creativecommons.org/licenses/by/4.0/" 26 | ) 27 | project_label = artifact1.projects.get(name="Mini Immuno Project") 28 | 29 | # now mutate the dataset and create a new version 30 | croissant_path, dataset1_path = ln.examples.croissant.mini_immuno( 31 | n_files=1, filepath_prefix=filepath_prefix, strip_version=True 32 | ) 33 | dummy_file_path = dataset1_path / "dummy_file.txt" 34 | dummy_file_path.write_text("dummy file") 35 | 36 | artifact2 = ln.integrations.curate_from_croissant(croissant_path) 37 | assert artifact2.description == artifact1.description 38 | assert artifact2.key == artifact1.key 39 | assert artifact2.version is None 40 | assert artifact2.stem_uid == artifact1.stem_uid 41 | assert artifact2.uid != artifact1.uid 42 | assert ( 43 | artifact2._key_is_virtual 44 | if filepath_prefix is None 45 | else not artifact1._key_is_virtual 46 | ) 47 | license_label = artifact2.ulabels.get( 48 | 
name="https://creativecommons.org/licenses/by/4.0/" 49 | ) 50 | project_label = artifact2.projects.get(name="Mini Immuno Project") 51 | 52 | shutil.rmtree(dataset1_path) 53 | croissant_path.unlink() 54 | artifact1.delete(permanent=True, storage=True) # because of real storage key 55 | project_label.delete(permanent=True) 56 | license_label.delete(permanent=True) 57 | 58 | 59 | def test_curate_collection_from_croissant(): 60 | croissant_path, dataset1_path, dataset2_path = ln.examples.croissant.mini_immuno( 61 | n_files=2 62 | ) 63 | collection = ln.integrations.curate_from_croissant(croissant_path) 64 | croissant_path.unlink() 65 | shutil.rmtree(dataset1_path) 66 | dataset2_path.unlink() 67 | artifact1 = collection.artifacts.get(key="mini_immuno.anndata.zarr") 68 | artifact2 = collection.artifacts.get(key="mini.csv") 69 | license_label = collection.ulabels.get( 70 | name="https://creativecommons.org/licenses/by/4.0/" 71 | ) 72 | project_label = collection.projects.get(name="Mini Immuno Project") 73 | 74 | collection.delete(permanent=True) 75 | artifact1.delete(permanent=True) 76 | artifact2.delete(permanent=True) 77 | project_label.delete(permanent=True) 78 | license_label.delete(permanent=True) 79 | -------------------------------------------------------------------------------- /lamindb/migrations/0086_various.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-02-06 07:10 2 | 3 | from django.db import migrations, models 4 | 5 | import lamindb.base.fields 6 | 7 | 8 | class Migration(migrations.Migration): 9 | dependencies = [ 10 | ("lamindb", "0085_alter_feature_is_type_alter_flextable_is_type_and_more"), 11 | ] 12 | 13 | operations = [ 14 | migrations.AlterField( 15 | model_name="transform", 16 | name="hash", 17 | field=lamindb.base.fields.CharField( 18 | blank=True, 19 | db_index=True, 20 | default=None, 21 | max_length=22, 22 | null=True, 23 | unique=True, 24 | ), 25 | ), 26 | 
migrations.AlterField( 27 | model_name="artifact", 28 | name="hash", 29 | field=lamindb.base.fields.CharField( 30 | blank=True, 31 | db_index=True, 32 | default=None, 33 | max_length=22, 34 | null=True, 35 | unique=True, 36 | ), 37 | ), 38 | migrations.AlterField( 39 | model_name="collection", 40 | name="hash", 41 | field=lamindb.base.fields.CharField( 42 | blank=True, 43 | db_index=True, 44 | default=None, 45 | max_length=22, 46 | null=True, 47 | unique=True, 48 | ), 49 | ), 50 | migrations.CreateModel( 51 | name="Migration", 52 | fields=[ 53 | ( 54 | "id", 55 | models.BigAutoField( 56 | auto_created=True, 57 | primary_key=True, 58 | serialize=False, 59 | verbose_name="ID", 60 | ), 61 | ), 62 | ( 63 | "app", 64 | lamindb.base.fields.CharField( 65 | blank=True, default=None, max_length=255 66 | ), 67 | ), 68 | ( 69 | "name", 70 | lamindb.base.fields.CharField( 71 | blank=True, default=None, max_length=255 72 | ), 73 | ), 74 | ("applied", lamindb.base.fields.DateTimeField(blank=True)), 75 | ], 76 | options={ 77 | "db_table": "django_migrations", 78 | "managed": False, 79 | }, 80 | ), 81 | migrations.AlterField( 82 | model_name="param", 83 | name="dtype", 84 | field=lamindb.base.fields.CharField( 85 | blank=True, db_index=True, default=None, max_length=64, null=True 86 | ), 87 | ), 88 | migrations.AlterField( 89 | model_name="param", 90 | name="dtype", 91 | field=lamindb.base.fields.CharField( 92 | blank=True, db_index=True, default=None, max_length=255, null=True 93 | ), 94 | ), 95 | ] 96 | -------------------------------------------------------------------------------- /tests/core/test_rename_features_labels.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | 4 | import lamindb as ln 5 | import pandas as pd 6 | import pytest 7 | 8 | 9 | def test_rename_feature(ccaplog): 10 | df = pd.DataFrame({"old_name": [1, 2]}) 11 | ln.Feature(name="old_name", dtype=int).save() 12 | artifact = 
ln.Artifact.from_dataframe( 13 | df, key="test.parquet", schema="valid_features" 14 | ).save() 15 | feature = ln.Feature.get(name="old_name") 16 | 17 | # First rename 18 | feature.name = "new_name" 19 | feature.save() 20 | now1 = datetime.datetime.now(datetime.timezone.utc).replace(microsecond=0) 21 | assert ( 22 | "by renaming feature from 'old_name' to 'new_name' 1 artifact no longer matches the feature name in storage:" 23 | in ccaplog.text 24 | ) 25 | if os.getenv("LAMINDB_TEST_DB_VENDOR") != "sqlite": 26 | feature.refresh_from_db() 27 | assert feature.synonyms == "old_name" 28 | assert feature._aux["renamed"] == { 29 | now1.isoformat().replace("+00:00", "Z"): "old_name" 30 | } 31 | 32 | # Second rename 33 | feature.name = "newer_name" 34 | feature.save() 35 | now2 = datetime.datetime.now(datetime.timezone.utc).replace(microsecond=0) 36 | assert ( 37 | "by renaming feature from 'new_name' to 'newer_name' 1 artifact no longer matches the feature name in storage:" 38 | in ccaplog.text 39 | ) 40 | if os.getenv("LAMINDB_TEST_DB_VENDOR") != "sqlite": 41 | feature.refresh_from_db() 42 | assert feature.synonyms == "old_name|new_name" 43 | assert feature._aux["renamed"] == { 44 | now1.isoformat().replace("+00:00", "Z"): "old_name", 45 | now2.isoformat().replace("+00:00", "Z"): "new_name", 46 | } 47 | 48 | schema = artifact.feature_sets.first() 49 | artifact.delete(permanent=True) 50 | schema.delete(permanent=True) 51 | feature.delete(permanent=True) 52 | 53 | 54 | @pytest.mark.parametrize("model_class", [ln.ULabel, ln.Record]) 55 | def test_rename_label(model_class, ccaplog): 56 | df = pd.DataFrame( 57 | { 58 | "feature1": pd.Categorical(["label1", "label2"]), 59 | "feature2": pd.Categorical(["label2", "label2"]), 60 | } 61 | ) 62 | 63 | label1 = model_class(name="label1").save() 64 | label2 = model_class(name="label2").save() 65 | feature1 = ln.Feature(name="feature1", dtype=model_class).save() 66 | feature2 = ln.Feature(name="feature2", dtype=model_class).save() 67 | 
artifact = ln.Artifact.from_dataframe( 68 | df, key="test.parquet", schema="valid_features" 69 | ).save() 70 | 71 | label = model_class.get(name="label1") 72 | label.name = "label-renamed" 73 | label.save() 74 | 75 | assert ( 76 | "by renaming label from 'label1' to 'label-renamed' 1 artifact no longer matches the label name in storage:" 77 | in ccaplog.text 78 | ) 79 | 80 | schema = artifact.feature_sets.first() 81 | artifact.delete(permanent=True) 82 | schema.delete(permanent=True) 83 | feature1.delete(permanent=True) 84 | feature2.delete(permanent=True) 85 | label1.delete(permanent=True) 86 | label2.delete(permanent=True) 87 | -------------------------------------------------------------------------------- /lamindb/examples/croissant/mini_immuno.anndata.zarr_metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "@context": { 3 | "@vocab": "https://schema.org/", 4 | "cr": "https://mlcommons.org/croissant/", 5 | "ml": "http://ml-schema.org/", 6 | "sc": "https://schema.org/", 7 | "dct": "http://purl.org/dc/terms/", 8 | "data": "https://mlcommons.org/croissant/data/", 9 | "rai": "https://mlcommons.org/croissant/rai/", 10 | "format": "https://mlcommons.org/croissant/format/", 11 | "citeAs": "https://mlcommons.org/croissant/citeAs/", 12 | "conformsTo": "https://mlcommons.org/croissant/conformsTo/", 13 | "@language": "en", 14 | "repeated": "https://mlcommons.org/croissant/repeated/", 15 | "field": "https://mlcommons.org/croissant/field/", 16 | "examples": "https://mlcommons.org/croissant/examples/", 17 | "recordSet": "https://mlcommons.org/croissant/recordSet/", 18 | "fileObject": "https://mlcommons.org/croissant/fileObject/", 19 | "fileSet": "https://mlcommons.org/croissant/fileSet/", 20 | "source": "https://mlcommons.org/croissant/source/", 21 | "references": "https://mlcommons.org/croissant/references/", 22 | "key": "https://mlcommons.org/croissant/key/", 23 | "parentField": 
"https://mlcommons.org/croissant/parentField/", 24 | "isLiveDataset": "https://mlcommons.org/croissant/isLiveDataset/", 25 | "separator": "https://mlcommons.org/croissant/separator/", 26 | "extract": "https://mlcommons.org/croissant/extract/", 27 | "subField": "https://mlcommons.org/croissant/subField/", 28 | "regex": "https://mlcommons.org/croissant/regex/", 29 | "column": "https://mlcommons.org/croissant/column/", 30 | "path": "https://mlcommons.org/croissant/path/", 31 | "fileProperty": "https://mlcommons.org/croissant/fileProperty/", 32 | "md5": "https://mlcommons.org/croissant/md5/", 33 | "jsonPath": "https://mlcommons.org/croissant/jsonPath/", 34 | "transform": "https://mlcommons.org/croissant/transform/", 35 | "replace": "https://mlcommons.org/croissant/replace/", 36 | "dataType": "https://mlcommons.org/croissant/dataType/", 37 | "includes": "https://mlcommons.org/croissant/includes/", 38 | "excludes": "https://mlcommons.org/croissant/excludes/" 39 | }, 40 | "@type": "Dataset", 41 | "name": "Mini immuno dataset", 42 | "description": "A few samples from the immunology dataset", 43 | "url": "https://lamin.ai/laminlabs/lamindata/artifact/tCUkRcaEjTjhtozp0000", 44 | "creator": { 45 | "@type": "Person", 46 | "name": "falexwolf" 47 | }, 48 | "dateCreated": "2025-07-16", 49 | "cr:projectName": "Mini Immuno Project", 50 | "datePublished": "2025-07-16", 51 | "version": "1.0", 52 | "license": "https://creativecommons.org/licenses/by/4.0/", 53 | "citation": "Please cite this dataset as: mini immuno (2025)", 54 | "encodingFormat": "zarr", 55 | "distribution": [ 56 | { 57 | "@type": "cr:FileSet", 58 | "@id": "mini_immuno.anndata.zarr", 59 | "containedIn": { 60 | "@id": "directory" 61 | }, 62 | "encodingFormat": "zarr" 63 | } 64 | ], 65 | "cr:recordSet": [ 66 | { 67 | "@type": "cr:RecordSet", 68 | "@id": "#samples", 69 | "name": "samples", 70 | "description": "my sample" 71 | } 72 | ] 73 | } 74 | 
--------------------------------------------------------------------------------