`__.
63 |
64 | """
65 |
66 | import secrets
67 | import string
68 |
69 |
def base64(n_char: int) -> str:
    """Random Base64 string.

    Uses the URL-safe 64-character alphabet (digits, letters, `_`, `-`)
    with cryptographically secure choices via `secrets`.
    """
    pool = string.digits + string.ascii_letters.swapcase() + "_-"
    return "".join(secrets.choice(pool) for _ in range(n_char))
75 |
76 |
def base62(n_char: int) -> str:
    """Random Base62 string.

    Alphabet: digits plus upper- and lowercase ASCII letters, sampled with
    cryptographically secure choices via `secrets`.
    """
    pool = string.digits + string.ascii_letters.swapcase()
    return "".join(secrets.choice(pool) for _ in range(n_char))
82 |
83 |
def base26(n_char: int) -> str:
    """Random ASCII-lowercase string.

    Returns `n_char` characters drawn from `a-z` with cryptographically
    secure choices via `secrets`.
    """
    # return annotation added for consistency with base62/base64
    alphabet = string.ascii_lowercase
    uid = "".join(secrets.choice(alphabet) for i in range(n_char))
    return uid
89 |
90 |
def base62_4() -> str:
    """Random Base62 string of length 4 (~23.8 bits of entropy)."""
    return base62(4)
93 |
94 |
def base62_8() -> str:
    """Random Base62 string of length 8 (~47.6 bits of entropy)."""
    return base62(8)
98 |
99 |
def base62_12() -> str:
    """Random Base62 string of length 12 (~71.4 bits of entropy)."""
    return base62(12)
103 |
104 |
def base62_16() -> str:
    """Random Base62 string of length 16 (~95.3 bits of entropy)."""
    return base62(16)
108 |
109 |
def base62_20() -> str:
    """Random Base62 string of length 20 (~119.1 bits of entropy)."""
    return base62(20)
113 |
114 |
def base62_24() -> str:
    """Random Base62 string of length 24 (~142.9 bits of entropy)."""
    return base62(24)
118 |
--------------------------------------------------------------------------------
/lamindb/base/users.py:
--------------------------------------------------------------------------------
# per-instance cache: maps an instance slug to the id of the current user
# in that instance's User registry (filled lazily by current_user_id)
user_id_cache = {}
2 |
3 |
def current_user_id() -> int:
    """Return the id of the current user in the connected instance's User registry."""
    import lamindb_setup as ln_setup
    from lamindb_setup import settings
    from lamindb_setup._init_instance import register_user

    from lamindb.models import User

    def query_user_id():
        # during migrations, the User table may not be queryable yet
        if ln_setup.core.django.IS_MIGRATING:
            return 1
        try:
            return User.objects.get(uid=settings.user.uid).id
        except User.DoesNotExist:
            # user not yet registered in this instance — register, then re-query
            register_user(settings.user)
            return User.objects.get(uid=settings.user.uid).id

    if not settings._instance_exists:
        return query_user_id()
    slug = settings.instance.slug
    if slug not in user_id_cache:
        user_id_cache[slug] = query_user_id()
    return user_id_cache[slug]
28 |
--------------------------------------------------------------------------------
/lamindb/core/__init__.py:
--------------------------------------------------------------------------------
1 | """Core library.
2 |
3 | Settings & context:
4 |
5 | .. autosummary::
6 | :toctree: .
7 |
8 | Settings
9 | subsettings
10 | Context
11 |
12 | Artifact loaders:
13 |
14 | .. autosummary::
15 | :toctree: .
16 |
17 | loaders
18 |
19 | Data loaders:
20 |
21 | .. autosummary::
22 | :toctree: .
23 |
24 | MappedCollection
25 |
26 | Modules:
27 |
28 | .. autosummary::
29 | :toctree: .
30 |
31 | datasets
32 | storage
33 | logger
34 |
35 | """
36 |
37 | from lamin_utils import logger
38 | from lamin_utils._inspect import InspectResult
39 |
40 | from .. import errors as exceptions
41 | from . import datasets, loaders, subsettings, types
42 | from ._context import Context
43 | from ._mapped_collection import MappedCollection
44 | from ._settings import Settings
45 |
--------------------------------------------------------------------------------
/lamindb/core/_compat.py:
--------------------------------------------------------------------------------
1 | import importlib.util
2 | from typing import Any, Callable, TypeVar
3 |
4 | T = TypeVar("T")
5 |
6 |
def is_package_installed(package_name: str) -> bool:
    """Return whether `package_name` is importable, without importing it."""
    return importlib.util.find_spec(package_name) is not None
10 |
11 |
def with_package(package_name: str, operation: Callable[[Any], T]) -> T:
    """Execute an operation that requires a specific package.

    Args:
        package_name: Package name (e.g., "mudata")
        operation: Function that takes the imported module and returns a result

    Raises:
        ImportError: If the package cannot be imported.

    Examples:
        # For direct package functions
        result = with_package("mudata", lambda mod: mod.read_zarr(path))
    """
    try:
        return operation(importlib.import_module(package_name))
    except ImportError:
        raise ImportError(
            f"Package '{package_name}' is required but not installed. "
            f"Please install with: pip install {package_name}"
        ) from None
31 |
32 |
33 | def with_package_obj(
34 | obj: Any, class_name: str, package_name: str, operation: Callable[[Any], T]
35 | ) -> tuple[bool, T | None]:
36 | """Handle operations on objects that require specific packages.
37 |
38 | Args:
39 | obj: The object to operate on
40 | class_name: Expected class name (e.g., "MuData")
41 | package_name: Package that provides the class (e.g., "mudata")
42 | operation: Function to call with the object if package is available.
43 |
44 | Examples:
45 | # For instance methods
46 | handled, res = apply_class_func(dmem, "MuData", "mudata",
47 | lambda obj: obj.write(filepath))
48 | """
49 | if obj.__class__.__name__ == class_name:
50 | try:
51 | importlib.import_module(package_name)
52 | result = operation(obj)
53 | return True, result
54 | except ImportError:
55 | raise ImportError(
56 | f"Object appears to be {class_name} but '{package_name}' package is not installed. "
57 | f"Please install with: pip install {package_name}"
58 | ) from None
59 |
60 | return False, None
61 |
--------------------------------------------------------------------------------
/lamindb/core/_track_environment.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import subprocess
4 | import sys
5 | from typing import TYPE_CHECKING
6 |
7 | import lamindb_setup as ln_setup
8 | from lamin_utils import logger
9 |
10 | if TYPE_CHECKING:
11 | from lamindb.models import Run
12 |
13 |
def track_environment(run: Run) -> None:
    """Dump `pip freeze` to `run_env_pip_<run.uid>.txt` in the setup cache dir."""
    filepath = ln_setup.settings.cache_dir / f"run_env_pip_{run.uid}.txt"
    # create a requirements.txt
    # we don't create a conda environment.yml mostly for its slowness
    result = None
    try:
        with open(filepath, "w") as f:
            result = subprocess.run(
                [sys.executable, "-m", "pip", "freeze"],
                stdout=f,
            )
    except OSError as e:
        logger.warning(f"could not run pip freeze with error {e}")
    if result is not None and result.returncode == 0:
        logger.info(f"tracked pip freeze > {str(filepath)}")
29 |
--------------------------------------------------------------------------------
/lamindb/core/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | """Test datasets.
2 |
3 | The mini immuno dataset.
4 |
5 | .. autosummary::
6 | :toctree: .
7 |
8 | mini_immuno
9 |
10 | Small in-memory datasets.
11 |
12 | .. autosummary::
13 | :toctree: .
14 |
15 | anndata_with_obs
16 |
17 | Files.
18 |
19 | .. autosummary::
20 | :toctree: .
21 |
22 | file_fcs
23 | file_fcs_alpert19
24 | file_tsv_rnaseq_nfcore_salmon_merged_gene_counts
25 | file_jpg_paradisi05
26 | file_tiff_suo22
27 | file_fastq
28 | file_bam
29 | file_mini_csv
30 |
31 | Directories.
32 |
33 | .. autosummary::
34 | :toctree: .
35 |
36 | dir_scrnaseq_cellranger
37 | dir_iris_images
38 |
39 | Dataframe, AnnData, MuData.
40 |
41 | .. autosummary::
42 | :toctree: .
43 |
44 | df_iris
45 | df_iris_in_meter
46 | df_iris_in_meter_study1
47 | df_iris_in_meter_study2
48 | anndata_mouse_sc_lymph_node
49 | anndata_human_immune_cells
50 | anndata_pbmc68k_reduced
51 | anndata_file_pbmc68k_test
52 | anndata_pbmc3k_processed
53 | anndata_with_obs
54 | anndata_suo22_Visium10X
55 | mudata_papalexi21_subset
56 | schmidt22_crispra_gws_IFNG
57 | schmidt22_perturbseq
58 |
59 | Other.
60 |
61 | .. autosummary::
62 | :toctree: .
63 |
64 | fake_bio_notebook_titles
65 | """
66 |
67 | from . import mini_immuno
68 | from ._core import (
69 | anndata_file_pbmc68k_test,
70 | anndata_human_immune_cells,
71 | anndata_mouse_sc_lymph_node,
72 | anndata_pbmc3k_processed,
73 | anndata_pbmc68k_reduced,
74 | anndata_suo22_Visium10X,
75 | df_iris,
76 | df_iris_in_meter,
77 | df_iris_in_meter_study1,
78 | df_iris_in_meter_study2,
79 | dir_iris_images,
80 | dir_scrnaseq_cellranger,
81 | file_bam,
82 | file_fastq,
83 | file_fcs,
84 | file_fcs_alpert19,
85 | file_jpg_paradisi05,
86 | file_mini_csv,
87 | file_tiff_suo22,
88 | file_tsv_rnaseq_nfcore_salmon_merged_gene_counts,
89 | mudata_papalexi21_subset,
90 | schmidt22_crispra_gws_IFNG,
91 | schmidt22_perturbseq,
92 | spatialdata_blobs,
93 | )
94 | from ._fake import fake_bio_notebook_titles
95 | from ._small import (
96 | anndata_with_obs,
97 | small_dataset3_cellxgene,
98 | )
99 |
100 | small_dataset1 = mini_immuno.get_dataset1 # backward compat
101 | small_dataset2 = mini_immuno.get_dataset2 # backward compat
102 |
--------------------------------------------------------------------------------
/lamindb/core/datasets/_fake.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 |
def fake_bio_notebook_titles(n=100) -> list[str]:
    """A fake collection of study titles."""
    from faker import Faker

    fake = Faker()

    from faker_biology.mol_biol import Antibody
    from faker_biology.physiology import CellType, Organ, Organelle

    # register biology providers so that fake.organ(), fake.celltype(), etc. work
    for provider in (CellType, Organ, Organelle, Antibody):
        fake.add_provider(provider)

    vocabulary = [
        "study",
        "investigate",
        "research",
        "result",
        "cluster",
        "rank",
        "candidate",
        "visualize",
        "efficiency",
        "classify",
    ]
    vocabulary += [fake.organ() for _ in range(5)] + ["intestine", "intestinal"]
    vocabulary += [fake.celltype() for _ in range(10)]
    vocabulary += [fake.antibody_isotype() for _ in range(20)]

    return [fake.sentence(ext_word_list=vocabulary) for _ in range(n)]
37 |
--------------------------------------------------------------------------------
/lamindb/core/datasets/_small.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing import Any, Literal
4 |
5 | import anndata as ad
6 | import numpy as np
7 | import pandas as pd
8 |
9 |
def small_dataset3_cellxgene(
    otype: Literal["DataFrame", "AnnData"] = "AnnData",
) -> pd.DataFrame | ad.AnnData:
    """Mini dataset with cellxgene-style metadata and Ensembl gene IDs.

    Args:
        otype: Return a `DataFrame` (expression columns and obs columns
            combined) or an `AnnData` (expression and obs split).
    """
    # TODO: consider other ids for other organisms
    # "ENSMUSG00002076988"
    var_ids = ["invalid_ensembl_id", "ENSG00000000419", "ENSG00000139618"]
    dataset_dict = {
        var_ids[0]: [2, 3, 3],
        var_ids[1]: [3, 4, 5],
        var_ids[2]: [4, 2, 3],
        "disease_ontology_term_id": ["MONDO:0004975", "MONDO:0004980", "MONDO:0004980"],
        "organism": ["human", "human", "human"],
        "sex": ["female", "male", "unknown"],
        "sex_ontology_term_id": ["PATO:0000383", "PATO:0000384", "unknown"],
        "tissue": ["lungg", "lungg", "heart"],
        "donor": ["-1", "1", "2"],
    }
    dataset_df = pd.DataFrame(
        dataset_dict,
        index=["barcode1", "barcode2", "barcode3"],
    )
    dataset_df["tissue"] = dataset_df["tissue"].astype("category")
    # dead code removed: an AnnData was previously constructed here and discarded
    if otype == "DataFrame":
        return dataset_df
    else:
        # first three columns are the expression matrix, the rest is obs metadata
        dataset_ad = ad.AnnData(dataset_df.iloc[:, :3], obs=dataset_df.iloc[:, 3:])
        return dataset_ad
41 |
42 |
def anndata_with_obs() -> ad.AnnData:
    """Create a mini anndata with cell_type, disease and tissue."""
    # redundant local `import anndata as ad` removed — it shadowed the
    # module-level import at the top of this file
    import bionty.base as bionty_base

    celltypes = ["T cell", "hematopoietic stem cell", "hepatocyte", "my new cell type"]
    celltype_ids = ["CL:0000084", "CL:0000037", "CL:0000182", ""]
    diseases = [
        "chronic kidney disease",
        "liver lymphoma",
        "cardiac ventricle disorder",
        "Alzheimer disease",
    ]
    tissues = ["kidney", "liver", "heart", "brain"]
    # 40 observations: each 4-element label list is tiled 10x
    df = pd.DataFrame()
    df["cell_type"] = celltypes * 10
    df["cell_type_id"] = celltype_ids * 10
    df["tissue"] = tissues * 10
    df["disease"] = diseases * 10
    df.index = "obs" + df.index.astype(str)

    adata = ad.AnnData(X=np.zeros(shape=(40, 100), dtype=np.float32), obs=df)
    # use the first 100 Ensembl gene ids from the bionty reference as var names
    adata.var.index = bionty_base.Gene().df().head(100)["ensembl_gene_id"].values

    return adata
68 |
--------------------------------------------------------------------------------
/lamindb/core/exceptions.py:
--------------------------------------------------------------------------------
1 | from ..errors import * # noqa: F403 backward compat
2 |
--------------------------------------------------------------------------------
/lamindb/core/storage/__init__.py:
--------------------------------------------------------------------------------
1 | """Storage API.
2 |
3 | Valid suffixes.
4 |
5 | .. autosummary::
6 | :toctree: .
7 |
8 | VALID_SUFFIXES
9 |
10 | Array accessors.
11 |
12 | .. autosummary::
13 | :toctree: .
14 |
15 | AnnDataAccessor
16 | BackedAccessor
17 | """
18 |
19 | from lamindb_setup.core.upath import LocalPathClasses, UPath, infer_filesystem
20 |
21 | from ._backed_access import AnnDataAccessor, BackedAccessor
22 | from ._tiledbsoma import save_tiledbsoma_experiment
23 | from ._valid_suffixes import VALID_SUFFIXES
24 | from .objects import infer_suffix, write_to_disk
25 | from .paths import delete_storage
26 |
--------------------------------------------------------------------------------
/lamindb/core/storage/_polars_lazy_df.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from contextlib import contextmanager
4 | from pathlib import Path
5 | from typing import TYPE_CHECKING
6 |
7 | if TYPE_CHECKING:
8 | from collections.abc import Iterator
9 |
10 | from polars import LazyFrame as PolarsLazyFrame
11 | from upath import UPath
12 |
# file suffixes `_open_polars_lazy_df` can scan lazily (matches the `scans` dict below)
POLARS_SUFFIXES = (".parquet", ".csv", ".ndjson", ".ipc")
14 |
15 |
@contextmanager
def _open_polars_lazy_df(
    paths: UPath | list[UPath], **kwargs
) -> Iterator[PolarsLazyFrame]:
    """Open one or several paths as a single polars `LazyFrame`.

    Directories are expanded recursively into their contained files; all files
    are opened up front and closed when the context manager exits.

    Args:
        paths: A path or list of paths; directories are globbed recursively.
        **kwargs: Forwarded to the `polars.scan_*` function for the suffix.

    Raises:
        ImportError: If polars is not installed.
        ValueError: If no files are found or the suffix is unsupported.
    """
    try:
        import polars as pl
    except ImportError as ie:
        raise ImportError("Please install polars: pip install polars") from ie

    scans = {
        ".parquet": pl.scan_parquet,
        ".csv": pl.scan_csv,
        ".ndjson": pl.scan_ndjson,
        ".ipc": pl.scan_ipc,
    }

    path_list = []
    if isinstance(paths, Path):
        paths = [paths]
    for path in paths:
        # assume http is always a file
        if getattr(path, "protocol", None) not in {"http", "https"} and path.is_dir():
            path_list += [p for p in path.rglob("*") if p.suffix != ""]
        else:
            path_list.append(path)

    # fail with clear messages instead of an opaque IndexError / KeyError
    if not path_list:
        raise ValueError("no files to open")
    suffix = path_list[0].suffix
    if suffix not in scans:
        raise ValueError(
            f"unsupported file suffix {suffix!r}, should be one of {POLARS_SUFFIXES}"
        )

    open_files = []

    try:
        for path in path_list:
            open_files.append(path.open(mode="rb"))

        yield scans[suffix](open_files, **kwargs)
    finally:
        for open_file in open_files:
            open_file.close()
52 |
--------------------------------------------------------------------------------
/lamindb/core/storage/_pyarrow_dataset.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing import TYPE_CHECKING
4 |
5 | import pyarrow.dataset
6 | from lamindb_setup.core.upath import LocalPathClasses
7 |
8 | if TYPE_CHECKING:
9 | from pyarrow.dataset import Dataset as PyArrowDataset
10 | from upath import UPath
11 |
12 |
# presumably the suffixes pyarrow.dataset can read; not referenced by
# `_open_pyarrow_dataset` below, so callers apparently filter on it — TODO confirm
PYARROW_SUFFIXES = (".parquet", ".csv", ".json", ".orc", ".arrow", ".feather", ".ipc")
14 |
15 |
def _open_pyarrow_dataset(paths: UPath | list[UPath], **kwargs) -> PyArrowDataset:
    """Open one or several paths as a single pyarrow dataset.

    Args:
        paths: A path or list of paths; directories in a list are expanded
            recursively into their contained files.
        **kwargs: Forwarded to `pyarrow.dataset.dataset`.
    """
    if isinstance(paths, list):
        # a single path can be a directory, but a list of paths
        # has to be a flat list of files
        paths_str = []
        path0 = paths[0]
        # named functions instead of assigned lambdas (PEP 8 E731)
        if isinstance(path0, LocalPathClasses):
            filesystem = None

            def path_to_str(p):
                return p.as_posix()
        else:
            filesystem = path0.fs

            def path_to_str(p):
                return p.path

        for path in paths:
            if (
                getattr(path, "protocol", None) not in {"http", "https"}
                and path.is_dir()
            ):
                paths_str += [path_to_str(p) for p in path.rglob("*") if p.suffix != ""]
            else:
                paths_str.append(path_to_str(path))
    elif isinstance(paths, LocalPathClasses):
        paths_str, filesystem = paths.as_posix(), None
    else:
        paths_str, filesystem = paths.path, paths.fs

    return pyarrow.dataset.dataset(paths_str, filesystem=filesystem, **kwargs)
42 |
--------------------------------------------------------------------------------
/lamindb/core/storage/_valid_suffixes.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from lamindb_setup.core.upath import VALID_COMPOSITE_SUFFIXES, VALID_SIMPLE_SUFFIXES
4 |
5 | # add new composite suffixes like so
6 | VALID_COMPOSITE_SUFFIXES.update(
7 | {
8 | ".vitessce.json",
9 | ".ome.zarr",
10 | }
11 | )
12 | # can do the same for simple valid suffixes
13 |
14 |
class VALID_SUFFIXES:
    """Valid suffixes."""

    # NOTE: these bind the mutable sets from lamindb_setup, so additions like
    # the `VALID_COMPOSITE_SUFFIXES.update(...)` above are reflected here
    SIMPLE: set[str] = VALID_SIMPLE_SUFFIXES
    """Simple suffixes."""
    COMPOSITE: set[str] = VALID_COMPOSITE_SUFFIXES
    """Composite suffixes."""
22 |
--------------------------------------------------------------------------------
/lamindb/core/storage/objects.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from pathlib import PurePosixPath
4 | from typing import TYPE_CHECKING, TypeAlias
5 |
6 | from anndata import AnnData
7 | from pandas import DataFrame
8 |
9 | from lamindb.core._compat import (
10 | with_package_obj,
11 | )
12 | from lamindb.core.types import ScverseDataStructures
13 |
14 | if TYPE_CHECKING:
15 | from lamindb_setup.core.types import UPathStr
16 |
17 | SupportedDataTypes: TypeAlias = DataFrame | ScverseDataStructures
18 |
19 |
def infer_suffix(dmem: SupportedDataTypes, format: str | None = None):
    """Infer LaminDB storage file suffix from a data object.

    Args:
        dmem: In-memory data object (DataFrame, AnnData, MuData, or SpatialData).
        format: Optional explicit storage format, e.g. "h5ad" or "zarr".

    Raises:
        ValueError: If `format` is invalid for the given object type.
        NotImplementedError: If the object type is not supported.
    """
    if isinstance(dmem, AnnData):
        if format is not None:
            # should be `.h5ad`, `.zarr`, or `.anndata.zarr`
            if format not in {"h5ad", "zarr", "anndata.zarr"}:
                # message fixed to include the accepted 'anndata.zarr' option
                raise ValueError(
                    "Error when specifying AnnData storage format, it should be"
                    f" 'h5ad', 'zarr', or 'anndata.zarr', not '{format}'. Check 'format'"
                    " or the suffix of 'key'."
                )
            return "." + format
        return ".h5ad"

    if isinstance(dmem, DataFrame):
        if format == ".csv":
            return ".csv"
        return ".parquet"

    if with_package_obj(
        dmem,
        "MuData",
        "mudata",
        lambda obj: True,  # Just checking type, not calling any method
    )[0]:
        return ".h5mu"

    def _spatialdata_suffix(obj):
        # mirrors the AnnData branch: default ".zarr"; explicit formats get a
        # leading dot (previously returned without the dot — bug)
        if format is None:
            return ".zarr"
        if format in {"spatialdata.zarr", "zarr"}:
            return "." + format
        raise ValueError(
            "Error when specifying SpatialData storage format, it should be"
            f" 'zarr', 'spatialdata.zarr', not '{format}'. Check 'format'"
            " or the suffix of 'key'."
        )

    has_spatialdata, spatialdata_suffix = with_package_obj(
        dmem, "SpatialData", "spatialdata", _spatialdata_suffix
    )
    if has_spatialdata:
        return spatialdata_suffix
    else:
        raise NotImplementedError
69 |
70 |
def write_to_disk(dmem: SupportedDataTypes, filepath: UPathStr) -> None:
    """Writes the passed in memory data to disk to a specified path.

    Args:
        dmem: In-memory object (AnnData, DataFrame, MuData, or SpatialData).
        filepath: Target path; its suffix determines the on-disk format.

    Raises:
        NotImplementedError: If the object type or suffix is not supported.
    """
    # PurePosixPath works for plain strings too; the DataFrame branch
    # previously used `filepath.suffix`, which fails for a str `UPathStr`
    suffix = PurePosixPath(filepath).suffix

    if isinstance(dmem, AnnData):
        if suffix == ".h5ad":
            dmem.write_h5ad(filepath)
            return
        elif suffix == ".zarr":
            dmem.write_zarr(filepath)
            return
        else:
            raise NotImplementedError

    if isinstance(dmem, DataFrame):
        if suffix == ".csv":
            dmem.to_csv(filepath)
            return
        dmem.to_parquet(filepath)
        return

    if with_package_obj(dmem, "MuData", "mudata", lambda obj: obj.write(filepath))[0]:
        return

    if with_package_obj(
        dmem,
        "SpatialData",
        "spatialdata",
        lambda obj: obj.write(filepath, overwrite=True),
    )[0]:
        return

    raise NotImplementedError
103 |
--------------------------------------------------------------------------------
/lamindb/core/subsettings/__init__.py:
--------------------------------------------------------------------------------
1 | """Sub settings.
2 |
3 | .. autosummary::
4 | :toctree: .
5 |
6 | CreationSettings
7 | AnnotationSettings
8 |
9 | """
10 |
11 | from ._annotation_settings import AnnotationSettings
12 | from ._creation_settings import CreationSettings
13 |
--------------------------------------------------------------------------------
/lamindb/core/subsettings/_annotation_settings.py:
--------------------------------------------------------------------------------
class AnnotationSettings:
    """Settings for automated annotation."""

    n_max_records: int = 1000
    """Maximal number of records to annotate with during automated annotation.

    If the number of records to annotate exceeds this limit, print a warning and do not annotate.

    The number is calculated per feature for labels, and per schema for features.
    """


# module-level singleton through which these settings are accessed
annotation_settings = AnnotationSettings()
12 |
--------------------------------------------------------------------------------
/lamindb/core/subsettings/_creation_settings.py:
--------------------------------------------------------------------------------
class CreationSettings:
    """Settings for record creation."""

    search_names: bool = True
    """Switch off to speed up creating records (default `True`).

    If `True`, search for alternative names and avoids duplicates.

    FAQ: :doc:`/faq/idempotency`
    """
    artifact_skip_size_hash: bool = False
    """To speed up registering high numbers of files (default `False`).

    This bypasses queries for size and hash to AWS & GCP.

    It speeds up file creation by about a factor 100.
    """
    artifact_silence_missing_run_warning: bool = False
    """Silence warning about missing run & transform during artifact creation (default `False`)."""
    _artifact_use_virtual_keys: bool = True
    """Treat `key` parameter in :class:`~lamindb.Artifact` as virtual.

    If `True`, the `key` is **not** used to construct file paths, but file paths are
    based on the `uid` of artifact.
    """


# module-level singleton through which these settings are accessed
creation_settings = CreationSettings()
27 |
--------------------------------------------------------------------------------
/lamindb/core/types.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing import TYPE_CHECKING, TypeVar
4 |
5 | from anndata import AnnData
6 | from lamindb_setup.core.types import UPathStr
7 |
8 | from lamindb.base.types import (
9 | Dtype,
10 | FieldAttr,
11 | ListLike,
12 | StrField,
13 | TransformType,
14 | )
15 |
# TypeVar stand-ins let the union below be spelled without importing the
# optional mudata/spatialdata packages; NOTE(review): type checkers will not
# resolve these to the real classes — confirm this is intended
MuData = TypeVar("MuData")
SpatialData = TypeVar("SpatialData")

# union of the scverse data structures lamindb supports
ScverseDataStructures = AnnData | MuData | SpatialData
20 |
--------------------------------------------------------------------------------
/lamindb/curators/__init__.py:
--------------------------------------------------------------------------------
1 | """Curators.
2 |
3 | .. autosummary::
4 | :toctree: .
5 |
6 | DataFrameCurator
7 | AnnDataCurator
8 | MuDataCurator
9 | SpatialDataCurator
10 | TiledbsomaExperimentCurator
11 |
12 | Modules.
13 |
14 | .. autosummary::
15 | :toctree: .
16 |
17 | core
18 |
19 | """
20 |
21 | from ._legacy import ( # backward compat
22 | CellxGeneAnnDataCatManager,
23 | PertAnnDataCatManager,
24 | )
25 | from .core import (
26 | AnnDataCurator,
27 | DataFrameCurator,
28 | MuDataCurator,
29 | SpatialDataCurator,
30 | TiledbsomaExperimentCurator,
31 | )
32 |
33 | __all__ = [
34 | "CellxGeneAnnDataCatManager",
35 | "PertAnnDataCatManager",
36 | "AnnDataCurator",
37 | "DataFrameCurator",
38 | "MuDataCurator",
39 | "SpatialDataCurator",
40 | "TiledbsomaExperimentCurator",
41 | ]
42 |
--------------------------------------------------------------------------------
/lamindb/curators/_cellxgene_schemas/schema_versions.csv:
--------------------------------------------------------------------------------
1 | schema_version,entity,organism,source,version
2 | 4.0.0,CellType,all,cl,2023-08-24
3 | 4.0.0,ExperimentalFactor,all,efo,3.57.0
4 | 4.0.0,Ethnicity,human,hancestro,3.0
5 | 4.0.0,DevelopmentalStage,human,hsapdv,2020-03-10
6 | 4.0.0,DevelopmentalStage,mouse,mmusdv,2020-03-10
7 | 4.0.0,Disease,all,mondo,2023-08-02
8 | 4.0.0,Organism,all,ncbitaxon,2023-06-20
9 | 4.0.0,Phenotype,all,pato,2023-05-18
10 | 4.0.0,Tissue,all,uberon,2023-09-05
11 | 5.0.0,CellType,all,cl,2024-01-04
12 | 5.0.0,ExperimentalFactor,all,efo,3.62.0
13 | 5.0.0,Ethnicity,human,hancestro,3.0
14 | 5.0.0,DevelopmentalStage,human,hsapdv,2020-03-10
15 | 5.0.0,DevelopmentalStage,mouse,mmusdv,2020-03-10
16 | 5.0.0,Disease,all,mondo,2024-01-03
17 | 5.0.0,Organism,all,ncbitaxon,2023-06-20
18 | 5.0.0,Phenotype,all,pato,2023-05-18
19 | 5.0.0,Tissue,all,uberon,2024-01-18
20 | 5.0.0,Gene,human,ensembl,release-110
21 | 5.0.0,Gene,mouse,ensembl,release-110
22 | 5.1.0,CellType,all,cl,2024-04-05
23 | 5.1.0,ExperimentalFactor,all,efo,3.65.0
24 | 5.1.0,Ethnicity,human,hancestro,3.0
25 | 5.1.0,DevelopmentalStage,human,hsapdv,2020-03-10
26 | 5.1.0,DevelopmentalStage,mouse,mmusdv,2020-03-10
27 | 5.1.0,Disease,all,mondo,2024-05-08
28 | 5.1.0,Organism,all,ncbitaxon,2023-06-20
29 | 5.1.0,Phenotype,all,pato,2023-05-18
30 | 5.1.0,Tissue,all,uberon,2024-03-22
31 | 5.1.0,Gene,human,ensembl,release-110
32 | 5.1.0,Gene,mouse,ensembl,release-110
33 | 5.2.0,CellType,all,cl,2024-08-16
34 | 5.2.0,ExperimentalFactor,all,efo,3.69.0
35 | 5.2.0,Ethnicity,human,hancestro,3.0
36 | 5.2.0,DevelopmentalStage,human,hsapdv,2024-05-28
37 | 5.2.0,DevelopmentalStage,mouse,mmusdv,2024-05-28
38 | 5.2.0,Disease,all,mondo,2024-08-06
39 | 5.2.0,Organism,all,ncbitaxon,2023-06-20
40 | 5.2.0,Phenotype,all,pato,2023-05-18
41 | 5.2.0,Tissue,all,uberon,2024-08-07
42 | 5.2.0,Gene,human,ensembl,release-110
43 | 5.2.0,Gene,mouse,ensembl,release-110
44 |
--------------------------------------------------------------------------------
/lamindb/errors.py:
--------------------------------------------------------------------------------
1 | """Errors.
2 |
3 | .. autosummary::
4 | :toctree: .
5 |
   ValidationError
   InvalidArgument
   TrackNotCalled
   NotebookNotSaved
   DoesNotExist
   InconsistentKey
   MissingContextUID
   UpdateContext
   IntegrityError
   SQLRecordNameChangeIntegrityError
   FieldValidationError
   NoWriteAccess
14 |
15 | """
16 |
17 | # inheriting from SystemExit has the sole purpose of suppressing
18 | # the traceback - this isn't optimal but the current best solution
19 | # https://laminlabs.slack.com/archives/C04A0RMA0SC/p1726856875597489
20 |
21 |
22 | class ValidationError(Exception):
23 | """Validation error."""
24 |
25 | pass
26 |
27 |
28 | class InvalidArgument(Exception):
29 | """Invalid method or function argument."""
30 |
31 | pass
32 |
33 |
34 | class TrackNotCalled(Exception):
35 | """`ln.track()` wasn't called."""
36 |
37 | pass
38 |
39 |
40 | class NotebookNotSaved(Exception):
41 | """Notebook wasn't saved."""
42 |
43 | pass
44 |
45 |
46 | # equivalent to Django's DoesNotExist
47 | # and SQLAlchemy's NoResultFound
48 | class DoesNotExist(Exception):
49 | """No record found."""
50 |
51 | pass
52 |
53 |
54 | class InconsistentKey(Exception):
55 | """Inconsistent transform or artifact `key`."""
56 |
57 | pass
58 |
59 |
60 | class SQLRecordNameChangeIntegrityError(Exception):
61 | """Custom exception for name change errors."""
62 |
63 | pass
64 |
65 |
66 | class FieldValidationError(Exception):
67 | """Field validation error."""
68 |
69 | pass
70 |
71 |
72 | # -------------------------------------------------------------------------------------
73 | # run context
74 | # -------------------------------------------------------------------------------------
75 |
76 |
77 | class IntegrityError(Exception):
78 | """Integrity error.
79 |
80 | For instance, it's not allowed to delete artifacts outside managed storage
81 | locations.
82 | """
83 |
84 | pass
85 |
86 |
# inherits from SystemExit to suppress the traceback (see note at top of module)
class MissingContextUID(SystemExit):
    """User didn't define transform settings."""

    pass
91 |
92 |
# inherits from SystemExit to suppress the traceback (see note at top of module)
class UpdateContext(SystemExit):
    """Transform settings require update."""

    pass
97 |
98 |
99 | # -------------------------------------------------------------------------------------
100 | # record
101 | # -------------------------------------------------------------------------------------
102 |
103 |
104 | class NoWriteAccess(Exception):
105 | """No write access to a space."""
106 |
107 | pass
108 |
--------------------------------------------------------------------------------
/lamindb/examples/__init__.py:
--------------------------------------------------------------------------------
1 | """Examples.
2 |
3 | .. autosummary::
4 | :toctree: .
5 |
6 | ingest_mini_immuno_datasets
7 | schemas
8 |
9 | """
10 |
11 | from . import schemas
12 |
13 |
def ingest_mini_immuno_datasets():
    """Ingest mini immuno datasets.

    .. literalinclude:: scripts/ingest_mini_immuno_datasets.py
       :language: python
    """
    import sys
    from pathlib import Path

    # make the docs scripts importable, then trigger ingestion via import
    scripts_dir = Path(__file__).parent.parent.parent / "docs" / "scripts"
    if str(scripts_dir) not in sys.path:
        sys.path.append(str(scripts_dir))

    import ingest_mini_immuno_datasets  # noqa
28 |
--------------------------------------------------------------------------------
/lamindb/examples/schemas/__init__.py:
--------------------------------------------------------------------------------
1 | """Example schemas.
2 |
3 | .. autosummary::
4 | :toctree: .
5 |
6 | valid_features
7 | anndata_ensembl_gene_ids_and_valid_features_in_obs
8 |
9 | """
10 |
11 | from ._anndata import anndata_ensembl_gene_ids_and_valid_features_in_obs
12 | from ._simple import valid_features
13 |
--------------------------------------------------------------------------------
/lamindb/examples/schemas/_anndata.py:
--------------------------------------------------------------------------------
1 | from ... import Schema
2 |
3 |
def anndata_ensembl_gene_ids_and_valid_features_in_obs() -> Schema:
    """Return a schema for an AnnData with Ensembl gene IDs and valid features in obs.

    .. literalinclude:: scripts/define_schema_anndata_ensembl_gene_ids_and_valid_features_in_obs.py
       :language: python
    """
    import subprocess
    import sys
    from pathlib import Path

    docs_path = Path(__file__).parent.parent.parent.parent / "docs" / "scripts"
    # use the current interpreter instead of whatever "python" resolves to on PATH
    subprocess.run(
        [
            sys.executable,
            str(
                docs_path
                / "define_schema_anndata_ensembl_gene_ids_and_valid_features_in_obs.py"
            ),
        ],
        check=True,
    )

    return Schema.get(name="anndata_ensembl_gene_ids_and_valid_features_in_obs")
26 |
--------------------------------------------------------------------------------
/lamindb/examples/schemas/_simple.py:
--------------------------------------------------------------------------------
1 | from ... import Schema
2 |
3 |
def valid_features() -> Schema:
    """Return the `valid_features` example schema.

    .. literalinclude:: scripts/define_valid_features.py
       :language: python
    """
    # docstring fixed: it was copy-pasted from the AnnData schema helper and
    # literalincluded the wrong script
    import subprocess
    import sys
    from pathlib import Path

    docs_path = Path(__file__).parent.parent.parent.parent / "docs" / "scripts"
    # use the current interpreter instead of whatever "python" resolves to on PATH
    subprocess.run(
        [sys.executable, str(docs_path / "define_valid_features.py")],
        check=True,
    )

    return Schema.get(name="valid_features")
20 |
--------------------------------------------------------------------------------
/lamindb/integrations/__init__.py:
--------------------------------------------------------------------------------
1 | """Integrations.
2 |
3 | .. autosummary::
4 | :toctree: .
5 |
6 | save_vitessce_config
7 | save_tiledbsoma_experiment
8 | """
9 |
10 | from lamindb.core.storage import save_tiledbsoma_experiment
11 |
12 | from ._vitessce import save_vitessce_config
13 |
--------------------------------------------------------------------------------
/lamindb/migrations/0070_lamindbv1_migrate_data.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 5.2 on 2025-01-05 11:58
2 |
3 | from pathlib import Path
4 |
5 | import lamindb_setup as ln_setup
6 | import psycopg2
7 | from django.db import migrations
8 |
9 |
def get_artifact_path_psycopg2(artifact_id):
    """Resolve an artifact's full storage path via raw SQL.

    Uses psycopg2 directly (PostgreSQL only), since the historical models
    available inside a data migration do not expose path helpers.

    Args:
        artifact_id: Primary key of the ``lamindb_artifact`` row.

    Returns:
        The path string ``<storage root>/.lamindb/<uid><suffix>``.

    Raises:
        ValueError: If no artifact with the given id exists.
    """
    query = """
    SELECT
        s.root || '/.lamindb/' || a.uid || a.suffix AS full_path
    FROM
        lamindb_artifact a
    JOIN lamindb_storage s ON a.storage_id = s.id
    WHERE
        a.id = %s
    """

    # NOTE: psycopg2's "with conn" only wraps a transaction; it does not close
    # the connection — acceptable for a one-off migration helper.
    with psycopg2.connect(ln_setup.settings.instance.db) as conn:
        with conn.cursor() as cur:
            cur.execute(query, (artifact_id,))
            row = cur.fetchone()
            if row is None:
                # previously this raised an opaque TypeError (None[0])
                raise ValueError(f"artifact with id {artifact_id} not found")
            return row[0]
26 |
27 |
def transfer_source_code(apps, schema_editor):
    """Move source code from ``_source_code_artifact`` files into ``Transform.source_code``.

    For each transform with a source-code artifact, the artifact file is
    located on storage, read (converting notebooks to scripts), stored inline
    on the transform, and then the file and artifact record are deleted.
    """
    from lamindb._finish import notebook_to_script

    Transform = apps.get_model("lamindb", "Transform")
    transforms = Transform.objects.filter(
        _source_code_artifact__isnull=False,
    ).select_related("_source_code_artifact")

    for transform in transforms:
        print(f"migrating source code of transform {transform}")
        artifact = transform._source_code_artifact
        print("artifact", artifact.uid)

        # resolve the file's location via raw SQL (historical models lack path helpers)
        path_str = get_artifact_path_psycopg2(artifact.id)
        print(ln_setup.settings.storage.root_as_str)
        print(path_str)
        if path_str.startswith(ln_setup.settings.storage.root_as_str):
            # path lies in the default storage — rebuild it from the storage root
            path = (
                ln_setup.settings.storage.root
                / f".lamindb/{artifact.uid}{artifact.suffix}"
            )
        else:
            path = ln_setup.core.upath.UPath(path_str)
        if path.exists():
            if path_str.startswith("s3://"):
                # remote file: download to a local temp file before reading
                # NOTE(review): the temp file is not cleaned up afterwards
                local_path = Path(f"temp{path.suffix}")
                path.download_to(local_path)
            else:
                local_path = path

            if artifact.suffix == ".ipynb":
                # notebooks are converted to plain scripts before inlining
                transform.source_code = notebook_to_script(transform, local_path)
            else:
                transform.source_code = local_path.read_text()
            transform.hash = artifact.hash
            # remove the file from storage; its content now lives on the transform
            path.unlink()
        else:
            print(f"path did not exist: {path_str}")
        # detach the artifact before deleting it so the FK does not block deletion
        transform._source_code_artifact = None
        transform.save()
        artifact.delete()
69 |
70 |
class Migration(migrations.Migration):
    """Data migration: inline transform source code previously stored as artifacts."""

    dependencies = [
        ("lamindb", "0069_squashed"),
    ]

    operations = [
        # data-only; no reverse function is provided, so this is irreversible
        migrations.RunPython(transfer_source_code),
    ]
79 |
--------------------------------------------------------------------------------
/lamindb/migrations/0079_alter_rundata_value_json_and_more.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 5.2 on 2025-01-16 01:29
2 |
3 | import django.db.models.deletion
4 | from django.db import migrations, models
5 |
6 | import lamindb.base.fields
7 |
8 |
class Migration(migrations.Migration):
    """Make ``value_json`` fields optional and ``TidyTable.schema`` nullable."""

    dependencies = [
        ("lamindb", "0078_lamindbv1_part6c"),
    ]

    operations = [
        migrations.AlterField(
            model_name="rundata",
            name="value_json",
            field=models.JSONField(blank=True, null=True),
        ),
        migrations.AlterField(
            model_name="tidytabledata",
            name="value_json",
            field=models.JSONField(blank=True, null=True),
        ),
        # schema link survives schema deletion by nulling out (SET_NULL)
        migrations.AlterField(
            model_name="tidytable",
            name="schema",
            field=lamindb.base.fields.ForeignKey(
                blank=True,
                null=True,
                on_delete=django.db.models.deletion.SET_NULL,
                related_name="_tidytables",
                to="lamindb.schema",
            ),
        ),
    ]
37 |
--------------------------------------------------------------------------------
/lamindb/migrations/0081_revert_textfield_collection.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 5.2 on 2025-01-21 17:03
2 |
3 | from django.db import migrations
4 |
5 | import lamindb.base.fields
6 |
7 |
class Migration(migrations.Migration):
    """Revert ``Collection.description`` to an indexed, nullable TextField."""

    dependencies = [
        ("lamindb", "0080_polish_lamindbv1"),
    ]

    operations = [
        migrations.AlterField(
            model_name="collection",
            name="description",
            field=lamindb.base.fields.TextField(
                blank=True, db_index=True, default=None, null=True
            ),
        ),
    ]
22 |
--------------------------------------------------------------------------------
/lamindb/migrations/0082_alter_feature_dtype.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 5.2 on 2025-01-25 08:26
2 |
3 | from django.db import migrations
4 |
5 | import lamindb.base.fields
6 |
7 |
class Migration(migrations.Migration):
    """Alter ``Feature.dtype`` to a nullable, indexed CharField(max_length=255)."""

    dependencies = [
        ("lamindb", "0081_revert_textfield_collection"),
    ]

    operations = [
        migrations.AlterField(
            model_name="feature",
            name="dtype",
            field=lamindb.base.fields.CharField(
                blank=True, db_index=True, default=None, max_length=255, null=True
            ),
        ),
    ]
22 |
--------------------------------------------------------------------------------
/lamindb/migrations/0083_alter_feature_is_type_alter_flextable_is_type_and_more.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 5.2 on 2025-01-25 13:29
2 |
3 | from django.db import migrations
4 |
5 | import lamindb.base.fields
6 |
7 |
class Migration(migrations.Migration):
    """Backfill NULL ``is_type`` values to FALSE, then default the field to False.

    The raw SQL must run before the field alterations so that no NULLs remain
    when the new ``default=False`` definition takes effect.
    """

    dependencies = [
        ("lamindb", "0082_alter_feature_dtype"),
    ]

    operations = [
        # backfill: replace NULL is_type with FALSE across all affected registries
        migrations.RunSQL(
            sql="""
            UPDATE lamindb_feature
            SET is_type = FALSE
            WHERE is_type IS NULL;

            UPDATE lamindb_flextable
            SET is_type = FALSE
            WHERE is_type IS NULL;

            UPDATE lamindb_param
            SET is_type = FALSE
            WHERE is_type IS NULL;

            UPDATE lamindb_project
            SET is_type = FALSE
            WHERE is_type IS NULL;

            UPDATE lamindb_reference
            SET is_type = FALSE
            WHERE is_type IS NULL;

            UPDATE lamindb_schema
            SET is_type = FALSE
            WHERE is_type IS NULL;

            UPDATE lamindb_ulabel
            SET is_type = FALSE
            WHERE is_type IS NULL;
        """
        ),
        migrations.AlterField(
            model_name="feature",
            name="is_type",
            field=lamindb.base.fields.BooleanField(
                blank=True, db_index=True, default=False
            ),
        ),
        migrations.AlterField(
            model_name="flextable",
            name="is_type",
            field=lamindb.base.fields.BooleanField(
                blank=True, db_index=True, default=False
            ),
        ),
        migrations.AlterField(
            model_name="param",
            name="is_type",
            field=lamindb.base.fields.BooleanField(
                blank=True, db_index=True, default=False
            ),
        ),
        migrations.AlterField(
            model_name="project",
            name="is_type",
            field=lamindb.base.fields.BooleanField(
                blank=True, db_index=True, default=False
            ),
        ),
        migrations.AlterField(
            model_name="reference",
            name="is_type",
            field=lamindb.base.fields.BooleanField(
                blank=True, db_index=True, default=False
            ),
        ),
        migrations.AlterField(
            model_name="schema",
            name="is_type",
            field=lamindb.base.fields.BooleanField(
                blank=True, db_index=True, default=False
            ),
        ),
        migrations.AlterField(
            model_name="ulabel",
            name="is_type",
            field=lamindb.base.fields.BooleanField(
                blank=True, db_index=True, default=False
            ),
        ),
    ]
95 |
--------------------------------------------------------------------------------
/lamindb/migrations/0084_alter_schemafeature_feature_and_more.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 5.2 on 2025-01-27 07:22
2 |
3 | import django.db.models.deletion
4 | from django.db import migrations
5 |
6 | import lamindb.base.fields
7 |
8 |
class Migration(migrations.Migration):
    """Tighten the SchemaFeature link FKs: PROTECT the feature, CASCADE on schema."""

    dependencies = [
        ("lamindb", "0083_alter_feature_is_type_alter_flextable_is_type_and_more"),
    ]

    operations = [
        # a feature cannot be deleted while schema links reference it
        migrations.AlterField(
            model_name="schemafeature",
            name="feature",
            field=lamindb.base.fields.ForeignKey(
                blank=True,
                on_delete=django.db.models.deletion.PROTECT,
                related_name="links_schema",
                to="lamindb.feature",
            ),
        ),
        # deleting a schema removes its feature links
        migrations.AlterField(
            model_name="schemafeature",
            name="schema",
            field=lamindb.base.fields.ForeignKey(
                blank=True,
                on_delete=django.db.models.deletion.CASCADE,
                related_name="links_feature",
                to="lamindb.schema",
            ),
        ),
    ]
36 |
--------------------------------------------------------------------------------
/lamindb/migrations/0085_alter_feature_is_type_alter_flextable_is_type_and_more.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 5.2 on 2025-01-27 13:48
2 |
3 | from django.db import migrations
4 |
5 | import lamindb.base.fields
6 |
7 |
class Migration(migrations.Migration):
    """Make ``is_type`` nullable again (keeping default False) across registries."""

    dependencies = [
        ("lamindb", "0084_alter_schemafeature_feature_and_more"),
    ]

    operations = [
        migrations.AlterField(
            model_name="feature",
            name="is_type",
            field=lamindb.base.fields.BooleanField(
                blank=True, db_index=True, default=False, null=True
            ),
        ),
        migrations.AlterField(
            model_name="flextable",
            name="is_type",
            field=lamindb.base.fields.BooleanField(
                blank=True, db_index=True, default=False, null=True
            ),
        ),
        migrations.AlterField(
            model_name="param",
            name="is_type",
            field=lamindb.base.fields.BooleanField(
                blank=True, db_index=True, default=False, null=True
            ),
        ),
        migrations.AlterField(
            model_name="project",
            name="is_type",
            field=lamindb.base.fields.BooleanField(
                blank=True, db_index=True, default=False, null=True
            ),
        ),
        migrations.AlterField(
            model_name="reference",
            name="is_type",
            field=lamindb.base.fields.BooleanField(
                blank=True, db_index=True, default=False, null=True
            ),
        ),
        migrations.AlterField(
            model_name="schema",
            name="is_type",
            field=lamindb.base.fields.BooleanField(
                blank=True, db_index=True, default=False, null=True
            ),
        ),
        migrations.AlterField(
            model_name="ulabel",
            name="is_type",
            field=lamindb.base.fields.BooleanField(
                blank=True, db_index=True, default=False, null=True
            ),
        ),
    ]
64 |
--------------------------------------------------------------------------------
/lamindb/migrations/0086_various.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 5.2 on 2025-02-06 07:10
2 |
3 | from django.db import migrations, models
4 |
5 | import lamindb.base.fields
6 |
7 |
class Migration(migrations.Migration):
    """Various: unique 22-char hashes, expose ``django_migrations``, widen ``Param.dtype``."""

    dependencies = [
        ("lamindb", "0085_alter_feature_is_type_alter_flextable_is_type_and_more"),
    ]

    operations = [
        # hash fields become unique with max_length=22
        migrations.AlterField(
            model_name="transform",
            name="hash",
            field=lamindb.base.fields.CharField(
                blank=True,
                db_index=True,
                default=None,
                max_length=22,
                null=True,
                unique=True,
            ),
        ),
        migrations.AlterField(
            model_name="artifact",
            name="hash",
            field=lamindb.base.fields.CharField(
                blank=True,
                db_index=True,
                default=None,
                max_length=22,
                null=True,
                unique=True,
            ),
        ),
        migrations.AlterField(
            model_name="collection",
            name="hash",
            field=lamindb.base.fields.CharField(
                blank=True,
                db_index=True,
                default=None,
                max_length=22,
                null=True,
                unique=True,
            ),
        ),
        # unmanaged read-only model over Django's own django_migrations table
        # (managed=False: no table is created or altered by this migration)
        migrations.CreateModel(
            name="Migration",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "app",
                    lamindb.base.fields.CharField(
                        blank=True, default=None, max_length=255
                    ),
                ),
                (
                    "name",
                    lamindb.base.fields.CharField(
                        blank=True, default=None, max_length=255
                    ),
                ),
                ("applied", lamindb.base.fields.DateTimeField(blank=True)),
            ],
            options={
                "db_table": "django_migrations",
                "managed": False,
            },
        ),
        # NOTE: this first AlterField (max_length=64) is immediately superseded
        # by the next one (max_length=255); kept for historical fidelity
        migrations.AlterField(
            model_name="param",
            name="dtype",
            field=lamindb.base.fields.CharField(
                blank=True, db_index=True, default=None, max_length=64, null=True
            ),
        ),
        migrations.AlterField(
            model_name="param",
            name="dtype",
            field=lamindb.base.fields.CharField(
                blank=True, db_index=True, default=None, max_length=255, null=True
            ),
        ),
    ]
96 |
--------------------------------------------------------------------------------
/lamindb/migrations/0087_rename__schemas_m2m_artifact_feature_sets_and_more.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 5.2 on 2025-02-13 12:00
2 |
3 | import django.db.models.deletion
4 | from django.db import migrations, models
5 |
6 | import lamindb.base.fields
7 |
8 |
class Migration(migrations.Migration):
    """Rename ``Artifact._schemas_m2m`` to ``feature_sets``; adjust schema relations."""

    dependencies = [
        ("lamindb", "0086_various"),
    ]

    operations = [
        migrations.RenameField(
            model_name="artifact",
            old_name="_schemas_m2m",
            new_name="feature_sets",
        ),
        # the validating schema is protected against deletion while in use
        migrations.AlterField(
            model_name="artifact",
            name="schema",
            field=lamindb.base.fields.ForeignKey(
                blank=True,
                default=None,
                null=True,
                on_delete=django.db.models.deletion.PROTECT,
                related_name="validated_artifacts",
                to="lamindb.schema",
            ),
        ),
        # feature_sets is a many-to-many through the ArtifactSchema link table
        migrations.AlterField(
            model_name="artifact",
            name="feature_sets",
            field=models.ManyToManyField(
                related_name="artifacts",
                through="lamindb.ArtifactSchema",
                to="lamindb.schema",
            ),
        ),
    ]
42 |
--------------------------------------------------------------------------------
/lamindb/migrations/0090_runproject_project_runs.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 5.2 on 2025-03-05 10:20
2 |
3 | import django.db.models.deletion
4 | import django.db.models.functions.datetime
5 | from django.db import migrations, models
6 |
7 | import lamindb.base.fields
8 | import lamindb.base.users
9 | import lamindb.models.sqlrecord
10 |
11 |
class Migration(migrations.Migration):
    """Create the ``RunProject`` link table and the ``Project.runs`` many-to-many."""

    dependencies = [
        ("lamindb", "0089_subsequent_runs"),
    ]

    operations = [
        migrations.CreateModel(
            name="RunProject",
            fields=[
                ("id", models.BigAutoField(primary_key=True, serialize=False)),
                (
                    "created_at",
                    lamindb.base.fields.DateTimeField(
                        blank=True,
                        db_default=django.db.models.functions.datetime.Now(),
                        db_index=True,
                        editable=False,
                    ),
                ),
                (
                    "created_by",
                    lamindb.base.fields.ForeignKey(
                        blank=True,
                        default=lamindb.base.users.current_user_id,
                        editable=False,
                        on_delete=django.db.models.deletion.PROTECT,
                        related_name="+",
                        to="lamindb.user",
                    ),
                ),
                # PROTECT: a project cannot be deleted while runs link to it
                (
                    "project",
                    lamindb.base.fields.ForeignKey(
                        blank=True,
                        on_delete=django.db.models.deletion.PROTECT,
                        related_name="links_run",
                        to="lamindb.project",
                    ),
                ),
                # CASCADE: deleting a run removes its project links
                (
                    "run",
                    lamindb.base.fields.ForeignKey(
                        blank=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="links_project",
                        to="lamindb.run",
                    ),
                ),
            ],
            options={
                "unique_together": {("run", "project")},
            },
            bases=(models.Model, lamindb.models.sqlrecord.IsLink),
        ),
        migrations.AddField(
            model_name="project",
            name="runs",
            field=models.ManyToManyField(
                related_name="projects", through="lamindb.RunProject", to="lamindb.run"
            ),
        ),
    ]
74 |
--------------------------------------------------------------------------------
/lamindb/migrations/0091_alter_featurevalue_options_alter_space_options_and_more.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 5.1.4 on 2025-04-30 09:11
2 |
3 | from django.db import migrations
4 |
5 |
class Migration(migrations.Migration):
    """Set ``base_manager_name = "objects"`` on FeatureValue, Space, and User."""

    dependencies = [
        ("lamindb", "0090_runproject_project_runs"),
    ]

    operations = [
        migrations.AlterModelOptions(
            name="featurevalue",
            options={"base_manager_name": "objects"},
        ),
        migrations.AlterModelOptions(
            name="space",
            options={"base_manager_name": "objects"},
        ),
        migrations.AlterModelOptions(
            name="user",
            options={"base_manager_name": "objects"},
        ),
    ]
25 |
--------------------------------------------------------------------------------
/lamindb/migrations/0092_alter_artifactfeaturevalue_artifact_and_more.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 5.2 on 2025-05-06 20:34
2 |
3 | import django.db.models.deletion
4 | from django.db import migrations
5 |
6 | import lamindb.base.fields
7 |
8 |
class Migration(migrations.Migration):
    """Normalize link-table FKs: CASCADE on the owning side, PROTECT on the value side."""

    dependencies = [
        ("lamindb", "0091_alter_featurevalue_options_alter_space_options_and_more"),
    ]

    operations = [
        # deleting an artifact removes its feature-value links ...
        migrations.AlterField(
            model_name="artifactfeaturevalue",
            name="artifact",
            field=lamindb.base.fields.ForeignKey(
                blank=True,
                on_delete=django.db.models.deletion.CASCADE,
                related_name="links_featurevalue",
                to="lamindb.artifact",
            ),
        ),
        # ... but a feature value cannot be deleted while still linked
        migrations.AlterField(
            model_name="artifactfeaturevalue",
            name="featurevalue",
            field=lamindb.base.fields.ForeignKey(
                blank=True,
                on_delete=django.db.models.deletion.PROTECT,
                related_name="links_artifact",
                to="lamindb.featurevalue",
            ),
        ),
        migrations.AlterField(
            model_name="artifactparamvalue",
            name="artifact",
            field=lamindb.base.fields.ForeignKey(
                blank=True,
                on_delete=django.db.models.deletion.CASCADE,
                related_name="links_paramvalue",
                to="lamindb.artifact",
            ),
        ),
        migrations.AlterField(
            model_name="artifactparamvalue",
            name="paramvalue",
            field=lamindb.base.fields.ForeignKey(
                blank=True,
                on_delete=django.db.models.deletion.PROTECT,
                related_name="links_artifact",
                to="lamindb.paramvalue",
            ),
        ),
        migrations.AlterField(
            model_name="runparamvalue",
            name="paramvalue",
            field=lamindb.base.fields.ForeignKey(
                blank=True,
                on_delete=django.db.models.deletion.PROTECT,
                related_name="links_run",
                to="lamindb.paramvalue",
            ),
        ),
        migrations.AlterField(
            model_name="runparamvalue",
            name="run",
            field=lamindb.base.fields.ForeignKey(
                blank=True,
                on_delete=django.db.models.deletion.CASCADE,
                related_name="links_paramvalue",
                to="lamindb.run",
            ),
        ),
    ]
76 |
--------------------------------------------------------------------------------
/lamindb/migrations/0093_alter_schemacomponent_unique_together.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 5.2 on 2025-05-07 12:16
2 |
3 | from django.db import migrations
4 |
5 |
class Migration(migrations.Migration):
    """Change the uniqueness constraints of ``SchemaComponent``.

    A slot is unique per composite; the triple constraint is kept alongside it.
    """

    dependencies = [
        ("lamindb", "0092_alter_artifactfeaturevalue_artifact_and_more"),
    ]

    operations = [
        migrations.AlterUniqueTogether(
            name="schemacomponent",
            unique_together={("composite", "slot"), ("composite", "slot", "component")},
        ),
    ]
17 |
--------------------------------------------------------------------------------
/lamindb/migrations/0094_writeloglock_writelogmigrationstate_and_more.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 5.1.7 on 2025-05-10 00:32
2 |
3 | import django.db.models.deletion
4 | from django.db import migrations, models
5 |
6 |
class Migration(migrations.Migration):
    """Create the write-log machinery: WriteLogLock, MigrationState, TableState, WriteLog."""

    dependencies = [
        ("lamindb", "0093_alter_schemacomponent_unique_together"),
    ]

    operations = [
        # global on/off switch for write logging
        migrations.CreateModel(
            name="WriteLogLock",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("locked", models.BooleanField()),
            ],
        ),
        # snapshot of the migration state a write-log entry was recorded under
        migrations.CreateModel(
            name="MigrationState",
            fields=[
                ("id", models.SmallAutoField(primary_key=True, serialize=False)),
                ("migration_state_id", models.JSONField()),
            ],
        ),
        # per-table backfill bookkeeping
        migrations.CreateModel(
            name="TableState",
            fields=[
                ("id", models.SmallAutoField(primary_key=True, serialize=False)),
                ("table_name", models.CharField(max_length=255)),
                ("backfilled", models.BooleanField()),
            ],
        ),
        # one row per logged write event
        migrations.CreateModel(
            name="WriteLog",
            fields=[
                ("seqno", models.AutoField(primary_key=True, serialize=False)),
                (
                    "uid",
                    models.CharField(
                        db_index=True, editable=False, max_length=18, unique=True
                    ),
                ),
                ("space_uid", models.CharField(max_length=12, null=True)),
                ("created_by_uid", models.CharField(default="00000000", max_length=8)),
                ("branch_code", models.IntegerField(default=1)),
                (
                    "run_uid",
                    models.CharField(default="0000000000000000", max_length=16),
                ),
                ("record_uid", models.JSONField(null=True)),
                ("record_data", models.JSONField(null=True)),
                ("event_type", models.PositiveSmallIntegerField()),
                ("created_at", models.DateTimeField()),
                (
                    "migration_state",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.PROTECT,
                        to="lamindb.migrationstate",
                    ),
                ),
                (
                    "table",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.PROTECT,
                        to="lamindb.tablestate",
                    ),
                ),
            ],
            options={
                "verbose_name": "Write Log",
                "verbose_name_plural": "Write Logs",
            },
        ),
    ]
85 |
--------------------------------------------------------------------------------
/lamindb/migrations/0097_remove_schemaparam_param_remove_paramvalue_param_and_more.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 5.2 on 2025-05-11 18:54
2 |
3 | from django.db import migrations
4 |
5 |
class Migration(migrations.Migration):
    """Drop the Param registry and its link/value models.

    Link models are deleted before ``Param``/``ParamValue`` so FKs never dangle.
    """

    dependencies = [
        ("lamindb", "0096_remove_artifact__param_values_and_more"),
    ]

    operations = [
        migrations.DeleteModel(
            name="ArtifactParamValue",
        ),
        migrations.DeleteModel(
            name="SchemaParam",
        ),
        migrations.DeleteModel(
            name="Param",
        ),
        migrations.DeleteModel(
            name="ParamValue",
        ),
        migrations.DeleteModel(
            name="RunParamValue",
        ),
    ]
28 |
--------------------------------------------------------------------------------
/lamindb/migrations/0099_alter_writelog_seqno.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 5.1.7 on 2025-05-23 23:20
2 |
3 | from django.db import migrations, models
4 |
5 |
class Migration(migrations.Migration):
    """Widen ``WriteLog.seqno`` to a BigAutoField, then rename it to ``id``."""

    dependencies = [
        ("lamindb", "0098_alter_feature_type_alter_project_type_and_more"),
    ]

    operations = [
        # order matters: alter the column type first, then rename it
        migrations.AlterField(
            model_name="writelog",
            name="seqno",
            field=models.BigAutoField(primary_key=True, serialize=False),
        ),
        migrations.RenameField(
            model_name="writelog",
            old_name="seqno",
            new_name="id",
        ),
    ]
23 |
--------------------------------------------------------------------------------
/lamindb/migrations/0100_branch_alter_artifact__branch_code_and_more.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 5.2 on 2025-05-25 11:59
2 |
3 | import django.db.models.deletion
4 | import django.db.models.functions.datetime
5 | from django.db import migrations, models
6 |
7 | import lamindb.base.fields
8 |
9 |
def update_space_uids_and_create_branches(apps, schema_editor):
    """Move the default space to uid ``"A"`` and seed the built-in branches."""
    space_model = apps.get_model("lamindb", "Space")
    # the all-zero placeholder uid becomes the short default uid "A"
    space_model.objects.filter(uid="00000000").update(uid="A")

    branch_model = apps.get_model("lamindb", "Branch")
    # (fixed-id kwargs, uid, name, description); Main gets an auto-assigned id,
    # so it must be created after the fixed-id Trash (-1) and Archive (0) rows
    builtin_branches = [
        ({"id": -1}, "T", "Trash", "The trash."),
        ({"id": 0}, "A", "Archive", "The archive."),
        ({}, "M", "Main", "The main & default branch of the instance."),
    ]
    for id_kwargs, uid, name, description in builtin_branches:
        branch_model.objects.get_or_create(
            uid=uid,
            name=name,
            description=description,
            **id_kwargs,
        )
31 |
32 |
class Migration(migrations.Migration):
    """Introduce the ``Branch`` registry, re-default ``Space.uid``, and seed data."""

    dependencies = [
        ("lamindb", "0099_alter_writelog_seqno"),
    ]

    operations = [
        migrations.CreateModel(
            name="Branch",
            fields=[
                ("id", models.AutoField(primary_key=True, serialize=False)),
                ("name", models.CharField(db_index=True, max_length=100)),
                # "M" marks the main branch as the default
                (
                    "uid",
                    lamindb.base.fields.CharField(
                        blank=True,
                        db_default="M",
                        db_index=True,
                        default="M",
                        editable=False,
                        max_length=12,
                        unique=True,
                    ),
                ),
                (
                    "description",
                    lamindb.base.fields.CharField(
                        blank=True, default=None, max_length=255, null=True
                    ),
                ),
                (
                    "created_at",
                    lamindb.base.fields.DateTimeField(
                        blank=True,
                        db_default=django.db.models.functions.datetime.Now(),
                        db_index=True,
                        editable=False,
                    ),
                ),
                (
                    "created_by",
                    lamindb.base.fields.ForeignKey(
                        blank=True,
                        default=None,
                        null=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="+",
                        to="lamindb.user",
                    ),
                ),
            ],
            options={
                "abstract": False,
                "base_manager_name": "objects",
            },
        ),
        # default space uid becomes the short "A"
        migrations.AlterField(
            model_name="space",
            name="uid",
            field=lamindb.base.fields.CharField(
                blank=True,
                db_default="A",
                db_index=True,
                default="A",
                editable=False,
                max_length=12,
                unique=True,
            ),
        ),
        # data step: rewrite existing space uids and create the built-in branches
        migrations.RunPython(update_space_uids_and_create_branches),
    ]
103 |
--------------------------------------------------------------------------------
/lamindb/migrations/0102_remove_writelog_branch_code_and_more.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 5.2 on 2025-05-27 11:29
2 |
3 | import django.db.models.deletion
4 | from django.db import migrations, models
5 |
6 |
class Migration(migrations.Migration):
    """Replace WriteLog's ``branch_code``/``space_uid`` columns with real FKs."""

    dependencies = [
        ("lamindb", "0101_alter_artifact_hash_alter_feature_name_and_more"),
    ]

    operations = [
        migrations.RemoveField(
            model_name="writelog",
            name="branch_code",
        ),
        migrations.RemoveField(
            model_name="writelog",
            name="space_uid",
        ),
        # default=1 points existing rows at the default branch/space
        migrations.AddField(
            model_name="writelog",
            name="branch",
            field=models.ForeignKey(
                default=1,
                on_delete=django.db.models.deletion.PROTECT,
                to="lamindb.branch",
            ),
        ),
        migrations.AddField(
            model_name="writelog",
            name="space",
            field=models.ForeignKey(
                default=1,
                on_delete=django.db.models.deletion.PROTECT,
                to="lamindb.space",
            ),
        ),
        migrations.AlterField(
            model_name="writelog",
            name="run_uid",
            field=models.CharField(default="0000000000000000", max_length=20),
        ),
        # record_uid becomes required and indexed; the temporary default only
        # backfills existing rows (preserve_default=False drops it afterwards)
        migrations.AlterField(
            model_name="writelog",
            name="record_uid",
            field=models.JSONField(db_index=True, default=0),
            preserve_default=False,
        ),
        migrations.AlterModelOptions(
            name="migrationstate",
            options={"base_manager_name": "objects"},
        ),
        migrations.AlterModelOptions(
            name="tablestate",
            options={"base_manager_name": "objects"},
        ),
        migrations.AlterField(
            model_name="writelog",
            name="migration_state",
            field=models.ForeignKey(
                on_delete=django.db.models.deletion.PROTECT, to="lamindb.migrationstate"
            ),
        ),
        migrations.AlterField(
            model_name="writelog",
            name="table",
            field=models.ForeignKey(
                on_delete=django.db.models.deletion.PROTECT, to="lamindb.tablestate"
            ),
        ),
    ]
73 |
--------------------------------------------------------------------------------
/lamindb/migrations/0103_remove_writelog_migration_state_and_more.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 5.2 on 2025-05-29 12:02
2 |
3 | from django.db import migrations
4 |
5 |
def fix_artifact_kind(apps, schema_editor):
    """Rename the legacy artifact kind ``__lamindb__`` to ``__lamindb_run__``."""
    artifact_model = apps.get_model("lamindb", "Artifact")
    legacy_kind_rows = artifact_model.objects.filter(kind="__lamindb__")
    legacy_kind_rows.update(kind="__lamindb_run__")
9 |
10 |
class Migration(migrations.Migration):
    """Fix legacy artifact kinds, then tear down the write-log machinery.

    FK fields on WriteLog are removed before the models they point to are
    deleted, so no dangling references remain.
    """

    dependencies = [
        ("lamindb", "0102_remove_writelog_branch_code_and_more"),
    ]

    operations = [
        migrations.RunPython(fix_artifact_kind),
        migrations.RemoveField(
            model_name="writelog",
            name="migration_state",
        ),
        migrations.RemoveField(
            model_name="writelog",
            name="table",
        ),
        migrations.RemoveField(
            model_name="writelog",
            name="branch",
        ),
        migrations.RemoveField(
            model_name="writelog",
            name="space",
        ),
        migrations.DeleteModel(
            name="WriteLogLock",
        ),
        migrations.DeleteModel(
            name="MigrationState",
        ),
        migrations.DeleteModel(
            name="TableState",
        ),
        migrations.DeleteModel(
            name="WriteLog",
        ),
    ]
47 |
--------------------------------------------------------------------------------
/lamindb/migrations/0105_record_unique_name.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 5.2 on 2025-06-03 19:37
2 |
3 | from django.db import migrations, models
4 |
5 |
class Migration(migrations.Migration):
    """Enforce unique names for Record rows that are types (``is_type=True``)."""

    dependencies = [
        ("lamindb", "0104_squashed"),
    ]

    operations = [
        # partial unique constraint: applies only where is_type is True
        migrations.AddConstraint(
            model_name="record",
            constraint=models.UniqueConstraint(
                condition=models.Q(("is_type", True)),
                fields=("name",),
                name="unique_name",
            ),
        ),
    ]
21 |
--------------------------------------------------------------------------------
/lamindb/migrations/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/laminlabs/lamindb/0615476ba7f3680f4ff961851e6522d11e7f0a5c/lamindb/migrations/__init__.py
--------------------------------------------------------------------------------
/lamindb/models/__init__.py:
--------------------------------------------------------------------------------
1 | """Models library.
2 |
3 | .. autosummary::
4 | :toctree: .
5 |
6 | BaseSQLRecord
7 | SQLRecord
8 | Registry
9 | BasicQuerySet
10 | QuerySet
11 | ArtifactSet
12 | QueryManager
13 | SQLRecordList
14 | FeatureManager
15 | LabelManager
16 | IsVersioned
17 | CanCurate
18 | HasParents
19 | TracksRun
20 | TracksUpdates
21 | FeatureValue
22 | InspectResult
23 | ValidateFields
24 | SchemaOptionals
25 |
26 | """
27 |
28 | # ruff: noqa: I001
29 | from lamin_utils._inspect import InspectResult
30 | from ._is_versioned import IsVersioned
31 | from .can_curate import CanCurate
32 | from .sqlrecord import (
33 | BaseSQLRecord,
34 | SQLRecord,
35 | Registry,
36 | Space,
37 | Branch,
38 | Migration,
39 | ValidateFields,
40 | format_field_value,
41 | record_repr,
42 | IsLink,
43 | )
44 | from .core import Storage
45 | from .transform import Transform
46 | from .run import Run, TracksRun, TracksUpdates, current_run, User
47 | from .feature import Feature, FeatureValue
48 | from .schema import Schema
49 | from .ulabel import ULabel
50 |
51 | # should come last as it needs everything else
52 | from .artifact import Artifact
53 | from ._feature_manager import FeatureManager
54 | from ._label_manager import LabelManager
55 | from .collection import Collection, CollectionArtifact
56 | from .project import Person, Project, Reference
57 | from .query_manager import QueryManager
58 | from .query_set import BasicQuerySet, QuerySet, SQLRecordList
59 | from .artifact_set import ArtifactSet
60 | from .has_parents import HasParents
61 | from datetime import datetime as _datetime
62 |
63 | FeatureSet = Schema # backward compat
64 |
65 | # link models
66 | from .artifact import ArtifactFeatureValue
67 | from .project import (
68 | ArtifactProject,
69 | TransformProject,
70 | CollectionProject,
71 | ULabelProject,
72 | FeatureProject,
73 | SchemaProject,
74 | ArtifactReference,
75 | CollectionReference,
76 | SheetProject,
77 | RunProject,
78 | RecordProject,
79 | PersonProject,
80 | )
81 | from .run import RunFeatureValue
82 | from .schema import (
83 | SchemaFeature,
84 | ArtifactSchema,
85 | SchemaComponent,
86 | SchemaOptionals,
87 | )
88 | from .ulabel import ArtifactULabel, TransformULabel, RunULabel, CollectionULabel
89 |
90 | from .record import (
91 | Record,
92 | Sheet,
93 | RecordJson,
94 | RecordRecord,
95 | RecordULabel,
96 | RecordRun,
97 | RecordArtifact,
98 | )
99 |
100 |
# Aliases preserving the pre-rename public API so older user code keeps importing.
LinkORM = IsLink  # backward compat
ParamValue = FeatureValue  # backward compat
ArtifactParamValue = ArtifactFeatureValue  # backward compat
RunParamValue = RunFeatureValue  # backward compat
Param = Feature  # backward compat
BasicRecord = BaseSQLRecord  # backward compat
107 |
--------------------------------------------------------------------------------
/lamindb/models/_relations.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing import TYPE_CHECKING
4 |
5 | import lamindb_setup as ln_setup
6 | from django.db.models import ManyToManyField
7 | from lamindb_setup._connect_instance import (
8 | get_owner_name_from_identifier,
9 | load_instance_settings,
10 | )
11 | from lamindb_setup.core._settings_store import instance_settings_file
12 |
13 | from lamindb.models.sqlrecord import IsLink
14 |
15 | if TYPE_CHECKING:
16 | from lamindb.models.sqlrecord import Registry, SQLRecord
17 |
18 |
def get_schema_modules(instance: str | None) -> set[str]:
    """Return the schema modules shared with *instance*, always including ``"core"``.

    For the current/default instance, this is simply its own module set.
    For another instance, its modules are resolved from the local settings
    file or, failing that, from the cached uid file, and intersected with
    the current instance's modules.

    Raises:
        ValueError: if *instance* cannot be resolved locally.
    """
    if instance is None or instance == "default":
        return set(ln_setup.settings.instance.modules) | {"core"}

    owner, name = get_owner_name_from_identifier(instance)
    settings_file = instance_settings_file(name, owner)
    if settings_file.exists():
        other_modules = set(load_instance_settings(settings_file).modules)
    else:
        cache_filepath = (
            ln_setup.settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
        )
        if not cache_filepath.exists():
            raise ValueError(f"Instance {instance} not found")
        # second line of the cache file is a comma-separated module list
        other_modules = set(cache_filepath.read_text().split("\n")[1].split(","))

    own_modules = set(ln_setup.settings.instance.modules)
    return (own_modules & other_modules) | {"core"}
41 |
42 |
43 | # this function here should likely be renamed
44 | # it maps the __get_name_with_module__() onto the actual model
def dict_module_name_to_model_name(
    registry: Registry, instance: str | None = None
) -> dict[str, Registry]:
    """Map ``module.ModelName`` strings to the model classes related to *registry*.

    Covers reverse relations that define a ``related_name`` as well as
    many-to-many fields, restricted to models whose module is available
    on *instance*.
    """
    schema_modules = get_schema_modules(instance)
    mapping: dict = {}
    for rel in registry._meta.related_objects:
        model = rel.related_model
        if rel.related_name is not None and model.__get_module_name__() in schema_modules:
            mapping[model.__get_name_with_module__()] = model
    # many-to-many fields use .name instead of .related_name
    for m2m in registry._meta.many_to_many:
        model = m2m.related_model
        if m2m.name is not None and model.__get_module_name__() in schema_modules:
            mapping[model.__get_name_with_module__()] = model
    return mapping
64 |
65 |
def dict_related_model_to_related_name(
    registry: type[SQLRecord], links: bool = False, instance: str | None = None
) -> dict[str, str]:
    """Map related model names to their accessor name on *registry*.

    With ``links=False`` only non-link models are returned; with
    ``links=True`` only link models (subclasses of ``IsLink``).
    Restricted to modules available on *instance*.
    """

    def include(model: SQLRecord) -> bool:
        # equivalent to the double negation `not links != issubclass(...)`:
        # keep link models iff links=True
        return links == issubclass(model, IsLink)

    schema_modules = get_schema_modules(instance)

    result: dict = {}
    for rel in registry._meta.related_objects + registry._meta.many_to_many:
        if rel.name is None:
            continue
        model = rel.related_model
        if not include(model) or model.__get_module_name__() not in schema_modules:
            continue
        # many-to-many fields expose the accessor via .name, reverse relations via .related_name
        accessor = rel.name if isinstance(rel, ManyToManyField) else rel.related_name
        result[model.__get_name_with_module__()] = accessor
    return result
89 |
90 |
def get_related_name(features_type: type[SQLRecord]) -> str:
    """Return the accessor name of the relation from ``Schema`` to *features_type*.

    Args:
        features_type: A registry that must be related to ``Schema``.

    Returns:
        The ``related_name`` of the first matching reverse relation on ``Schema``.

    Raises:
        ValueError: If no relation between ``Schema`` and *features_type* exists.
    """
    from lamindb.models.schema import Schema

    candidates = [
        field.related_name
        for field in Schema._meta.related_objects
        if field.related_model == features_type
    ]
    if not candidates:
        # fixed: the suggested API is Django's models.ManyToManyField;
        # "models.ManyToMany" does not exist and would mislead users
        raise ValueError(
            f"Can't create feature sets from {features_type.__name__} because it's not"
            " related to it!\nYou need to create a link model between Schema and"
            " your SQLRecord in your custom module.\nTo do so, add a"
            " line:\n_feature_sets = models.ManyToManyField(Schema,"
            " related_name='mythings')\n"
        )
    return candidates[0]
108 |
--------------------------------------------------------------------------------
/lamindb/models/core.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing import (
4 | TYPE_CHECKING,
5 | overload,
6 | )
7 |
8 | from django.db import models
9 |
10 | from lamindb.base.fields import (
11 | CharField,
12 | )
13 |
14 | from ..base.ids import base62_12
15 | from .run import TracksRun, TracksUpdates
16 | from .sqlrecord import SQLRecord
17 |
18 | if TYPE_CHECKING:
19 | from pathlib import Path
20 |
21 | from upath import UPath
22 |
23 | from .artifact import Artifact
24 |
25 |
class Storage(SQLRecord, TracksRun, TracksUpdates):
    """Storage locations of artifacts such as folders and S3 buckets.

    A storage location is either a folder (local or in the cloud) or
    an entire S3/GCP bucket.

    A LaminDB instance can manage and link multiple storage locations. But any
    storage location is managed by *at most one* LaminDB instance.

    .. dropdown:: Managed vs. linked storage locations

        The LaminDB instance can update & delete artifacts in managed storage
        locations but merely read artifacts in linked storage locations.

        The `instance_uid` field defines the managing LaminDB instance of a
        storage location.

        When you delete a LaminDB instance, you'll be warned about data in managed
        storage locations while data in linked storage locations is ignored.

    See Also:
        :attr:`~lamindb.core.Settings.storage`
            Default storage.
        :attr:`~lamindb.setup.core.StorageSettings`
            Storage settings.

    Examples:

        Configure the default storage location on the command line::

            lamin init --storage ./myfolder # or "s3://my-bucket" or "gs://my-bucket"

        View the current storage location for writing artifacts::

            import lamindb as ln

            print(ln.settings.storage)

        Change the current storage location for writing artifacts::

            ln.settings.storage = "./myfolder2" # or "s3://my-bucket2" or "gs://my-bucket2"

        View all storage locations used by the current instance::

            ln.Storage.df()
    """

    class Meta(SQLRecord.Meta, TracksRun.Meta, TracksUpdates.Meta):
        # concrete model: the mixin bases are abstract, this one gets a table
        abstract = False

    # field used for name-based display/search of Storage records
    _name_field: str = "root"

    id: int = models.AutoField(primary_key=True)
    """Internal id, valid only in one DB instance."""
    uid: str = CharField(
        editable=False, unique=True, max_length=12, default=base62_12, db_index=True
    )
    """Universal id, valid across DB instances."""
    root: str = CharField(db_index=True, unique=True)
    """Root path of storage (cloud or local path)."""
    description: str | None = CharField(db_index=True, null=True)
    """A description of what the storage location is used for (optional)."""
    type: str = CharField(max_length=30, db_index=True)
    """Can be "local" vs. "s3" vs. "gs"."""
    region: str | None = CharField(max_length=64, db_index=True, null=True)
    """Cloud storage region, if applicable."""
    instance_uid: str | None = CharField(max_length=12, db_index=True, null=True)
    """Instance that manages this storage location."""
    artifacts: Artifact
    """Artifacts contained in this storage location."""

    # user-facing constructor signature
    @overload
    def __init__(
        self,
        root: str,
        type: str,
        region: str | None,
    ): ...

    # internal signature used by Django when loading a row from the database
    @overload
    def __init__(
        self,
        *db_args,
    ): ...

    def __init__(
        self,
        *args,
        **kwargs,
    ):
        super().__init__(*args, **kwargs)

    @property
    def path(self) -> Path | UPath:
        """Path.

        Uses the `.root` field and converts it into a `Path` or `UPath`.
        """
        from lamindb_setup.core.upath import create_path

        # _access_token may be set externally for cloud access; absent on fresh records
        access_token = self._access_token if hasattr(self, "_access_token") else None
        return create_path(self.root, access_token=access_token)
128 |
--------------------------------------------------------------------------------
/lamindb/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/laminlabs/lamindb/0615476ba7f3680f4ff961851e6522d11e7f0a5c/lamindb/py.typed
--------------------------------------------------------------------------------
/lamindb/setup/__init__.py:
--------------------------------------------------------------------------------
# Re-export the lamindb_setup package under the `lamindb.setup` namespace.
import lamindb_setup as _lamindb_setup
from lamindb_setup import *  # noqa: F403
from lamindb_setup import (
    connect,
    delete,
    init,
    settings,
)

from . import core

del connect  # we have this at the root level, hence, we don't want it here
# rewrite docstrings so rendered docs reference lamindb.setup, not lamindb_setup
__doc__ = _lamindb_setup.__doc__.replace("lamindb_setup", "lamindb.setup")
settings.__doc__ = settings.__doc__.replace("lamindb_setup", "lamindb.setup")
15 |
--------------------------------------------------------------------------------
/lamindb/setup/core/__init__.py:
--------------------------------------------------------------------------------
# Re-export lamindb_setup.core under the `lamindb.setup.core` namespace.
import lamindb_setup as _lamindb_setup
from lamindb_setup.core import *  # noqa: F403

# rewrite the docstring so rendered docs reference lamindb.setup, not lamindb_setup
__doc__ = _lamindb_setup.core.__doc__.replace("lamindb_setup", "lamindb.setup")
5 |
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import shutil
2 | from pathlib import Path
3 |
4 | import pytest
5 |
6 |
@pytest.fixture(scope="function")
def clean_soma_files(request):
    """Yield a tiledbsoma path, removing any leftover directory before and after.

    The path defaults to "small_dataset.tiledbsoma" and can be overridden via
    indirect parametrization (``request.param``).
    """
    target = getattr(request, "param", "small_dataset.tiledbsoma")

    def _remove(p):
        if Path(p).exists():
            shutil.rmtree(p)

    _remove(target)
    yield target
    _remove(target)
17 |
--------------------------------------------------------------------------------
/tests/core/_dataset_fixtures.py:
--------------------------------------------------------------------------------
1 | import anndata as ad
2 | import lamindb as ln
3 | import mudata as md
4 | import numpy as np
5 | import pandas as pd
6 | import pytest
7 | import spatialdata as sd
8 | import tiledbsoma
9 | import tiledbsoma.io
10 | from scipy.sparse import csr_matrix
11 |
12 |
@pytest.fixture(scope="session")
def get_small_adata():
    """A minimal 2-obs x 3-var AnnData with one obs column and a PCA embedding."""
    counts = np.array([[1, 2, 3], [4, 5, 6]])
    genes = pd.DataFrame(index=["MYC", "TCF7", "GATA1"])
    embeddings = {"X_pca": np.array([[1, 2], [3, 4]])}
    return ad.AnnData(X=counts, obs={"feat1": ["A", "B"]}, var=genes, obsm=embeddings)
21 |
22 |
@pytest.fixture(scope="session")
def get_small_mdata():
    """A minimal MuData with an 'rna' and a 'protein' modality."""
    modalities = {
        "rna": ad.AnnData(
            X=np.array([[1, 2, 3], [4, 5, 6]]),
            obs={"feat1": ["A", "B"]},
            var=pd.DataFrame(index=["MYC", "TCF7", "GATA1"]),
            obsm={"X_pca": np.array([[1, 2], [3, 4]])},
        ),
        "protein": ad.AnnData(
            X=np.array([[7, 8], [9, 10]]),
            obs={"feat2": ["C", "D"]},
            var=pd.DataFrame(index=["FOXP3", "CD8A"]),
            obsm={"X_umap": np.array([[5, 6], [7, 8]])},
        ),
    }
    return md.MuData(modalities)
40 |
41 |
@pytest.fixture(scope="session")
def get_small_sdata():
    """A minimal SpatialData object with a single sparse gene-expression table.

    Removed a leftover unassigned dict literal of polygon coordinates that was
    evaluated and discarded (dead code with no effect on the fixture).
    """
    adata = ad.AnnData(
        X=csr_matrix(np.array([[0.1, 0.2], [0.3, 0.4]])),
        obs=pd.DataFrame(index=["cell1", "cell2"]),
        var=pd.DataFrame(index=["gene1", "gene2"]),
    )

    sdata_obj = sd.SpatialData(
        tables={"gene_expression": adata},
    )

    return sdata_obj
60 |
61 |
@pytest.fixture(scope="session")
def get_small_soma_experiment():
    """Write the mini_immuno dataset to "test.tiledbsoma" and open it as an Experiment.

    NOTE(review): leaves the "test.tiledbsoma" directory and the open
    Experiment handle behind — the ``clean_soma_files`` fixture defaults to a
    different path ("small_dataset.tiledbsoma"); confirm cleanup is handled
    elsewhere.
    """
    adata = ln.core.datasets.mini_immuno.get_dataset1(otype="AnnData")
    tiledbsoma.io.from_anndata("test.tiledbsoma", adata, measurement_name="RNA")

    exp = tiledbsoma.Experiment.open("test.tiledbsoma")

    return exp
70 |
--------------------------------------------------------------------------------
/tests/core/conftest.py:
--------------------------------------------------------------------------------
1 | import shutil
2 | from pathlib import Path
3 | from subprocess import DEVNULL, run
4 | from time import perf_counter
5 |
6 | import lamindb_setup as ln_setup
7 | import pytest
8 | from lamin_utils import logger
9 | from laminci.db import setup_local_test_postgres
10 |
# Save the user's auto-connect setting and disable it BEFORE importing lamindb,
# so that the import below does not connect to an existing instance;
# pytest_sessionfinish restores the saved value.
AUTO_CONNECT = ln_setup.settings.auto_connect
ln_setup.settings.auto_connect = False

import lamindb as ln  # noqa: E402 -- must come after auto_connect is disabled
15 |
16 |
def pytest_sessionstart():
    """Spin up a local test Postgres and initialize a fresh lamindb instance."""
    started_at = perf_counter()

    ln_setup._TESTING = True
    pgurl = setup_local_test_postgres()
    ln.setup.init(
        storage="./default_storage_unit_core",
        modules="bionty",
        name="lamindb-unit-tests-core",
        db=pgurl,
    )
    ln.setup.settings.auto_connect = True
    ln.settings.creation.artifact_silence_missing_run_warning = True
    elapsed = perf_counter() - started_at
    print(f"Time to setup the instance: {elapsed:.3f}s")
32 |
33 |
def pytest_sessionfinish(session: pytest.Session):
    """Tear down the test instance: remove storage, delete the instance,
    stop the Postgres container, and restore the user's auto-connect setting."""
    logger.set_verbosity(1)
    shutil.rmtree("./default_storage_unit_core")
    ln.setup.delete("lamindb-unit-tests-core", force=True)
    # stop and remove the container started by setup_local_test_postgres()
    run("docker stop pgtest && docker rm pgtest", shell=True, stdout=DEVNULL)  # noqa: S602
    ln.setup.settings.auto_connect = AUTO_CONNECT  # restore value saved at import time
40 |
41 |
@pytest.fixture
def ccaplog(caplog):
    """caplog whose handler is also attached to lamin_utils' custom logger."""
    from lamin_utils._logger import logger

    handler = caplog.handler
    logger.addHandler(handler)

    yield caplog

    # detach after the test so handlers don't accumulate across tests
    logger.removeHandler(handler)
54 |
55 |
@pytest.fixture(
    scope="module",
    params=[
        # tuple of is_in_registered_storage, path, suffix, hash of test_dir
        (True, "./default_storage_unit_core/", ".csv", "iGtHiFEBV3r1_TFovdQCgw"),
        (True, "./default_storage_unit_core/", "", "iGtHiFEBV3r1_TFovdQCgw"),
        (True, "./registered_storage/", ".csv", "iGtHiFEBV3r1_TFovdQCgw"),
        (True, "./registered_storage/", "", "iGtHiFEBV3r1_TFovdQCgw"),
        (False, "./nonregistered_storage/", ".csv", "iGtHiFEBV3r1_TFovdQCgw"),
        (False, "./nonregistered_storage/", "", "iGtHiFEBV3r1_TFovdQCgw"),
    ],
)
def get_test_filepaths(request):  # -> Tuple[bool, Path, Path, Path, str]
    """Create a test directory with three small files and yield its metadata.

    Yields a tuple (is_in_registered_storage, root_dir, test_dirpath,
    first_filepath, suffix, hash_of_test_dir); removes the directory afterwards.
    """
    import lamindb as ln

    registered, root, suffix, dir_hash = request.param
    root_dir = Path(root)
    root_posix = root_dir.resolve().as_posix()
    existing = ln.Storage.filter(root=root_posix).one_or_none()
    if registered:
        # ensure the storage location is actually registered
        if existing is None:
            ln.Storage(root=root_posix, type="local").save()
    else:
        assert existing is None
    test_dirpath = root_dir / "my_dir/"
    test_dirpath.mkdir(parents=True, exist_ok=True)
    # two duplicated files (content "0") and one distinct file (content "1")
    created = {}
    for stem, text in (("my_file", "0"), ("my_file1", "0"), ("my_file2", "1")):
        filepath = test_dirpath / f"{stem}{suffix}"
        filepath.write_text(text)
        created[stem] = filepath
    yield (
        registered,
        root_dir,
        test_dirpath,
        created["my_file"],
        suffix,
        dir_hash,
    )
    shutil.rmtree(test_dirpath)
105 |
--------------------------------------------------------------------------------
/tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 | library(laminr)
13 |
14 | db <- connect()
15 |
16 |
17 |
18 | → connected lamindb: laminlabs/lamindata
19 |
20 |
21 |
22 | db$track("lOScuxDTDE0q0000")
23 |
24 |
25 |
26 | → loaded Transform('lOScuxDT'), started Run('GWpaTtUg') at 2024-12-01 17:49:18 UTC
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 | db$finish()
35 |
36 |
37 |
38 | MoreOUTPUT
39 |
40 |
41 |
42 |
43 |
--------------------------------------------------------------------------------
/tests/core/notebooks/basic-r-notebook.Rmd.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | My exemplary R analysis
5 | My exemplary R analysis
6 |
7 |
8 |
9 |
10 |
11 |
12 | library(laminr)
13 |
14 | db <- connect()
15 |
16 |
17 |
18 | → connected lamindb: laminlabs/lamindata
19 |
20 |
21 |
22 | db$track("lOScuxDTDE0q0000")
23 |
24 |
25 |
26 | → loaded Transform('lOScuxDT'), started Run('GWpaTtUg') at 2024-12-01 17:49:18 UTC
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 | db$finish()
35 |
36 |
37 |
38 | MoreOUTPUT ! please hit SHORTCUT to save the notebook in your editor and re-run finish()
39 |
40 |
41 |
42 |
43 |
--------------------------------------------------------------------------------
/tests/core/notebooks/duplicate/with-title-initialized-consecutive-finish.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# My duplicated test notebook (consecutive) with `ln.finish()`"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "This has actually different content than the original one in the `notebooks/` folder."
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": null,
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "import lamindb as ln\n",
24 | "\n",
25 | "ln.track()"
26 | ]
27 | }
28 | ],
29 | "metadata": {
30 | "kernelspec": {
31 | "display_name": "py310",
32 | "language": "python",
33 | "name": "python3"
34 | },
35 | "language_info": {
36 | "codemirror_mode": {
37 | "name": "ipython",
38 | "version": 3
39 | },
40 | "file_extension": ".py",
41 | "mimetype": "text/x-python",
42 | "name": "python",
43 | "nbconvert_exporter": "python",
44 | "pygments_lexer": "ipython3",
45 | "version": "3.12.8"
46 | }
47 | },
48 | "nbformat": 4,
49 | "nbformat_minor": 2
50 | }
51 |
--------------------------------------------------------------------------------
/tests/core/notebooks/no-title.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "0",
6 | "metadata": {},
7 | "source": [
8 | "A notebook without title."
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": null,
14 | "id": "1",
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "import lamindb as ln"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": null,
24 | "id": "2",
25 | "metadata": {},
26 | "outputs": [],
27 | "source": [
28 | "# pass stem uid\n",
29 | "ln.track(\"123456789ABC\")"
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": null,
35 | "id": "3",
36 | "metadata": {},
37 | "outputs": [],
38 | "source": [
39 | "assert ln.context.transform.description == \"no-title.ipynb\"\n",
40 | "assert ln.context.transform.key == \"no-title.ipynb\""
41 | ]
42 | }
43 | ],
44 | "metadata": {
45 | "kernelspec": {
46 | "display_name": "Python 3.9.12 ('base1')",
47 | "language": "python",
48 | "name": "python3"
49 | },
50 | "language_info": {
51 | "codemirror_mode": {
52 | "name": "ipython",
53 | "version": 3
54 | },
55 | "file_extension": ".py",
56 | "mimetype": "text/x-python",
57 | "name": "python",
58 | "nbconvert_exporter": "python",
59 | "pygments_lexer": "ipython3",
60 | "version": "3.12.8"
61 | },
62 | "nbproject": {
63 | "id": "Irn3xQyQ40GU",
64 | "pypackage": {
65 | "nbproject": "0.0.7+2.g8521e30"
66 | },
67 | "time_init": "2022-06-08T14:42:31.551211+00:00",
68 | "version": "0"
69 | },
70 | "vscode": {
71 | "interpreter": {
72 | "hash": "2775e555cdc2d728c54aa22130c79afb1fa4da64f22f2fc6dcc2aa346c4e0672"
73 | }
74 | }
75 | },
76 | "nbformat": 4,
77 | "nbformat_minor": 5
78 | }
79 |
--------------------------------------------------------------------------------
/tests/core/notebooks/with-title-initialized-consecutive-finish-not-last-cell.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# My test notebook (consecutive) with `ln.finish()` not in last cell"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import lamindb as ln"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": null,
22 | "metadata": {},
23 | "outputs": [],
24 | "source": [
25 | "# do not pass uid purposefully\n",
26 | "ln.track()"
27 | ]
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": null,
32 | "metadata": {},
33 | "outputs": [],
34 | "source": [
35 | "print(\"my consecutive cell\")"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": null,
41 | "metadata": {},
42 | "outputs": [],
43 | "source": [
44 | "ln.finish(ignore_non_consecutive=True)"
45 | ]
46 | },
47 | {
48 | "cell_type": "code",
49 | "execution_count": null,
50 | "metadata": {},
51 | "outputs": [],
52 | "source": [
53 | "print(\"my consecutive cell\")"
54 | ]
55 | }
56 | ],
57 | "metadata": {
58 | "kernelspec": {
59 | "display_name": "py39",
60 | "language": "python",
61 | "name": "python3"
62 | },
63 | "language_info": {
64 | "codemirror_mode": {
65 | "name": "ipython",
66 | "version": 3
67 | },
68 | "file_extension": ".py",
69 | "mimetype": "text/x-python",
70 | "name": "python",
71 | "nbconvert_exporter": "python",
72 | "pygments_lexer": "ipython3",
73 | "version": "3.12.8"
74 | }
75 | },
76 | "nbformat": 4,
77 | "nbformat_minor": 2
78 | }
79 |
--------------------------------------------------------------------------------
/tests/core/notebooks/with-title-initialized-consecutive-finish.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# My test notebook (consecutive) with `ln.finish()`"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import lamindb as ln\n",
17 | "import pytest"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": null,
23 | "metadata": {},
24 | "outputs": [],
25 | "source": [
26 | "with pytest.raises(ln.errors.InvalidArgument) as error:\n",
27 | " ln.track(\"ujPaFZ\")\n",
28 | "print(error.exconly())\n",
29 | "assert error.exconly().startswith(\n",
30 | " 'lamindb.errors.InvalidArgument: Please pass an auto-generated uid instead of \"ujPaFZ\". Resolve by running:'\n",
31 | ")"
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": null,
37 | "metadata": {},
38 | "outputs": [],
39 | "source": [
40 | "# with uid passed\n",
41 | "ln.track(\"ujPaFZatnMLG0000\")"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": null,
47 | "metadata": {},
48 | "outputs": [],
49 | "source": [
50 | "print(\"my consecutive cell\")"
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": null,
56 | "metadata": {},
57 | "outputs": [],
58 | "source": [
59 | "print(\"my consecutive cell\")"
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "execution_count": null,
65 | "metadata": {},
66 | "outputs": [],
67 | "source": [
68 | "ln.finish()"
69 | ]
70 | }
71 | ],
72 | "metadata": {
73 | "kernelspec": {
74 | "display_name": "py312",
75 | "language": "python",
76 | "name": "python3"
77 | },
78 | "language_info": {
79 | "codemirror_mode": {
80 | "name": "ipython",
81 | "version": 3
82 | },
83 | "file_extension": ".py",
84 | "mimetype": "text/x-python",
85 | "name": "python",
86 | "nbconvert_exporter": "python",
87 | "pygments_lexer": "ipython3",
88 | "version": "3.12.8"
89 | }
90 | },
91 | "nbformat": 4,
92 | "nbformat_minor": 2
93 | }
94 |
--------------------------------------------------------------------------------
/tests/core/scripts/duplicate1/script-to-test-versioning.py:
--------------------------------------------------------------------------------
1 | import lamindb as ln
2 |
3 | ln.context.version = "1"
4 | ln.track("Ro1gl7n8YrdH0001")
5 |
--------------------------------------------------------------------------------
/tests/core/scripts/duplicate2/script-to-test-versioning.py:
--------------------------------------------------------------------------------
1 | import lamindb as ln
2 |
3 | ln.context.version = "2"
4 | ln.track("Ro1gl7n8YrdH0001")
5 |
6 | assert ln.context.transform.version == "2"
7 |
--------------------------------------------------------------------------------
/tests/core/scripts/duplicate3/script-to-test-versioning.py:
--------------------------------------------------------------------------------
1 | import lamindb as ln
2 |
3 | ln.context.version = "3"
4 | ln.track("Ro1gl7n8YrdH0001")
5 |
--------------------------------------------------------------------------------
/tests/core/scripts/duplicate4/script-to-test-versioning.py:
--------------------------------------------------------------------------------
1 | import lamindb as ln
2 |
3 | ln.track()
4 |
--------------------------------------------------------------------------------
/tests/core/scripts/script-to-test-filename-change.py:
--------------------------------------------------------------------------------
1 | import lamindb as ln
2 |
3 | ln.track("Ro1gl7n8YrdH0000")
4 |
--------------------------------------------------------------------------------
/tests/core/scripts/script-to-test-versioning.py:
--------------------------------------------------------------------------------
1 | import lamindb as ln
2 |
3 | ln.context.version = "1"
4 | ln.track("Ro1gl7n8YrdH0000")
5 |
--------------------------------------------------------------------------------
/tests/core/test_artifact_folders.py:
--------------------------------------------------------------------------------
1 | import lamindb as ln
2 | import pytest
3 | from lamindb.errors import InvalidArgument
4 |
5 |
@pytest.mark.parametrize("key", [None, "my_new_folder"])
def test_folder_like_artifact(get_test_filepaths, key):
    """Creating, re-creating, versioning, and deleting a folder-like Artifact.

    Exercises the error branches (key inside registered storage; neither key
    nor description outside registered storage) and, on the happy path, that
    re-creating from the same folder deduplicates and that a revision at the
    same path overwrites the previous version.
    """
    # get variables from fixture
    is_in_registered_storage = get_test_filepaths[0]
    test_dirpath = get_test_filepaths[2]
    hash_test_dir = get_test_filepaths[5]

    # run tests on initial Artifact creation
    if key is not None and is_in_registered_storage:
        with pytest.raises(InvalidArgument) as error:
            ln.Artifact(test_dirpath, key=key)
        assert error.exconly().startswith(
            "lamindb.errors.InvalidArgument: The path"  # The path {data} is already in registered storage
        )
        return None
    if key is None and not is_in_registered_storage:
        with pytest.raises(ValueError) as error:
            ln.Artifact(test_dirpath, key=key)
        assert error.exconly().startswith(
            "ValueError: Pass one of key, run or description as a parameter"
        )
        return None
    artifact1 = ln.Artifact(test_dirpath, key=key)
    assert artifact1.n_files == 3
    assert artifact1.hash == hash_test_dir
    assert artifact1._state.adding
    assert artifact1.description is None
    assert artifact1.path.exists()
    artifact1.save()

    # run tests on re-creating the Artifact
    # same content at same location -> returns the existing record
    artifact2 = ln.Artifact(test_dirpath, key=key, description="something")
    assert not artifact2._state.adding
    assert artifact1.id == artifact2.id
    assert artifact1.uid == artifact2.uid
    assert artifact1.storage == artifact2.storage
    assert artifact2.path.exists()
    assert artifact2.description == "something"

    # now put another file in the test directory

    # create a first file
    test_filepath_added = test_dirpath / "my_file_added.txt"
    test_filepath_added.write_text("2")
    artifact3 = ln.Artifact(test_dirpath, key=key, revises=artifact1)
    assert artifact3.n_files == 4
    assert artifact3.hash != hash_test_dir
    assert artifact3._state.adding
    assert artifact3.description is None
    assert artifact3.path.exists()
    artifact3.save()

    # the state of artifact1 is lost, because artifact3 is stored at the same path
    assert artifact3.overwrite_versions
    assert artifact1.overwrite_versions
    assert artifact3.path == artifact1.path
    test_filepath_added.unlink()

    # delete the artifact
    artifact2.delete(permanent=True, storage=False)
    artifact3.delete(permanent=True, storage=False)
67 |
68 |
def test_overwrite_versions_false(get_test_filepaths):
    """With overwrite_versions=False, a new version gets a distinct storage path.

    Only runs for folders outside registered storage; the registered-storage
    cases are covered by test_folder_like_artifact.
    """
    # get variables from fixture
    is_in_registered_storage = get_test_filepaths[0]
    test_dirpath = get_test_filepaths[2]
    hash_test_dir = get_test_filepaths[5]
    if is_in_registered_storage:
        return
    artifact1 = ln.Artifact(
        test_dirpath, key="my_folder", overwrite_versions=False
    ).save()
    assert artifact1.hash == hash_test_dir
    # skip artifact2 because we already test this above
    # create a first file
    test_filepath_added = test_dirpath / "my_file_added.txt"
    test_filepath_added.write_text("2")
    artifact3 = ln.Artifact(test_dirpath, key="my_folder", overwrite_versions=False)
    assert artifact3.hash != hash_test_dir
    artifact3.save()
    # the state of artifact1 is lost, because artifact3 is stored at the same path
    assert not artifact3.overwrite_versions
    assert not artifact1.overwrite_versions
    # unlike the overwrite case, each version keeps its own path
    assert artifact3.path != artifact1.path
    test_filepath_added.unlink()
    artifact1.delete(permanent=True, storage=False)
    artifact3.delete(permanent=True, storage=False)
94 |
--------------------------------------------------------------------------------
/tests/core/test_data.py:
--------------------------------------------------------------------------------
1 | import lamindb as ln
2 | import pytest
3 |
4 |
def test_rename():
    """Renaming a label/feature used inside a dataset requires make_external first."""
    import pandas as pd
    from lamindb.errors import SQLRecordNameChangeIntegrityError

    frame = pd.DataFrame(
        {
            "feature_to_rename": [
                "label-to-rename",
                "label-to-rename",
                "label-not-to-rename",
            ],
            "feature_to_rename2": [
                "label-not-to-rename",
                "label-not-to-rename",
                "label-not-to-rename",
            ],
        }
    )

    cur = ln.Curator.from_df(
        frame,
        categoricals={
            "feature_to_rename": ln.ULabel.name,
            "feature_to_rename2": ln.ULabel.name,
        },
    )
    cur.add_new_from("feature_to_rename")
    cur.add_new_from("feature_to_rename2")
    artifact = cur.save_artifact(description="test-rename")
    link_filter = artifact.ulabels.through.objects.filter(
        feature__name="feature_to_rename", ulabel__name="label-to-rename"
    )
    assert link_filter.exists()
    assert ln.Artifact.filter(feature_sets__features__name="feature_to_rename").exists()

    # renaming an internally-used label must fail ...
    ulabel = ln.ULabel.get(name="label-to-rename")
    with pytest.raises(SQLRecordNameChangeIntegrityError):
        ulabel.name = "label-renamed"
        ulabel.save()

    # ... until the label is made external
    artifact.labels.make_external(ulabel)
    assert not artifact.ulabels.through.objects.filter(
        feature__name="feature_to_rename", ulabel__name="label-to-rename"
    ).exists()
    ulabel.name = "label-renamed"
    ulabel.save()

    # same contract for features
    feature = ln.Feature.get(name="feature_to_rename")
    with pytest.raises(SQLRecordNameChangeIntegrityError):
        feature.name = "feature_renamed"
        feature.save()

    artifact.features.make_external(feature)
    assert not ln.Artifact.filter(
        feature_sets__features__name="feature_to_rename"
    ).exists()
    assert ln.Artifact.filter(
        feature_sets__features__name="feature_to_rename2"
    ).exists()
    feature.name = "feature_renamed"
    feature.save()

    # making the last member external drops the now-empty schema
    feature2 = ln.Feature.get(name="feature_to_rename2")
    artifact.features.make_external(feature2)
    assert artifact.feature_sets.count() == 0

    # clean up
    artifact.delete(permanent=True)
    ln.Schema.filter().delete()
    ln.ULabel.filter().delete()
    ln.Feature.filter().delete()
78 |
--------------------------------------------------------------------------------
/tests/core/test_db.py:
--------------------------------------------------------------------------------
1 | import lamindb as ln
2 |
3 |
def test_create_to_load():
    """A saved transform supports run creation; the storage root is queryable."""
    transform = ln.Transform(version="0", key="test", type="pipeline").save()
    run = ln.Run(transform=transform)
    run.save()
    ln.Storage.get(root=str(ln.setup.settings.storage.root))
10 |
--------------------------------------------------------------------------------
/tests/core/test_delete.py:
--------------------------------------------------------------------------------
1 | import lamindb as ln
2 |
3 |
def test_delete_record():
    """Bulk-deleting by a name filter removes every matching ULabel."""
    names = ["label1", "label2", "label3"]
    ln.save([ln.ULabel(name=name) for name in names])
    ln.ULabel.filter(name__in=names).delete()
    assert ln.ULabel.filter(name__in=names).count() == 0
10 |
--------------------------------------------------------------------------------
/tests/core/test_feature.py:
--------------------------------------------------------------------------------
1 | import bionty as bt
2 | import lamindb as ln
3 | import pandas as pd
4 | import pytest
5 | from lamindb.errors import ValidationError
6 | from lamindb.models.feature import serialize_pandas_dtype
7 | from pandas.api.types import is_string_dtype
8 |
9 |
@pytest.fixture(scope="module")
def df():
    """Small mixed-dtype DataFrame shared by the feature tests."""
    data = {
        "feat1": [1, 2, 3],
        "feat2": [3.1, 4.2, 5.3],
        "feat3": ["cond1", "cond2", "cond2"],
        "feat4": ["id1", "id2", "id3"],
        "rando_feature": ["rando1", "rando2", "rando3"],
    }
    return pd.DataFrame(data)
21 |
22 |
def test_feature_init():
    """Constructor validation and idempotency of ln.Feature."""
    # no positional args allowed
    with pytest.raises(ValueError):
        ln.Feature("x")
    # dtype is required ...
    with pytest.raises(ValidationError):
        ln.Feature(name="feat")
    # ... unless is_type is passed
    ln.Feature(name="Feat", is_type=True)
    # invalid dtype string
    with pytest.raises(ValueError):
        ln.Feature(name="feat", dtype="x")
    # cat[...] has to reference SQLRecord types
    with pytest.raises(ValidationError):
        ln.Feature(name="feat", dtype="cat[1]")
    # ensure feat1 does not exist
    # BUGFIX: the walrus assignment must be parenthesized — without parentheses
    # `feat1` was bound to the boolean of `... is not None` (assignment
    # expressions have the lowest precedence), so `.delete()` was called on a bool.
    if (feat1 := ln.Feature.filter(name="feat1").one_or_none()) is not None:
        feat1.delete()
    feat1 = ln.Feature(name="feat", dtype="str").save()
    # re-creating with a conflicting dtype raises
    with pytest.raises(ValidationError) as error:
        ln.Feature(name="feat", dtype="cat")
    assert (
        error.exconly()
        == "lamindb.errors.ValidationError: Feature feat already exists with dtype str, you passed cat"
    )
    feat1.delete()

    # re-creating an identical feature returns the existing record
    feat2 = ln.Feature(name="feat2", dtype="str", description="feat2").save()
    feat2_again = ln.Feature(name="feat2", dtype="str", description="feat2").save()
    assert feat2 == feat2_again
    feat2.delete()

    # multi-registry categorical dtype: via string and via registry objects
    feature = ln.Feature(name="feat1", dtype="cat[ULabel|bionty.Gene]")
    feature = ln.Feature(name="feat1", dtype=[ln.ULabel, bt.Gene])
    assert feature.dtype == "cat[ULabel|bionty.Gene]"
61 |
62 |
def test_feature_from_df(df):
    """Features inferred from a DataFrame carry the serialized pandas dtypes."""
    # BUGFIX: parenthesize the walrus — previously `feat1` was bound to the
    # boolean of `... is not None` and `.delete()` was called on a bool.
    if (feat1 := ln.Feature.filter(name="feat1").one_or_none()) is not None:
        feat1.delete()
    features = ln.Feature.from_df(df.iloc[:, :4]).save()
    artifact = ln.Artifact.from_df(df, description="test").save()
    # test for deprecated add_feature_set
    artifact.features.add_feature_set(ln.Schema(features), slot="columns")
    features = artifact.features["columns"]
    assert len(features) == len(df.columns[:4])
    # BUGFIX: compare the column *dtype* against CategoricalDtype — the old
    # `isinstance(df[col], pd.CategoricalDtype)` tested the Series object and
    # was always False. A set of column names suffices for the membership test.
    # (Also removed a dead, unused is_string_dtype list comprehension.)
    categorical_cols = {
        col for col in df.columns if isinstance(df[col].dtype, pd.CategoricalDtype)
    }
    for feature in features:
        if feature.name in categorical_cols:
            assert feature.dtype == "cat"
        else:
            orig_type = df[feature.name].dtype
            assert feature.dtype == serialize_pandas_dtype(orig_type)
    for feature in features:
        feature.save()
    labels = [ln.ULabel(name=name) for name in df["feat3"].unique()]
    ln.save(labels)
    feature = ln.Feature.get(name="feat3")
    feature.dtype = "cat"
    feature.save()
    # features measured within the dataset cannot be annotated manually
    with pytest.raises(ValidationError) as err:
        artifact.labels.add(labels, feature=feature)
    assert (
        err.exconly()
        == "lamindb.errors.ValidationError: Cannot manually annotate a feature measured *within* the dataset. Please use a Curator."
    )
    # label annotation requires a categorical feature
    extfeature = ln.Feature(name="extfeat", dtype="str").save()
    with pytest.raises(ValidationError) as err:
        artifact.labels.add(labels, feature=extfeature)
    assert (
        err.exconly()
        == f"lamindb.errors.ValidationError: Feature {extfeature.name} needs dtype='cat' for label annotation, currently has dtype='str'"
    )

    # clean up
    artifact.delete(permanent=True)
    ln.Schema.filter().all().delete()
    ln.ULabel.filter().all().delete()
    ln.Feature.filter().all().delete()
108 |
--------------------------------------------------------------------------------
/tests/core/test_from_values.py:
--------------------------------------------------------------------------------
1 | import bionty as bt
2 | import lamindb as ln
3 | import pandas as pd
4 | import pytest
5 |
6 |
@pytest.fixture(scope="module")
def df():
    """Cell-type names paired with ontology ids; one entry has no id."""
    rows = (
        ["T cell", "CL:0000084"],
        ["hepatocyte", "CL:0000182"],
        ["my new cell type", ""],
    )
    return pd.DataFrame(rows, columns=["cell_type", "cell_type_id"])
17 |
18 |
def test_from_values_name(df):
    """from_values by name validates known terms and drops unknown ones."""
    bt.CellType.filter().delete()
    assert df["cell_type"].tolist() == ["T cell", "hepatocyte", "my new cell type"]
    # create records from the public ontology
    records = bt.CellType.from_values(df.cell_type, "name")
    assert len(records) == 2
    assert {r.ontology_id for r in records} == {"CL:0000084", "CL:0000182"}
    assert records[0].source.entity == "bionty.CellType"

    # a registry class is not a valid field
    with pytest.raises(TypeError):
        bt.CellType.from_values(df.cell_type, field=bt.CellType)
32 |
33 |
def test_from_values_ontology_id(df):
    """from_values by ontology_id skips empty ids and resolves names."""
    assert df["cell_type_id"].tolist() == ["CL:0000084", "CL:0000182", ""]
    records = bt.CellType.from_values(df.cell_type_id, "ontology_id")
    assert len(records) == 2
    assert {r.name for r in records} == {"T cell", "hepatocyte"}
    assert records[0].source.entity == "bionty.CellType"
41 |
42 |
def test_from_values_multiple_match():
    """A symbol that maps to several genes yields one record per match."""
    records = bt.Gene.from_values(["ABC1", "PDCD1"], bt.Gene.symbol, organism="human")
    assert len(records) == 3
46 |
47 |
def test_get_or_create_records():
    """from_values returns every record even when more than 20 values exist."""
    names = [f"ulabel{i}" for i in range(25)]
    ln.save([ln.ULabel(name=name) for name in names])
    # more than 20 existing values exercises the batched lookup path
    labels = ln.ULabel.from_values(names, field="name")
    assert len(labels) == 25
55 |
56 |
def test_from_values_synonyms_aware():
    """from_values resolves synonyms against the registry and the public source."""
    bt.CellType.from_source(name="T cell").save()

    def assert_single(recs, expected_name):
        # exactly one record, with the expected name and a bionty source
        assert len(recs) == 1
        assert recs[0].name == expected_name
        assert isinstance(recs[0].source, bt.Source)

    # existing validated values
    assert_single(bt.CellType.from_values(["T cell"], "name"), "T cell")
    # existing validated values and synonyms
    assert_single(bt.CellType.from_values(["T cell", "T-cell"], "name"), "T cell")
    # public values and synonyms
    assert_single(bt.CellType.from_values(["B-cell", "B cell"], "name"), "B cell")
    # all possibilities of validated values
    recs = bt.CellType.from_values(
        ["T cell", "T-cell", "t cell", "B cell", "B-cell"], "name"
    )
    assert len(recs) == 2
    assert {r.name for r in recs} == {"T cell", "B cell"}
    assert isinstance(recs[0].source, bt.Source)
    assert isinstance(recs[1].source, bt.Source)
    # non-validated values are dropped
    recs = bt.CellType.from_values(["T cell", "mycell"], "name")
    assert_single(recs, "T cell")
    assert recs[0].ontology_id == "CL:0000084"
    bt.CellType.filter().all().delete()
90 |
91 |
def test_standardize():
    """Synonym standardization applies to the symbol field, not ensembl ids."""
    # ensembl_gene_id does not accept symbols
    by_id = bt.Gene.from_values(
        ["HES4", "TNFRSF4"], field=bt.Gene.ensembl_gene_id, organism="human"
    )
    assert len(by_id) == 0

    # symbol does
    by_symbol = bt.Gene.from_values(
        ["HES4", "TNFRSF4"], field=bt.Gene.symbol, organism="human"
    )
    assert len(by_symbol) == 2
103 |
--------------------------------------------------------------------------------
/tests/core/test_has_parents.py:
--------------------------------------------------------------------------------
1 | import lamindb as ln
2 | from lamindb.models.has_parents import _add_emoji
3 |
4 |
def test_view_parents():
    """view_parents renders a one-level parent graph without erroring."""
    child = ln.ULabel(name="label1").save()
    parent = ln.ULabel(name="label2").save()
    child.parents.add(parent)
    child.view_parents(ln.ULabel.name, distance=1)
    child.delete()
    parent.delete()
14 |
15 |
def test_query_parents_children():
    """query_parents/query_children traverse the hierarchy transitively."""
    grandparent = ln.ULabel(name="label1").save()
    parent = ln.ULabel(name="label2").save()
    child = ln.ULabel(name="label3").save()
    grandparent.children.add(parent)
    parent.children.add(child)
    parents = child.query_parents()
    assert len(parents) == 2
    assert grandparent in parents and parent in parents
    children = grandparent.query_children()
    assert len(children) == 2
    assert parent in children and child in children
    grandparent.delete()
    parent.delete()
    child.delete()
31 |
32 |
def test_add_emoji():
    """_add_emoji prefixes labels for transforms and runs with an emoji."""
    transform = ln.Transform(key="test-12345", type="upload")
    assert _add_emoji(transform, label="transform") == "🖥️ transform"
    transform.save()
    run = ln.Run(transform=transform)
    assert _add_emoji(run, label="run") == "🖥️ run"
    transform.delete()
40 |
41 |
def test_view_lineage_circular():
    """view_lineage terminates even when an artifact is both input and output."""
    import pandas as pd

    transform = ln.Transform(key="test").save()
    run = ln.Run(transform=transform).save()
    frame = pd.DataFrame({"a": [1, 2, 3]})
    artifact = ln.Artifact.from_df(frame, description="test artifact", run=run).save()
    # close the cycle: the run's output becomes its own input
    run.input_artifacts.add(artifact)
    artifact.view_lineage()
    artifact.delete(permanent=True)
    run.delete()
    transform.delete()
55 |
--------------------------------------------------------------------------------
/tests/core/test_integrity.py:
--------------------------------------------------------------------------------
1 | import lamindb_setup as ln_setup
2 |
3 |
def test_migrate_check():
    """The database schema is in sync with the migration history."""
    assert ln_setup.migrate.check()
6 |
7 |
def test_system_check():
    """Django's system checks pass for the configured instance."""
    ln_setup.django("check")
10 |
--------------------------------------------------------------------------------
/tests/core/test_manager.py:
--------------------------------------------------------------------------------
1 | import lamindb as ln
2 |
3 |
def test_manager_list():
    """Manager.list returns related records, optionally projected to a field."""
    label = ln.ULabel(name="manager label").save()
    parent_names = [f"ULabel {i}" for i in range(3)]
    parents = [ln.ULabel(name=name) for name in parent_names]
    ln.save(parents)
    label.parents.set(parents)
    assert len(label.parents.list()) == 3
    assert "ULabel 1" in label.parents.list("name")
    label.delete()
    for parent in parents:
        parent.delete()
16 |
--------------------------------------------------------------------------------
/tests/core/test_models.py:
--------------------------------------------------------------------------------
1 | import re
2 | import textwrap
3 |
4 | import lamindb as ln
5 | import pandas as pd
6 | import pytest
7 |
8 |
9 | def _strip_ansi(text: str) -> str:
10 | """Remove ANSI escape sequences from a string."""
11 | ansi_escape = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")
12 | return ansi_escape.sub("", text)
13 |
14 |
def test_registry__repr__feature():
    """The Feature registry repr lists all simple and relational fields."""
    import lamindb.models as ln

    registry = ln.Param
    expected_repr = textwrap.dedent("""\
    Feature
      Simple fields
        .uid: CharField
        .name: CharField
        .dtype: CharField
        .is_type: BooleanField
        .unit: CharField
        .description: CharField
        .array_rank: SmallIntegerField
        .array_size: IntegerField
        .array_shape: JSONField
        .proxy_dtype: CharField
        .synonyms: TextField
        .created_at: DateTimeField
        .updated_at: DateTimeField
      Relational fields
        .branch: Branch
        .space: Space
        .created_by: User
        .run: Run
        .type: Feature
        .schemas: Schema
        .features: Feature
        .values: FeatureValue
        .projects: Project
    """).strip()

    actual_repr = _strip_ansi(repr(registry))
    print(actual_repr)  # aid debugging on failure
    assert actual_repr.strip() == expected_repr.strip()
50 |
51 |
def test_registry__repr__artifact():
    """The Artifact registry repr lists simple, relational, and bionty fields."""
    import lamindb.models as ln

    registry = ln.Artifact
    expected_repr = textwrap.dedent("""\
    Artifact
      Simple fields
        .uid: CharField
        .key: CharField
        .description: CharField
        .suffix: CharField
        .kind: CharField
        .otype: CharField
        .size: BigIntegerField
        .hash: CharField
        .n_files: BigIntegerField
        .n_observations: BigIntegerField
        .version: CharField
        .is_latest: BooleanField
        .created_at: DateTimeField
        .updated_at: DateTimeField
      Relational fields
        .branch: Branch
        .space: Space
        .storage: Storage
        .run: Run
        .schema: Schema
        .created_by: User
        .ulabels: ULabel
        .input_of_runs: Run
        .feature_sets: Schema
        .collections: Collection
        .records: Record
        .references: Reference
        .projects: Project
      Bionty fields
        .organisms: bionty.Organism
        .genes: bionty.Gene
        .proteins: bionty.Protein
        .cell_markers: bionty.CellMarker
        .tissues: bionty.Tissue
        .cell_types: bionty.CellType
        .diseases: bionty.Disease
        .cell_lines: bionty.CellLine
        .phenotypes: bionty.Phenotype
        .pathways: bionty.Pathway
        .experimental_factors: bionty.ExperimentalFactor
        .developmental_stages: bionty.DevelopmentalStage
        .ethnicities: bionty.Ethnicity
    """).strip()

    actual_repr = _strip_ansi(repr(registry))
    print(actual_repr)  # aid debugging on failure
    assert actual_repr.strip() == expected_repr.strip()
106 |
107 |
def test_unsaved_relationship_modification_attempts():
    """M2M access on an unsaved artifact raises a helpful ValueError."""
    frame = pd.DataFrame({"col1": [1, 2, 3], "col2": [4, 5, 6]})
    af = ln.Artifact.from_df(frame, description="testme")

    new_label = ln.ULabel(name="testlabel").save()
    with pytest.raises(ValueError) as excinfo:
        af.ulabels.add(new_label)

    assert (
        str(excinfo.value)
        == "You are trying to access the many-to-many relationships of an unsaved Artifact object. Please save it first using '.save()'."
    )

    new_label.delete()
    af.delete()
124 |
--------------------------------------------------------------------------------
/tests/core/test_notebooks.py:
--------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 | from pathlib import Path
4 |
5 | import lamindb as ln
6 | import nbproject_test
7 |
# directories containing the notebooks executed by these tests
_tests_dir = Path(__file__).parent
notebook_dir = _tests_dir / "notebooks/"
notebook_dir_duplicate = _tests_dir / "notebooks/duplicate/"
10 |
11 |
def test_all_notebooks():
    """Execute every notebook in the test notebook directories."""
    # mutates the live process environment; restored at the end
    os.environ["LAMIN_TESTING"] = "true"
    nbproject_test.execute_notebooks(notebook_dir)
    nbproject_test.execute_notebooks(notebook_dir_duplicate)
    del os.environ["LAMIN_TESTING"]
18 |
19 |
def test_run_after_rename_no_uid():
    """A renamed notebook without an embedded uid keeps its transform uid."""
    notebook_path = (
        notebook_dir / "with-title-initialized-consecutive-finish-not-last-cell.ipynb"
    )

    def run_nbconvert(path, env=None):
        # execute the notebook in place and echo its output for debugging
        result = subprocess.run(  # noqa: S602
            f"jupyter nbconvert --to notebook --inplace --execute {path}",
            shell=True,
            capture_output=True,
            env=env,
        )
        print(result.stdout.decode())
        print(result.stderr.decode())
        return result.returncode

    assert run_nbconvert(notebook_path) == 0

    uid = ln.Transform.get(
        key="with-title-initialized-consecutive-finish-not-last-cell.ipynb"
    ).uid

    # simulate the user renaming the notebook
    new_path = notebook_path.with_name("no-uid-renamed.ipynb")
    os.system(f"cp {notebook_path} {new_path}")  # noqa: S605

    env = os.environ
    env["LAMIN_TESTING"] = "true"
    assert run_nbconvert(new_path, env=env) == 0
    del env["LAMIN_TESTING"]

    assert ln.Transform.get(key="no-uid-renamed.ipynb").uid == uid
57 |
--------------------------------------------------------------------------------
/tests/core/test_run.py:
--------------------------------------------------------------------------------
1 | import lamindb as ln
2 | import pytest
3 |
4 |
def test_run():
    """Run creation requires a saved transform; reference fields are optional."""
    transform = ln.Transform(key="My transform")
    with pytest.raises(ValueError) as error:
        ln.Run(transform)
    assert (
        error.exconly()
        == "ValueError: Please save transform record before creating a run"
    )
    transform.save()
    run1 = ln.Run(transform)
    assert run1.reference is None
    assert run1.reference_type is None
    run2 = ln.Run(transform, reference="test1", reference_type="test2")
    assert run2.reference == "test1"
    assert run2.reference_type == "test2"
    assert run1.uid != run2.uid
    transform.delete()
22 |
23 |
def test_edge_cases():
    """Run rejects extra positional args and requires a transform."""
    with pytest.raises(ValueError) as error:
        ln.Run(1, 2)
    assert error.exconly() == "ValueError: Only one non-keyword arg allowed: transform"

    with pytest.raises(TypeError) as error:
        ln.Run()
    assert error.exconly() == "TypeError: Pass transform parameter"
31 |
--------------------------------------------------------------------------------
/tests/core/test_save.py:
--------------------------------------------------------------------------------
1 | import lamindb as ln
2 | import pytest
3 | from lamindb.models.save import prepare_error_message, store_artifacts
4 |
5 |
def test_bulk_save_and_update():
    """ln.save creates new records and updates already-saved ones in bulk."""
    names = [f"ULabel {i} new" for i in range(3)]
    labels = [ln.ULabel(name=name) for name in names]
    ln.save(labels)  # bulk creation
    assert len(ln.ULabel.filter(name__in=names).distinct().all()) == 3

    labels[0].name = "ULabel 0 updated"
    ln.save(labels)  # bulk update of existing records
    assert len(ln.ULabel.filter(name__in=names).distinct().all()) == 2
    assert ln.ULabel.get(name="ULabel 0 updated")
17 |
18 |
def test_prepare_error_message():
    """prepare_error_message distinguishes committed vs uncommitted records."""
    ln.core.datasets.file_mini_csv()
    artifact = ln.Artifact("mini.csv", description="test")
    exception = Exception("exception")

    # some records were already stored successfully
    msg = prepare_error_message([], [artifact], exception)
    assert msg.startswith(
        "The following entries have been successfully uploaded and committed to the database"
    )

    # nothing was stored yet
    msg = prepare_error_message([artifact], [], exception)
    assert msg.startswith("No entries were uploaded or committed to the database")
31 |
32 |
def test_save_data_object():
    """Saving an artifact uploads its file to storage."""
    ln.core.datasets.file_mini_csv()
    artifact = ln.Artifact("mini.csv", description="test").save()
    assert artifact.path.exists()
    artifact.delete(permanent=True, storage=True)
39 |
40 |
def test_store_artifacts_acid():
    """A failing storage-clearing step keeps saving transactional."""
    ln.core.datasets.file_mini_csv()
    artifact = ln.Artifact("mini.csv", description="test")
    artifact._clear_storagekey = "test.csv"

    # save() errors on check_and_attempt_clearing
    with pytest.raises(RuntimeError):
        artifact.save()

    with pytest.raises(RuntimeError) as error:
        store_artifacts([artifact], using_key=None)
    assert str(error.exconly()).startswith(
        "RuntimeError: The following entries have been successfully uploaded"
    )

    artifact.delete(permanent=True)
56 |
57 |
def test_save_parents():
    """Saving ontology records also saves their parent records."""
    import bionty as bt

    cell_lines = bt.CellLine.from_values(["HEPG2", "HUVEC"])
    ln.save(cell_lines)
    assert bt.CellLine.get("4ea731nb").parents.df().shape[0] == 1
    bt.CellLine.filter().delete()
65 |
--------------------------------------------------------------------------------
/tests/core/test_search.py:
--------------------------------------------------------------------------------
1 | import bionty as bt
2 | import lamindb as ln
3 | import pytest
4 |
5 |
@pytest.fixture(scope="module")
def prepare_cell_type_registry():
    """Populate the CellType registry with four known ontology terms."""
    bt.CellType.filter().all().delete()
    expected = [
        {
            "ontology_id": "CL:0000084",
            "name": "T cell",
            "synonyms": "T-cell|T-lymphocyte|T lymphocyte",
            "children": ["CL:0000798", "CL:0002420", "CL:0002419", "CL:0000789"],
        },
        {
            "ontology_id": "CL:0000236",
            "name": "B cell",
            "synonyms": "B-lymphocyte|B lymphocyte|B-cell",
            "children": ["CL:0009114", "CL:0001201"],
        },
        {
            "ontology_id": "CL:0000696",
            "name": "PP cell",
            "synonyms": "type F enteroendocrine cell",
            "children": ["CL:0002680"],
        },
        {
            "ontology_id": "CL:0002072",
            "name": "nodal myocyte",
            "synonyms": "P cell|myocytus nodalis|cardiac pacemaker cell",
            "children": ["CL:1000409", "CL:1000410"],
        },
    ]
    pulled = []
    for ref in expected:
        record = bt.CellType.from_source(ontology_id=ref["ontology_id"])
        # sanity-check the public source against our expectations
        assert record.name == ref["name"]
        assert set(record.synonyms.split("|")) == set(ref["synonyms"].split("|"))
        pulled.append(record)
    ln.save(pulled)
    yield "prepared"
    bt.CellType.filter().all().delete()
44 |
45 |
def test_search_synonyms(prepare_cell_type_registry):
    """Search matches on synonyms, not only on names."""
    hits = bt.CellType.search("P cell").df()
    assert set(hits.name.iloc[:2]) == {"nodal myocyte", "PP cell"}
49 |
50 |
def test_search_limit(prepare_cell_type_registry):
    """The limit parameter caps the number of results."""
    hits = bt.CellType.search("P cell", limit=1).df()
    assert len(hits) == 1
54 |
55 |
def test_search_case_sensitive(prepare_cell_type_registry):
    """Case-insensitive search still ranks the exact term first."""
    hits = bt.CellType.search("b cell", case_sensitive=False).df()
    assert hits.name.iloc[0] == "B cell"
59 |
60 |
def test_search_None():
    """Searching for None raises a descriptive ValueError."""
    with pytest.raises(
        ValueError, match="Cannot search for None value! Please pass a valid string."
    ):
        bt.CellType.search(None)
66 |
--------------------------------------------------------------------------------
/tests/core/test_tracked.py:
--------------------------------------------------------------------------------
1 | import concurrent.futures
2 |
3 | import lamindb as ln
4 | import pandas as pd
5 | import pytest
6 |
7 |
@ln.tracked()
def process_chunk(chunk_id: int) -> str:
    """Build a small DataFrame for *chunk_id*, save it, and return the artifact key."""
    start = chunk_id * 10
    df = pd.DataFrame({"id": range(start, start + 10), "value": range(10)})
    key = f"chunk_{chunk_id}.parquet"
    return ln.Artifact.from_df(df, key=key).save().key
19 |
20 |
def test_tracked_parallel():
    """@ln.tracked() gives each thread its own run when executed in parallel."""
    param_type = ln.Feature(name="Script[test_tracked.py]", is_type=True).save()
    ln.Feature(name="chunk_id", dtype="int", type=param_type).save()

    # calling a tracked function without a global run context must fail
    with pytest.raises(RuntimeError) as err:
        process_chunk(4)
    assert (
        err.exconly()
        == "RuntimeError: Please track the global run context before using @ln.tracked(): ln.track()"
    )

    # enable tracking for the parallel section
    ln.track()

    n_parallel = 3
    with concurrent.futures.ThreadPoolExecutor(max_workers=n_parallel) as executor:
        futures = [executor.submit(process_chunk, i) for i in range(n_parallel)]
        chunk_keys = [
            future.result() for future in concurrent.futures.as_completed(futures)
        ]

    print(f"Created artifacts with keys: {chunk_keys}")
    artifacts = [ln.Artifact.get(key=key) for key in chunk_keys]
    assert len(artifacts) == n_parallel

    # every artifact must come from its own, unique run
    runs = [artifact.run for artifact in artifacts]
    run_ids = [run.id for run in runs]
    print(f"Run IDs: {run_ids}")
    assert len(set(run_ids)) == n_parallel

    # each run recorded consistent start/finish timestamps
    for run in runs:
        print(f"Run details: {run}")
        assert run.started_at is not None
        assert run.finished_at is not None
        assert run.started_at < run.finished_at

    for artifact in artifacts:
        artifact.delete(permanent=True)

    # reset the global tracking context so later tests start clean
    ln.context._uid = None
    ln.context._run = None
    ln.context._transform = None
    ln.context._path = None
76 |
77 |
78 | if __name__ == "__main__":
79 | test_tracked_parallel()
80 |
--------------------------------------------------------------------------------
/tests/core/test_ulabel.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | import lamindb as ln
4 | import pytest
5 | from lamindb.errors import FieldValidationError
6 |
7 |
def test_ulabel():
    """ULabel rejects unknown kwargs, positional args, and lowercase type names."""
    # unknown keyword argument
    with pytest.raises(
        FieldValidationError,
        match=re.escape(
            "Only name, type, is_type, description, reference, reference_type are valid keyword arguments"
        ),
    ):
        ln.ULabel(x=1)

    # positional argument
    with pytest.raises(ValueError) as error:
        ln.ULabel(1)
    assert error.exconly() == "ValueError: Only one non-keyword arg allowed"

    # type names must be capitalized
    with pytest.raises(
        ValueError,
        match=re.escape(
            "'my_type' should start with a capital letter given you're defining a type"
        ),
    ):
        ln.ULabel(name="my_type", is_type=True)
28 |
29 |
def test_ulabel_plural_type_warning(ccaplog):
    """A plural type name triggers a singular-naming suggestion in the log."""
    ln.ULabel(name="MyThings", is_type=True)
    expected = "name 'MyThings' for type ends with 's', in case you're naming with plural, consider the singular for a type name"
    assert expected in ccaplog.text
36 |
--------------------------------------------------------------------------------
/tests/core/test_view.py:
--------------------------------------------------------------------------------
1 | import lamindb as ln
2 |
3 |
def test_vew():  # NOTE(review): name has a typo ("vew"); kept to avoid churn
    """ln.view runs both for a single module and for all modules."""
    ln.view(modules="core")
    ln.view()
7 |
--------------------------------------------------------------------------------
/tests/core/test_visibility.py:
--------------------------------------------------------------------------------
1 | import lamindb as ln
2 |
3 |
def testbranch_id():
    """Trash/restore cycles toggle branch_id on collections but not artifacts."""
    # create a file-backed artifact with the default branch_id
    with open("./testbranch_id.txt", "w") as f:
        f.write("branch_id")
    artifact = ln.Artifact("./testbranch_id.txt", description="testbranch_id").save()
    assert artifact.branch_id == 1

    collection = ln.Collection(artifact, key="testbranch_id").save()

    # deleting the collection trashes it but not the linked artifact
    collection.delete()
    assert collection.ordered_artifacts[0].branch_id == 1
    assert len(ln.Collection.filter(key="testbranch_id").all()) == 0
    assert len(ln.Collection.filter(key="testbranch_id", branch_id=1).all()) == 0
    assert len(ln.Collection.filter(key="testbranch_id", visibility=1).all()) == 0
    # filtering with branch_id=None / visibility=None includes trashed records
    assert len(ln.Collection.filter(key="testbranch_id", branch_id=None).all()) == 1
    assert len(ln.Collection.filter(key="testbranch_id", visibility=None).all()) == 1

    # restore from trash
    collection.restore()
    assert collection.branch_id == 1
    assert collection.ordered_artifacts[0].branch_id == 1

    # permanently delete the collection; the linked artifact is still queryable
    collection.delete(permanent=True)
    remaining = ln.Artifact.filter(description="testbranch_id", branch_id=None).all()
    assert len(remaining) == 1
38 |
--------------------------------------------------------------------------------
/tests/curators/conftest.py:
--------------------------------------------------------------------------------
1 | import shutil
2 |
3 | import lamindb_setup as ln_setup
4 | import pytest
5 |
6 |
def pytest_sessionstart():
    """Create a fresh test instance with the bionty and wetlab modules."""
    ln_setup.init(storage="./testdb", modules="bionty,wetlab")
9 |
10 |
11 | def pytest_sessionfinish(session: pytest.Session):
12 |     shutil.rmtree("./testdb")  # remove on-disk storage first
13 |     ln_setup.delete("testdb", force=True)  # then deregister the instance
14 |
15 |
16 | @pytest.fixture
17 | def ccaplog(caplog):
18 |     """Attach caplog's handler to the lamin_utils logger for the duration of a test."""
19 |     from lamin_utils._logger import logger
20 |
21 |     # Add caplog's handler to our custom logger
22 |     logger.addHandler(caplog.handler)
23 |
24 |     yield caplog
25 |
26 |     # Clean up after the test (fixture is function-scoped, not session-scoped)
27 |     logger.removeHandler(caplog.handler)
28 |
--------------------------------------------------------------------------------
/tests/curators/test_curators_multivalue.py:
--------------------------------------------------------------------------------
1 | import bionty as bt
2 | import lamindb as ln
3 | import pandas as pd
4 | import pytest
5 | from lamindb.core.exceptions import ValidationError
6 |
7 |
@pytest.fixture
def df():
    """DataFrame in which every column holds list-valued (multivalue) entries."""
    columns = {
        "sample_id": [["sample1", "sample2"], ["sample2"], ["sample3"]],
        "dose": [[1.2, 2.3], [1.2], [2.3]],
        "cell_type": [["B cell", "T cell"], ["B cell"], ["T cell"]],
        "tissue": [["blood", "pulmo"], ["blood"], ["lung"]],
    }
    return pd.DataFrame(columns)
18 |
19 |
20 | @pytest.fixture(scope="module")
21 | def lists_schema():  # module-scoped schema with list-typed features; tears down after the module
22 |     schema = ln.Schema(
23 |         name="lists schema cat",
24 |         features=[
25 |             ln.Feature(name="sample_id", dtype=list[str]).save(),
26 |             ln.Feature(name="dose", dtype=list[float]).save(),
27 |             ln.Feature(name="cell_type", dtype=list[str]).save(),
28 |             ln.Feature(name="tissue", dtype=list[bt.Tissue]).save(),
29 |         ],
30 |     ).save()
31 |
32 |     yield schema
33 |
34 |     schema.delete()  # teardown: delete the schema first, then the records it referenced
35 |     ln.Feature.filter().delete()
36 |     bt.Tissue.filter().delete()
37 |
38 |
39 | def test_curator_df_multivalue(df, lists_schema):  # validate + standardize list-valued categorical columns
40 |     curator = ln.curators.DataFrameCurator(df, lists_schema)
41 |     with pytest.raises(ValidationError):
42 |         curator.validate()  # "pulmo" is not a validated tissue yet
43 |     assert curator.cat._cat_vectors.keys() == {"columns", "tissue"}
44 |     assert curator.cat._cat_vectors["tissue"]._validated == ["blood", "lung"]
45 |     assert curator.cat._cat_vectors["tissue"]._non_validated == ["pulmo"]
46 |     assert curator.cat._cat_vectors["tissue"]._synonyms == {"pulmo": "lung"}  # presumably resolved via the bionty Tissue ontology — confirm
47 |
48 |     curator.cat.standardize("tissue")
49 |     assert curator.cat._cat_vectors["tissue"]._non_validated == []
50 |     assert df["tissue"].tolist() == [["blood", "lung"], ["blood"], ["lung"]]  # standardize mutates df in place
51 |
52 |     assert curator.validate() is None  # succeeds (returns None) after standardization
53 |
--------------------------------------------------------------------------------
/tests/curators/test_cxg_curator.py:
--------------------------------------------------------------------------------
1 | import lamindb as ln
2 | import numpy as np
3 |
4 |
5 | def test_cxg_curator():  # curate a small dataset against a CELLxGENE schema version, then export
6 |     schema_version = "5.2.0"
7 |     adata = ln.core.datasets.small_dataset3_cellxgene()
8 |     curator = ln.curators._legacy.CellxGeneAnnDataCatManager(
9 |         adata, schema_version=schema_version
10 |     )
11 |
12 |     adata.obs.rename(columns={"donor": "donor_id"}, inplace=True)
13 |     curator = ln.curators._legacy.CellxGeneAnnDataCatManager(
14 |         adata,
15 |         defaults=ln.curators._legacy.CellxGeneAnnDataCatManager.cxg_categoricals_defaults,
16 |         schema_version=schema_version,
17 |     )
18 |     assert not curator.validate()  # still expected to fail before the fixes below
19 |
20 |     adata = adata[:, ~adata.var.index.isin(curator.non_validated["var_index"])]  # drop genes that failed var_index validation
21 |     adata.obs["tissue"] = adata.obs["tissue"].cat.rename_categories({"lungg": "lung"})  # fix the intentional "lungg" typo in the fixture data
22 |     curator = ln.curators._legacy.CellxGeneAnnDataCatManager(
23 |         adata, schema_version=schema_version
24 |     )
25 |     assert curator.validate()
26 |
27 |     artifact = curator.save_artifact(
28 |         key=f"examples/dataset-curated-against-cxg-{curator.schema_version}.h5ad"
29 |     )
30 |     title = "Cross-tissue immune cell analysis reveals tissue-specific features in humans (for test demo only)"
31 |
32 |     adata.obsm["X_umap"] = np.zeros((adata.shape[0], 2))  # presumably the CELLxGENE export requires an embedding — confirm
33 |     adata_cxg = curator.to_cellxgene_anndata(is_primary_data=True, title=title)
34 |     assert "cell_type_ontology_term_id" in adata_cxg.obs.columns
35 |
36 |     artifact.delete(permanent=True)
37 |
--------------------------------------------------------------------------------
/tests/curators/test_pert_curator.py:
--------------------------------------------------------------------------------
1 | # Here we use `PertCurator` to curate perturbation related columns in a subsetted `AnnData` object of [McFarland et al. 2020](https://www.nature.com/articles/s41467-020-17440-w).
2 |
3 | import bionty as bt
4 | import lamindb as ln
5 | import pandas as pd
6 | import wetlab as wl
7 |
8 |
9 | def test_pert_curator():  # end-to-end legacy perturbation curation on a McFarland et al. 2020 subset (requires access to laminlabs/lamindata)
10 |     ln.settings.verbosity = "hint"
11 |     adata = (
12 |         ln.Artifact.using("laminlabs/lamindata")
13 |         .get(key="scrna/micro-macfarland2020.h5ad")
14 |         .load()
15 |     )
16 |
17 |     # ## Curate and register perturbations
18 |     #
19 |     # Required columns:
20 |     # - Either "pert_target" or "pert_name" and "pert_type" ("pert_type" allows: "genetic", "drug", "biologic", "physical")
21 |     # - If pert_dose = True (default), requires "pert_dose" in form of number+unit. E.g. 10.0nM
22 |     # - If pert_time = True (default), requires "pert_time" in form of number+unit. E.g. 10.0h
23 |
24 |     # +
25 |     # rename the columns to match the expected format
26 |     adata.obs["pert_time"] = adata.obs["time"].apply(
27 |         lambda x: str(x).split(", ")[-1] + "h" if pd.notna(x) else x
28 |     )  # we only take the last timepoint
29 |     adata.obs["pert_dose"] = adata.obs["dose_value"].map(
30 |         lambda x: f"{x}{adata.obs['dose_unit'].iloc[0]}" if pd.notna(x) else None
31 |     )
32 |     adata.obs.rename(
33 |         columns={"perturbation": "pert_name", "perturbation_type": "pert_type"},
34 |         inplace=True,
35 |     )
36 |     # fix the perturbation type as suggested by the curator
37 |     adata.obs["pert_type"] = adata.obs["pert_type"].cat.rename_categories(
38 |         {"CRISPR": "genetic", "drug": "compound"}
39 |     )
40 |
41 |     adata.obs["tissue_type"] = "cell culture"
42 |
43 |     curator = ln.curators._legacy.PertAnnDataCatManager(adata)
44 |
45 |     assert curator.validate() is not True  # expected to fail before the registrations below
46 |
47 |     # ### Genetic perturbations
48 |
49 |     # register genetic perturbations with their target genes
50 |     pert_target_map = {
51 |         "sggpx4-1": "GPX4",
52 |         "sggpx4-2": "GPX4",
53 |         "sgor2j2": "OR2J2",  # cutting control
54 |     }
55 |
56 |     ln.settings.creation.search_names = False  # disable search-on-create during bulk registration; re-enabled after the loop
57 |     for sg_name, gene_symbol in pert_target_map.items():
58 |         pert = wl.GeneticPerturbation.filter(
59 |             system="CRISPR-Cas9", name=sg_name
60 |         ).one_or_none()
61 |         if pert is None:
62 |             pert = wl.GeneticPerturbation(
63 |                 system="CRISPR-Cas9",
64 |                 name=sg_name,
65 |                 description="cutting control" if sg_name == "sgor2j2" else None,
66 |             ).save()
67 |         target = wl.PerturbationTarget.filter(name=gene_symbol).one_or_none()
68 |         if target is None:
69 |             target = wl.PerturbationTarget(name=gene_symbol).save()
70 |         pert.targets.add(target)
71 |         genes = bt.Gene.filter(symbol=gene_symbol).all()
72 |         if len(genes) == 0:
73 |             genes = bt.Gene.from_values(
74 |                 [gene_symbol], field=bt.Gene.symbol, organism="human"
75 |             ).save()
76 |         target.genes.add(*genes)
77 |     ln.settings.creation.search_names = True
78 |
79 |     adata.obs["pert_target"] = adata.obs["pert_genetic"].map(pert_target_map)
80 |
81 |     # register the negative control without targets: Non-cutting control
82 |     wl.GeneticPerturbation(
83 |         name="sglacz", system="CRISPR-Cas9", description="non-cutting control"
84 |     ).save()
85 |
86 |     # ### Compounds
87 |
88 |     # the remaining compounds are not in CHEBI and we create records for them
89 |     curator.add_new_from("pert_compound")
90 |
91 |     # manually fix sex and set assay
92 |     adata.obs["sex"] = adata.obs["sex"].astype(str).str.lower()
93 |     adata.obs["assay"] = "10x 3' v3"
94 |
95 |     # subset the adata to only include the validated genes
96 |     if "var_index" in curator.non_validated:
97 |         adata = adata[
98 |             :, ~adata.var_names.isin(curator.non_validated["var_index"])
99 |         ].copy()
100 |
101 |     # standardize disease as suggested (sex was already fixed manually above)
102 |     curator.standardize("disease")
103 |
104 |     curator = wl.PertCurator(adata)
105 |     curator.validate()
106 |     curator.standardize("all")
107 |     curator.add_new_from("all")
108 |
109 |     assert curator.validate() is True
110 |
--------------------------------------------------------------------------------
/tests/permissions/conftest.py:
--------------------------------------------------------------------------------
1 | import shutil
2 | from subprocess import DEVNULL, run
3 | from time import perf_counter
4 |
5 | import lamindb_setup as ln_setup
6 | import pytest
7 | from lamin_utils import logger
8 |
9 |
10 | def pytest_sessionstart():  # set up a postgres instance with row-level security for the permissions tests
11 |     t_execute_start = perf_counter()
12 |
13 |     ln_setup.settings.auto_connect = True
14 |     # these are called in separate scripts because can't change connection
15 |     # within the same python process due to django
16 |     # init instance and setup RLS
17 |     run(  # noqa: S602
18 |         "python ./tests/permissions/scripts/setup_instance.py",
19 |         shell=True,
20 |         capture_output=False,
21 |     )
22 |     # populate permissions and models via the admin connection
23 |     run(  # noqa: S602
24 |         "python ./tests/permissions/scripts/setup_access.py",
25 |         shell=True,
26 |         capture_output=False,
27 |     )
28 |
29 |     total_time_elapsed = perf_counter() - t_execute_start
30 |     print(f"Time to setup the instance: {total_time_elapsed:.3f}s")
31 |
32 |
33 | def pytest_sessionfinish(session: pytest.Session):  # tear down storage, instance and the postgres container
34 |     logger.set_verbosity(1)
35 |     shutil.rmtree("./default_storage_permissions")
36 |     ln_setup.delete("lamindb-test-permissions", force=True)
37 |     run("docker stop pgtest && docker rm pgtest", shell=True, stdout=DEVNULL)  # noqa: S602
38 |
--------------------------------------------------------------------------------
/tests/permissions/jwt_utils.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | import psycopg2
4 |
5 |
def sign_jwt(db_url, payload: dict) -> str:
    """Sign *payload* into a JWT using the database's SQL ``sign`` function.

    The HS256 secret is read server-side via ``security.get_secret('jwt_secret')``,
    so it never leaves the database (presumably the pgjwt extension — confirm).
    Raises ValueError if signing returns an empty token.
    """
    query = """
            SELECT sign(
                %s::json,
                (SELECT security.get_secret('jwt_secret')),
                %s
            )
            """
    params = (json.dumps(payload), "HS256")
    with psycopg2.connect(db_url) as conn, conn.cursor() as cur:
        cur.execute(query, params)
        token = cur.fetchone()[0]
    if not token:
        msg = "Failed to generate JWT"
        raise ValueError(msg)
    return token
23 |
--------------------------------------------------------------------------------
/tests/permissions/scripts/check_lamin_dev.py:
--------------------------------------------------------------------------------
1 | import lamindb_setup as ln_setup
2 |
3 | ln_setup.settings.auto_connect = False  # set before importing lamindb — presumably to avoid auto-connecting on import; confirm
4 |
5 | import lamindb as ln
6 |
7 | assert ln.setup.settings.user.handle == "testuser1"
8 |
9 | ln.connect("laminlabs/lamin-dev")
10 |
11 | assert ln.setup.settings.instance.slug == "laminlabs/lamin-dev"
12 |
13 | space_name = "Our test space for CI"
14 | ln.track(space=space_name)  # route this run into the restricted test space
15 |
16 | assert ln.context.space.name == space_name
17 | ulabel = ln.ULabel(name="My test ulabel in test space").save()
18 | assert ulabel.space.name == "All"  # ulabel should end up in common space
19 | ulabel.delete()  # delete silently passes in case another worker deleted the ulabel
20 | assert (
21 |     ln.context.transform.space.name == space_name
22 | )  # transform and run in restricted space
23 | assert ln.context.run.space.name == space_name  # transform and run in restricted space
24 | ln.context.transform.delete()
25 |
--------------------------------------------------------------------------------
/tests/permissions/scripts/clean_lamin_dev.py:
--------------------------------------------------------------------------------
1 | import lamindb_setup as ln_setup
2 |
3 | ln_setup.settings.auto_connect = False  # set before importing lamindb — presumably to avoid auto-connecting on import; confirm
4 |
5 | import lamindb as ln
6 |
7 | assert ln.setup.settings.user.handle == "testuser1"
8 |
9 | ln.connect("laminlabs/lamin-dev")
10 |
11 | assert ln.setup.settings.instance.slug == "laminlabs/lamin-dev"
12 |
13 | artifact = ln.Artifact.get(key="mytest")  # presumably created by a companion CI job — confirm
14 | assert artifact.space.name == "Our test space for CI"
15 | artifact.delete()
16 |
--------------------------------------------------------------------------------
/tests/permissions/scripts/setup_access.py:
--------------------------------------------------------------------------------
1 | import lamindb as ln # noqa
2 | import hubmodule.models as hm
3 | from uuid import uuid4
4 | from hubmodule._setup import _install_db_module
5 | from laminhub_rest.core.postgres import DbRoleHandler
6 |
7 | # create a db connection url that works with RLS
8 | JWT_ROLE_NAME = "permissions_jwt"
9 |
10 |
def create_jwt_user(dsn_admin: str, jwt_role_name: str):
    """Create (or alter) the JWT db role and return a connection URL for it."""
    role_handler = DbRoleHandler(dsn_admin)
    connection_url = role_handler.create(
        jwt_role_name, expires_in=None, alter_if_exists=True
    )
    # permissions are granted after the role has been created/altered
    role_handler.permission.grant_write_jwt(jwt_role_name)
    return connection_url
18 |
19 |
20 | pgurl = "postgresql://postgres:pwd@0.0.0.0:5432/pgtest"  # admin db connection url
21 | jwt_db_url = create_jwt_user(pgurl, jwt_role_name=JWT_ROLE_NAME)
22 | _install_db_module(pgurl, jwt_role_name=JWT_ROLE_NAME)
23 |
24 | print("Created jwt db connection")
25 |
26 | # create models
27 |
28 | full_access = ln.Space(name="full access", uid="00000001").save()  # type: ignore
29 | select_access = ln.Space(name="select access", uid="00000002").save()  # type: ignore
30 | no_access = ln.Space(name="no access", uid="00000003").save()  # type: ignore
31 | # set read role for the default space
32 | account = hm.Account(
33 |     id=ln.setup.settings.user._uuid.hex, uid="accntid1", role="read"
34 | ).save()
35 |
36 | # no access space
37 | ulabel = ln.ULabel(name="no_access_ulabel")
38 | ulabel.space = no_access
39 | ulabel.save()
40 |
41 | project = ln.Project(name="No_access_project")  # type: ignore
42 | project.space = no_access
43 | project.save()
44 |
45 | # setup write access space
46 | hm.AccessSpace(account=account, space=full_access, role="write").save()
47 |
48 | ulabel = ln.ULabel(name="full_access_ulabel")
49 | ulabel.space = full_access
50 | ulabel.save()
51 | # setup read access space
52 | hm.AccessSpace(account=account, space=select_access, role="read").save()
53 |
54 | ulabel = ln.ULabel(name="select_ulabel")
55 | ulabel.space = select_access
56 | ulabel.save()
57 | # artificial setup, but useful to test:
58 | # create a link table referencing rows in different spaces
59 | ulabel.projects.add(project)
60 |
61 | # default space, only select access by default
62 | ulabel = ln.ULabel(name="default_space_ulabel").save()
63 | ulabel.projects.add(project)
64 |
65 | project = ln.Project(name="default_space_project").save()
66 | ulabel.projects.add(project)
67 |
68 | # create a link table referencing ulabel from the default space and project from select space
69 | project = ln.Project(name="select_project")
70 | project.space = select_access
71 | project.save()
72 |
73 | ulabel.projects.add(project)
74 |
75 | # set up team and relevant models
76 | team_access = ln.Space(name="team access", uid="00000004").save()  # type: ignore
77 | team = hm.Team(id=uuid4().hex, uid="teamuiduid11", name="test_team", role="read").save()
78 | hm.AccountTeam(account=account, team=team).save()
79 | hm.AccessSpace(team=team, space=team_access, role="read").save()
80 |
81 | feature = ln.Feature(name="team_access_feature", dtype=float)
82 | feature.space = team_access
83 | feature.save()
84 |
85 | print("Created models")
86 |
87 | # save jwt db connection so subsequent tests connect through the RLS-enabled role
88 |
89 | ln.setup.settings.instance._db = jwt_db_url
90 | ln.setup.settings.instance._persist()
91 |
--------------------------------------------------------------------------------
/tests/permissions/scripts/setup_instance.py:
--------------------------------------------------------------------------------
1 | import lamindb_setup as ln_setup
2 | from laminci.db import setup_local_test_postgres
3 |
4 | pgurl = setup_local_test_postgres()
5 |
6 | ln_setup.init(
7 |     storage="./default_storage_permissions",
8 |     name="lamindb-test-permissions",
9 |     db=pgurl,
10 | )
11 |
12 | # can't add this app in the init because we don't want to trigger the initial migration
13 | # that conflicts with _install_db_module
14 | ln_setup.settings.instance._schema_str = "hubmodule"
15 | ln_setup.settings.instance._persist()
16 |
--------------------------------------------------------------------------------
/tests/storage/conftest.py:
--------------------------------------------------------------------------------
1 | import shutil
2 | from subprocess import DEVNULL, run
3 | from time import perf_counter
4 |
5 | import lamindb_setup as ln_setup
6 | import pytest
7 | from lamin_utils import logger
8 | from laminci.db import setup_local_test_postgres
9 |
10 | AUTO_CONNECT = ln_setup.settings.auto_connect
11 | ln_setup.settings.auto_connect = False
12 |
13 | import lamindb as ln
14 |
15 |
16 | def pytest_sessionstart():  # spin up local postgres + instance for the storage test session
17 |     t_execute_start = perf_counter()
18 |
19 |     ln_setup._TESTING = True
20 |     pgurl = setup_local_test_postgres()
21 |     ln.setup.init(
22 |         storage="./default_storage_unit_storage",
23 |         modules="bionty",
24 |         name="lamindb-unit-tests-storage",
25 |         db=pgurl,
26 |     )
27 |     ln.setup.register()  # temporarily
28 |     ln.setup.settings.auto_connect = True
29 |     ln.settings.creation.artifact_silence_missing_run_warning = True
30 |     ln.settings.storage = (
31 |         "s3://lamindb-ci/test-data"  # register as valid storage location
32 |     )
33 |     ln.settings.storage = "./default_storage_unit_storage"  # then switch back to local default
34 |     total_time_elapsed = perf_counter() - t_execute_start
35 |     print(f"Time to setup the instance: {total_time_elapsed:.3f}s")
36 |
37 |
38 | def pytest_sessionfinish(session: pytest.Session):  # tear down local and cloud test storage, instance, and postgres container
39 |     logger.set_verbosity(1)
40 |     shutil.rmtree("./default_storage_unit_storage")
41 |     # TODO: handle the cloud cleanup below more robustly
42 |     if ln.UPath("s3://lamindb-test/storage/.lamindb").exists():
43 |         ln.UPath("s3://lamindb-test/storage/.lamindb").rmdir()
44 |     another_storage = ln.UPath("s3://lamindb-ci/lamindb-unit-tests-cloud/.lamindb")
45 |     if another_storage.exists():
46 |         another_storage.rmdir()
47 |     ln.setup.delete("lamindb-unit-tests-storage", force=True)
48 |     run("docker stop pgtest && docker rm pgtest", shell=True, stdout=DEVNULL)  # noqa: S602
49 |     ln.setup.settings.auto_connect = AUTO_CONNECT
50 |
--------------------------------------------------------------------------------
/tests/storage/test_artifact_zarr.py:
--------------------------------------------------------------------------------
1 | import shutil
2 | from pathlib import Path
3 |
4 | import anndata as ad
5 | import lamindb as ln
6 | import numpy as np
7 | import pandas as pd
8 | import pytest
9 | from lamindb.core.storage._zarr import identify_zarr_type
10 | from lamindb_setup.core.upath import (
11 | CloudPath,
12 | )
13 |
14 |
@pytest.fixture(scope="session")
def get_small_adata():
    """Session-scoped minimal 2x3 AnnData with obs, var and a PCA embedding."""
    matrix = np.array([[1, 2, 3], [4, 5, 6]])
    genes = pd.DataFrame(index=["MYC", "TCF7", "GATA1"])
    embedding = {"X_pca": np.array([[1, 2], [3, 4]])}
    return ad.AnnData(X=matrix, obs={"feat1": ["A", "B"]}, var=genes, obsm=embedding)
23 |
24 |
25 | def test_zarr_upload_cache(get_small_adata):  # round-trips a zarr store through cloud storage and the local cache
26 |     previous_storage = ln.setup.settings.storage.root_as_str
27 |     ln.settings.storage = "s3://lamindb-test/core"
28 |
29 |     zarr_path = Path("./test_adata.zarr")
30 |     get_small_adata.write_zarr(zarr_path)
31 |
32 |     artifact = ln.Artifact(zarr_path, key="test_adata.zarr")
33 |     assert artifact.otype == "AnnData"
34 |     assert artifact.n_files >= 1
35 |     artifact.save()
36 |
37 |     assert isinstance(artifact.path, CloudPath)
38 |     assert artifact.path.exists()
39 |     assert identify_zarr_type(artifact.path) == "anndata"
40 |
41 |     shutil.rmtree(artifact.cache())  # remove cached copy so load() must re-download
42 |
43 |     cache_path = artifact._cache_path
44 |     assert isinstance(artifact.load(), ad.AnnData)
45 |     assert cache_path.is_dir()
46 |
47 |     shutil.rmtree(cache_path)
48 |     assert not cache_path.exists()
49 |     artifact.cache()  # re-populates the cache directory
50 |     assert cache_path.is_dir()
51 |
52 |     artifact.delete(permanent=True, storage=True)
53 |     shutil.rmtree(zarr_path)
54 |
55 |     # test zarr from memory
56 |     artifact = ln.Artifact(get_small_adata, key="test_adata.anndata.zarr")
57 |     assert artifact._local_filepath.is_dir()
58 |     assert artifact.otype == "AnnData"
59 |     assert artifact.suffix == ".anndata.zarr"
60 |     assert artifact.n_files >= 1
61 |
62 |     artifact.save()
63 |     assert isinstance(artifact.path, CloudPath)
64 |     assert artifact.path.exists()
65 |     cache_path = artifact._cache_path
66 |     assert cache_path.is_dir()
67 |
68 |     shutil.rmtree(cache_path)
69 |     assert not cache_path.exists()
70 |
71 |     artifact._memory_rep = None  # drop in-memory rep so load() goes through the (re-downloaded) cache
72 |
73 |     assert isinstance(artifact.load(), ad.AnnData)
74 |     assert cache_path.is_dir()
75 |
76 |     artifact.delete(permanent=True, storage=True)
77 |
78 |     ln.settings.storage = previous_storage  # restore storage for subsequent tests
79 |
--------------------------------------------------------------------------------
/tests/storage/test_switch_storage.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import lamindb as ln
4 |
5 |
6 | def test_settings_switch_storage():  # switch between local and s3 storage and verify settings round-trip
7 |     ln.settings.storage = "./default_storage_unit_storage"
8 |     assert (
9 |         ln.settings.storage.root.resolve()
10 |         == Path("./default_storage_unit_storage").resolve()
11 |     )
12 |     new_storage_location = "s3://lamindb-ci/test-settings-switch-storage"
13 |     ln.settings.storage = new_storage_location
14 |     assert ln.setup.settings.storage.type_is_cloud
15 |     assert ln.setup.settings.storage.root_as_str == new_storage_location
16 |     # root.fs contains the underlying fsspec filesystem
17 |     # the following is set by lamindb to True for s3 by default
18 |     assert ln.setup.settings.storage.root.fs.cache_regions
19 |     ln.settings.storage = new_storage_location, {"cache_regions": False}  # (root, fsspec-kwargs) tuple form
20 |     assert not ln.setup.settings.storage.root.fs.cache_regions
21 |     assert ln.Storage.filter(root=new_storage_location).one_or_none() is not None
22 |     # switch back to default storage
23 |     ln.settings.storage = "./default_storage_unit_storage"
--------------------------------------------------------------------------------