├── .github └── workflows │ ├── build.yml │ ├── doc-changes.yml │ └── update_ontologies.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── bionty ├── __init__.py ├── _biorecord.py ├── _organism.py ├── _shared_docstrings.py ├── _source.py ├── base │ ├── __init__.py │ ├── _display_sources.py │ ├── _ontology.py │ ├── _ontology_url.py │ ├── _public_ontology.py │ ├── _settings.py │ ├── dev │ │ ├── __init__.py │ │ ├── _doc_util.py │ │ ├── _handle_sources.py │ │ └── _io.py │ ├── entities │ │ ├── __init__.py │ │ ├── _bfxpipeline.py │ │ ├── _biosample.py │ │ ├── _cellline.py │ │ ├── _cellmarker.py │ │ ├── _celltype.py │ │ ├── _developmentalstage.py │ │ ├── _disease.py │ │ ├── _drug.py │ │ ├── _ethnicity.py │ │ ├── _experimentalfactor.py │ │ ├── _gene.py │ │ ├── _organism.py │ │ ├── _pathway.py │ │ ├── _phenotype.py │ │ ├── _protein.py │ │ ├── _shared_docstrings.py │ │ └── _tissue.py │ ├── scripts │ │ ├── bfxpipelines_info │ │ │ └── custom_pipelines.json │ │ ├── check_ontologies_reachable.py │ │ ├── generate_bfxpipelines.py │ │ └── update_new_ontologies.py │ └── sources.yaml ├── core │ ├── __init__.py │ ├── _add_ontology.py │ ├── _settings.py │ └── _source.py ├── ids.py ├── migrations │ ├── 0028_artifactcellline_created_at_and_more.py │ ├── 0029_alter_cellline_previous_runs_and_more.py │ ├── 0030_rename_publicsource_source_and_more.py │ ├── 0031_alter_cellmarker_name_and_more.py │ ├── 0032_rename_source_name_source_description_and_more.py │ ├── 0033_alter_artifactcellline_artifact_and_more.py │ ├── 0034_alter_source_unique_together.py │ ├── 0035_alter_protein_gene_symbol.py │ ├── 0036_alter_source_artifacts_and_more.py │ ├── 0037_alter_cellline_source_alter_cellmarker_source_and_more.py │ ├── 0038_alter_artifactcellline_created_by_and_more.py │ ├── 0039_alter_cellline_source_alter_cellmarker_source_and_more.py │ ├── 0040_rename_feature_ref_is_symbol_artifactgene_feature_ref_is_name_and_more.py │ ├── 0041_alter_artifactcellline_artifact_and_more.py │ 
├── 0042_lamindbv1.py │ ├── 0043_lamindbv2_part2.py │ ├── 0044_alter_cellline_space_alter_cellmarker_space_and_more.py │ ├── 0045_rename_aux_cellline__aux_rename_aux_cellmarker__aux_and_more.py │ ├── 0046_alter_cellline__aux_alter_cellmarker__aux_and_more.py │ ├── 0047_lamindbv1_part5.py │ ├── 0048_lamindbv1_part6.py │ ├── 0049_alter_schemacellmarker_cellmarker_and_more.py │ ├── 0050_alter_source_uid.py │ ├── 0051_alter_cellline__branch_code_and_more.py │ ├── 0052_rename__branch_code_cellline_branch_and_more.py │ ├── 0053_recordcellline_cellline_records_recordcellmarker_and_more.py │ ├── 0054_alter_cellline_branch_alter_cellline_space_and_more.py │ ├── 0055_rename_cellline_recordcellline_value_and_more.py │ ├── 0056_alter_recordtissue_record.py │ ├── 0057_alter_cellline_description_alter_cellline_synonyms_and_more.py │ ├── 0058_cellline_page_cellmarker_page_celltype_page_and_more.py │ ├── 0059_cellline_is_locked_cellmarker_is_locked_and_more.py │ ├── 0060_alter_cellline_is_locked_alter_cellmarker_is_locked_and_more.py │ ├── 0061_remove_cellline_page_remove_cellmarker_page_and_more.py │ ├── 0061_squashed.py │ └── __init__.py ├── models.py └── uids.py ├── docs ├── guide.md ├── guide │ ├── concepts.md │ ├── config.md │ └── test_notebooks.py ├── index.md └── reference.md ├── noxfile.py ├── pyproject.toml ├── scripts └── update_ontology_sources.ipynb └── tests ├── base ├── dev │ ├── test_handle_sources.py │ └── test_io.py ├── entities │ ├── test_bfxpipeline.py │ ├── test_biosample.py │ ├── test_cellline.py │ ├── test_cellmarker.py │ ├── test_celltype.py │ ├── test_developmentalstage.py │ ├── test_disease.py │ ├── test_drug.py │ ├── test_ethnicity.py │ ├── test_experimentalfactor.py │ ├── test_gene.py │ ├── test_organism.py │ ├── test_pathway.py │ ├── test_phenotype.py │ ├── test_protein.py │ └── test_tissue.py ├── test_bionty.py ├── test_ontology.py └── test_ontology_url.py └── core ├── conftest.py ├── test_models.py ├── test_organism_requirement.py └── test_source.py 
/.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | on: 3 | push: 4 | branches: [main] 5 | pull_request: 6 | branches: [main, staging] 7 | workflow_dispatch: 8 | 9 | jobs: 10 | pre-filter: 11 | runs-on: ubuntu-latest 12 | outputs: 13 | matrix: ${{ steps.set-matrix.outputs.matrix }} 14 | steps: 15 | - uses: actions/checkout@v4 16 | with: 17 | fetch-depth: 0 18 | 19 | - uses: dorny/paths-filter@v3 20 | id: changes 21 | if: github.event_name != 'push' 22 | with: 23 | filters: | 24 | base: 25 | - 'bionty/base/**' 26 | - 'tests/base/**' 27 | 28 | - id: set-matrix 29 | shell: bash 30 | run: | 31 | BASE_GROUPS=$(jq -n -c '["bionty-core", "bionty-docs"]') 32 | 33 | if [[ "${{ github.event_name }}" == "push" || "${{ steps.changes.outputs.base }}" == "true" ]]; then 34 | # Run everything on push or when base paths change 35 | MATRIX=$(jq -n -c --argjson groups "$BASE_GROUPS" '{group: ($groups + ["bionty-base"])}') 36 | else 37 | # Otherwise only run base groups 38 | MATRIX=$(jq -n -c --argjson groups "$BASE_GROUPS" '{group: $groups}') 39 | fi 40 | 41 | # Output as single line for GitHub Actions 42 | echo "matrix=$(echo "$MATRIX" | jq -c .)" >> $GITHUB_OUTPUT 43 | 44 | # Pretty print for debugging 45 | echo "Generated matrix:" 46 | echo "$MATRIX" | jq . 
47 | 48 | test: 49 | needs: pre-filter 50 | runs-on: ubuntu-latest 51 | strategy: 52 | fail-fast: false 53 | matrix: ${{fromJson(needs.pre-filter.outputs.matrix)}} 54 | timeout-minutes: 12 55 | steps: 56 | - uses: actions/checkout@v4 57 | with: 58 | submodules: recursive 59 | fetch-depth: 0 60 | 61 | - uses: actions/setup-python@v5 62 | with: 63 | python-version: "3.12" 64 | 65 | - name: cache pre-commit 66 | uses: actions/cache@v4 67 | with: 68 | path: ~/.cache/pre-commit 69 | key: pre-commit-${{ runner.os }}-${{ hashFiles('.pre-commit-config.yaml') }} 70 | 71 | - name: Install laminci 72 | run: pip install "laminci@git+https://x-access-token:${{ secrets.LAMIN_BUILD_DOCS }}@github.com/laminlabs/laminci" 73 | 74 | - name: Run lint 75 | if: matrix.group == 'bionty-core' 76 | run: nox -s lint 77 | 78 | - uses: aws-actions/configure-aws-credentials@v4 79 | if: ${{ github.event_name == 'push' || github.event.pull_request.head.repo.full_name == github.repository }} 80 | with: 81 | aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} 82 | aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 83 | aws-region: eu-central-1 84 | 85 | - name: checkout lndocs 86 | uses: actions/checkout@v4 87 | if: matrix.group == 'bionty-docs' 88 | with: 89 | repository: laminlabs/lndocs 90 | ssh-key: ${{ secrets.READ_LNDOCS }} 91 | path: lndocs 92 | ref: main 93 | 94 | - name: Run build 95 | run: nox -s "build(group='${{ matrix.group }}')" 96 | 97 | - uses: actions/upload-artifact@v4 98 | with: 99 | name: coverage--${{ matrix.group }} 100 | path: .coverage 101 | include-hidden-files: true 102 | 103 | - uses: nwtgck/actions-netlify@v1.2 104 | if: ${{ matrix.group == 'bionty-docs' && !(github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name != github.repository) }} 105 | with: 106 | publish-dir: "_build/html" 107 | production-deploy: ${{ github.event_name == 'push' }} 108 | github-token: ${{ secrets.GITHUB_TOKEN }} 109 | enable-commit-comment: false 110 | 
env: 111 | NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }} 112 | NETLIFY_SITE_ID: ${{ secrets.NETLIFY_SITE_ID }} 113 | 114 | coverage: 115 | needs: test 116 | runs-on: ubuntu-latest 117 | steps: 118 | - uses: actions/checkout@v4 119 | 120 | - uses: actions/setup-python@v5 121 | with: 122 | python-version: "3.13" 123 | 124 | - name: Install coverage dependencies 125 | run: | 126 | pip install -U pip uv 127 | uv pip install --system coverage[toml] 128 | uv pip install --system --no-deps . 129 | 130 | - uses: actions/download-artifact@v4 131 | 132 | - name: Run coverage 133 | run: | 134 | coverage combine coverage--*/.coverage* 135 | coverage report --fail-under=0 136 | coverage xml 137 | 138 | - uses: codecov/codecov-action@v2 139 | with: 140 | token: ${{ secrets.CODECOV_TOKEN }} 141 | -------------------------------------------------------------------------------- /.github/workflows/doc-changes.yml: -------------------------------------------------------------------------------- 1 | name: doc-changes 2 | 3 | on: 4 | pull_request_target: 5 | branches: 6 | - main 7 | types: 8 | - closed 9 | 10 | jobs: 11 | latest-changes: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v4 15 | - uses: actions/setup-python@v5 16 | with: 17 | python-version: "3.11" 18 | - run: pip install "laminci[doc-changes]@git+https://x-access-token:${{ secrets.LAMIN_BUILD_DOCS }}@github.com/laminlabs/laminci" 19 | - run: laminci doc-changes 20 | env: 21 | repo_token: ${{ secrets.GITHUB_TOKEN }} 22 | docs_token: ${{ secrets.LAMIN_BUILD_DOCS }} 23 | changelog_file: lamin-docs/docs/changelog/soon/bionty.md 24 | -------------------------------------------------------------------------------- /.github/workflows/update_ontologies.yml: -------------------------------------------------------------------------------- 1 | name: Update ontologies 2 | 3 | on: 4 | schedule: 5 | - cron: "0 0 1 */3 *" # runs at 00:00 UTC on the first day of every 3rd month 6 | workflow_dispatch: 7 | 8 
| jobs: 9 | test-urls: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - name: checkout repository 14 | uses: actions/checkout@v4 15 | 16 | - name: setup python 17 | uses: actions/setup-python@v5 18 | with: 19 | python-version: "3.13" 20 | 21 | - name: Install Bionty 22 | run: pip install .[dev] 23 | 24 | - name: check URLs 25 | run: python ./bionty/base/scripts/update_new_ontologies.py 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # bionty data 2 | data/ 3 | sources/ 4 | versions/ 5 | 6 | # macOS 7 | .DS_Store 8 | .AppleDouble 9 | .LSOverride 10 | 11 | # Byte-compiled / optimized / DLL files 12 | __pycache__/ 13 | *.py[cod] 14 | *$py.class 15 | 16 | # C extensions 17 | *.so 18 | 19 | # Distribution / packaging 20 | .Python 21 | env/ 22 | build/ 23 | develop-eggs/ 24 | dist/ 25 | downloads/ 26 | eggs/ 27 | .eggs/ 28 | lib/ 29 | lib64/ 30 | parts/ 31 | sdist/ 32 | var/ 33 | wheels/ 34 | *.egg-info/ 35 | .installed.cfg 36 | *.egg 37 | 38 | # PyInstaller 39 | # Usually these files are written by a python script from a template 40 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
41 | *.manifest 42 | *.spec 43 | 44 | # Installer logs 45 | pip-log.txt 46 | pip-delete-this-directory.txt 47 | 48 | # Unit test / coverage reports 49 | htmlcov/ 50 | .tox/ 51 | .coverage 52 | .coverage.* 53 | .cache 54 | nosetests.xml 55 | coverage.xml 56 | *.cover 57 | .hypothesis/ 58 | .pytest_cache/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # celery beat schedule file 88 | celerybeat-schedule 89 | 90 | # SageMath parsed files 91 | *.sage.py 92 | 93 | # dotenv 94 | .env 95 | 96 | # virtualenv 97 | .venv 98 | venv/ 99 | ENV/ 100 | 101 | # mypy 102 | .mypy_cache/ 103 | 104 | # IDE settings 105 | .vscode/ 106 | .idea/ 107 | 108 | # Lamin 109 | _build 110 | docs/bionty.* 111 | lamin_sphinx 112 | docs/conf.py 113 | _docs_tmp* 114 | _dynamic/ 115 | 116 | # Convenience 117 | test.ipynb 118 | run-tests 119 | registration_template.ipynb 120 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | fail_fast: false 2 | default_language_version: 3 | python: python3 4 | default_stages: 5 | - pre-commit 6 | - pre-push 7 | minimum_pre_commit_version: 2.12.0 8 | repos: 9 | - repo: https://github.com/pre-commit/mirrors-prettier 10 | rev: v4.0.0-alpha.4 11 | hooks: 12 | - id: prettier 13 | exclude: | 14 | (?x)( 15 | docs/changelog.md 16 | ) 17 | - repo: https://github.com/kynan/nbstripout 18 | rev: 0.6.1 19 | hooks: 20 | - id: nbstripout 21 | exclude: | 22 | (?x)( 23 | docs/examples/| 24 | docs/notes/ 25 | ) 26 | - repo: https://github.com/astral-sh/ruff-pre-commit 27 | rev: v0.9.4 28 | hooks: 29 | 
- id: ruff 30 | args: [--fix, --exit-non-zero-on-fix, --unsafe-fixes] 31 | - id: ruff-format 32 | - repo: https://github.com/pre-commit/pre-commit-hooks 33 | rev: v4.5.0 34 | hooks: 35 | - id: detect-private-key 36 | - id: check-ast 37 | - id: end-of-file-fixer 38 | exclude: | 39 | (?x)( 40 | .github/workflows/latest-changes.jinja2| 41 | bionty/base/data 42 | ) 43 | - id: mixed-line-ending 44 | args: [--fix=lf] 45 | - id: trailing-whitespace 46 | - id: check-case-conflict 47 | - repo: https://github.com/pre-commit/mirrors-mypy 48 | rev: v1.14.1 49 | hooks: 50 | - id: mypy 51 | args: [--no-strict-optional, --ignore-missing-imports] 52 | additional_dependencies: ["types-requests", "types-attrs"] 53 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Stars](https://img.shields.io/github/stars/laminlabs/bionty?logo=GitHub&color=yellow)](https://github.com/laminlabs/bionty) 2 | [![pypi](https://img.shields.io/pypi/v/bionty?color=blue&label=pypi%20package)](https://pypi.org/project/bionty) 3 | 4 | # bionty: Registries for basic biological entities, coupled to public ontologies 5 | 6 | - Access >20 public ontologies such as Gene, Protein, CellMarker, ExperimentalFactor, CellType, CellLine, Tissue, … 7 | - Create records from entries in public ontologies using `.from_source()`. 8 | - Access full underlying public ontologies via `.public()` to search & bulk-create records. 9 | - Create in-house ontologies by extending public ontologies using hierarchical relationships among records (`.parents`). 10 | - Use `.synonyms` and `.abbr` to manage synonyms. 11 | - Safeguards against typos & duplications. 12 | - Ontology versioning via the `bionty.Source` registry. 13 | 14 | Read the [docs](https://docs.lamin.ai/bionty). 
15 | -------------------------------------------------------------------------------- /bionty/__init__.py: -------------------------------------------------------------------------------- 1 | """Basic biological entities, coupled to public ontologies [`source `__]. 2 | 3 | - Create records from public ontologies using `.from_source()`. 4 | - Access public ontologies via `.public()` to search & bulk-create records. 5 | - Use hierarchical relationships among records (`.parents`). 6 | - Use `.synonyms` and `.abbr` to manage synonyms. 7 | - Manage ontology versions. 8 | 9 | Install and mount `bionty` in a new instance: 10 | 11 | >>> pip install 'bionty' 12 | >>> lamin init --storage --modules bionty 13 | 14 | Import the package: 15 | 16 | >>> import bionty as bt 17 | 18 | Access public ontologies: 19 | 20 | >>> genes = bt.Gene.public() 21 | >>> genes.validate(["BRCA1", "TCF7"], field="symbol") 22 | 23 | Create records from public ontologies: 24 | 25 | >>> cell_type = bt.CellType.from_source(ontology_id="CL:0000037") 26 | >>> cell_type.save() 27 | 28 | View ontological hierarchy: 29 | 30 | >>> cell_type.view_parents() 31 | 32 | Create in-house ontologies: 33 | 34 | >>> cell_type_new = bt.CellType(name="my new cell type") 35 | >>> cell_type_new.save() 36 | >>> cell_type_new.parents.add(cell_type) 37 | >>> cell_type_new.view_parents() 38 | 39 | Manage synonyms: 40 | 41 | >>> cell_type_new.add_synonyms(["my cell type", "my cell"]) 42 | >>> cell_type_new.set_abbr("MCT") 43 | 44 | Detailed guides: 45 | 46 | - :doc:`docs:public-ontologies` 47 | - :doc:`docs:manage-ontologies` 48 | 49 | Registries: 50 | 51 | .. autosummary:: 52 | :toctree: . 53 | 54 | Organism 55 | Gene 56 | Protein 57 | CellMarker 58 | CellType 59 | CellLine 60 | Tissue 61 | Disease 62 | Pathway 63 | Phenotype 64 | ExperimentalFactor 65 | DevelopmentalStage 66 | Ethnicity 67 | 68 | Settings: 69 | 70 | .. autosummary:: 71 | :toctree: . 72 | 73 | settings 74 | 75 | Ontology versions: 76 | 77 | .. 
autosummary:: 78 | :toctree: . 79 | 80 | Source 81 | 82 | Submodules: 83 | 84 | .. autosummary:: 85 | :toctree: . 86 | 87 | core 88 | base 89 | uids 90 | 91 | """ 92 | 93 | __version__ = "1.8.1" 94 | 95 | from lamindb_setup._check_setup import _check_instance_setup 96 | 97 | from . import _biorecord, base, uids 98 | 99 | _check_instance_setup(from_module="bionty") 100 | 101 | from .core._settings import settings 102 | from .models import ( 103 | CellLine, 104 | CellMarker, 105 | CellType, 106 | DevelopmentalStage, 107 | Disease, 108 | Ethnicity, 109 | ExperimentalFactor, 110 | Gene, 111 | Organism, 112 | Pathway, 113 | Phenotype, 114 | Protein, 115 | Source, 116 | Tissue, 117 | ) 118 | 119 | __all__ = [ 120 | # registries 121 | "CellLine", 122 | "CellMarker", 123 | "CellType", 124 | "DevelopmentalStage", 125 | "Disease", 126 | "Ethnicity", 127 | "ExperimentalFactor", 128 | "Gene", 129 | "Organism", 130 | "Pathway", 131 | "Phenotype", 132 | "Protein", 133 | "Source", 134 | "Tissue", 135 | # modules 136 | "settings", 137 | "base", 138 | "core", 139 | "uids", 140 | ] 141 | 142 | ids = uids # backward compat 143 | -------------------------------------------------------------------------------- /bionty/_biorecord.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | from .uids import encode_uid 6 | 7 | if TYPE_CHECKING: 8 | from types import ModuleType 9 | 10 | from .models import BioRecord 11 | 12 | 13 | def lookup2kwargs(record: BioRecord, *args, **kwargs) -> dict: 14 | """Pass bionty search/lookup results.""" 15 | from ._organism import create_or_get_organism_record 16 | from ._source import get_source_record 17 | 18 | arg = args[0] 19 | if isinstance(arg, tuple): 20 | bionty_kwargs = arg._asdict() # type:ignore 21 | else: 22 | bionty_kwargs = arg[0]._asdict() 23 | 24 | if len(bionty_kwargs) > 0: 25 | # add organism and source 26 | organism_record = 
create_or_get_organism_record( 27 | registry=record.__class__, organism=kwargs.get("organism") 28 | ) 29 | if organism_record is not None: 30 | bionty_kwargs["organism"] = organism_record 31 | bionty_kwargs["source"] = get_source_record( 32 | registry=record.__class__, 33 | organism=organism_record, 34 | source=kwargs.get("source"), 35 | ) 36 | 37 | model_field_names = {i.name for i in record._meta.fields} 38 | model_field_names.add("parents") 39 | bionty_kwargs = { 40 | k: v for k, v in bionty_kwargs.items() if k in model_field_names 41 | } 42 | return encode_uid(registry=record.__class__, kwargs=bionty_kwargs) 43 | 44 | 45 | def list_biorecord_models(schema_module: ModuleType): 46 | """List all BioRecord models in a given schema module.""" 47 | import inspect 48 | 49 | import lamindb as ln # needed here 50 | 51 | from .models import BioRecord 52 | 53 | return [ 54 | attr 55 | for attr in dir(schema_module.models) 56 | if inspect.isclass(getattr(schema_module.models, attr)) 57 | and issubclass(getattr(schema_module.models, attr), BioRecord) 58 | ] 59 | -------------------------------------------------------------------------------- /bionty/_organism.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | import pandas as pd 4 | from django.core.exceptions import FieldDoesNotExist, ObjectDoesNotExist 5 | from lamin_utils import logger 6 | 7 | from .models import BioRecord, Organism 8 | 9 | 10 | class OrganismNotSet(SystemExit): 11 | """The `organism` parameter was not passed or is not globally set.""" 12 | 13 | pass 14 | 15 | 16 | def create_or_get_organism_record( 17 | organism: str | Organism | None, registry: type[BioRecord], field: str | None = None 18 | ) -> Organism | None: 19 | """Create or get an organism record from the given organism name.""" 20 | # return None if a registry doesn't require organism field 21 | organism_record = None 22 | if is_organism_required(registry): 23 | # using global setting of 
organism 24 | from .core._settings import settings 25 | from .models import Organism 26 | 27 | if organism is None and settings.organism is not None: 28 | logger.debug(f"using default organism = {settings.organism.name}") 29 | return settings.organism 30 | 31 | if isinstance(organism, Organism): 32 | organism_record = organism 33 | elif isinstance(organism, str): 34 | try: 35 | # existing organism record 36 | organism_record = Organism.objects.get(name=organism) 37 | except ObjectDoesNotExist: 38 | try: 39 | # create a organism record from bionty reference 40 | organisms = Organism.from_values([organism]) 41 | if len(organisms) == 0: 42 | raise ValueError( 43 | f"Organism {organism} can't be created from the bionty reference, check your spelling or create it manually." 44 | ) 45 | organism_record = organisms[0].save() # type:ignore 46 | except KeyError: 47 | # no such organism is found in bionty reference 48 | organism_record = None 49 | 50 | if organism_record is None: 51 | if hasattr(registry, "_ontology_id_field") and field in { 52 | registry._ontology_id_field, 53 | "uid", 54 | }: 55 | return None 56 | raise OrganismNotSet( 57 | f"{registry.__name__} requires to specify a organism name via `organism=` or `bionty.settings.organism=`!" 58 | ) 59 | 60 | return organism_record 61 | 62 | 63 | def is_organism_required(registry: type[BioRecord]) -> bool: 64 | """Check if the registry has an organism field and is required. 65 | 66 | Returns: 67 | True if the registry has an organism field and is required, False otherwise. 
68 | """ 69 | try: 70 | organism_field = registry._meta.get_field("organism") 71 | # organism is not required or not a relation 72 | if organism_field.null or not organism_field.is_relation: 73 | return False 74 | else: 75 | return True 76 | except FieldDoesNotExist: 77 | return False 78 | 79 | 80 | def organism_from_ensembl_id(id: str, using_key: str | None) -> Organism | None: 81 | """Get organism record from ensembl id.""" 82 | import bionty as bt 83 | from bionty.base.dev._io import s3_bionty_assets 84 | 85 | # below has to consume a file path and NOT a directory because otherwise it fails on reticulate 86 | localpath = s3_bionty_assets( 87 | ".lamindb/0QeqXlKq9aqW8aqe0000.parquet", 88 | bt.base.settings.dynamicdir / "ensembl_prefix.parquet", 89 | ) 90 | ensembl_prefixes = pd.read_parquet(localpath).set_index("gene_prefix") 91 | 92 | prefix = ( 93 | re.search(r"^[A-Za-z]+", id).group(0) if re.search(r"^[A-Za-z]+", id) else id 94 | ) 95 | if prefix in ensembl_prefixes.index: 96 | organism_name = ensembl_prefixes.loc[prefix, "name"].lower() 97 | 98 | using_key = None if using_key == "default" else using_key 99 | 100 | organism_record = ( 101 | bt.Organism.using(using_key).filter(name=organism_name).one_or_none() 102 | ) 103 | if organism_record is None: 104 | organisms = bt.Organism.from_values([organism_name]) 105 | if len(organisms) > 0: 106 | organism_record = organisms[0] 107 | organism_record.save(using=using_key) 108 | else: 109 | raise OrganismNotSet( 110 | f"Organism {organism_name} can't be created from the source, check your spelling or create it manually." 
111 | ) 112 | return organism_record 113 | return None 114 | -------------------------------------------------------------------------------- /bionty/_shared_docstrings.py: -------------------------------------------------------------------------------- 1 | doc_from_source = """\ 2 | source: Optional Source record to use 3 | mute: Whether to suppress logging 4 | """ 5 | -------------------------------------------------------------------------------- /bionty/_source.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from lamindb.models import SQLRecord 3 | 4 | import bionty.base as bt_base 5 | 6 | from ._organism import create_or_get_organism_record 7 | 8 | 9 | def get_source_record( 10 | registry: type[SQLRecord], 11 | organism: str | SQLRecord | None = None, 12 | source: SQLRecord | None = None, 13 | ) -> SQLRecord: 14 | """Get a Source record for a given BioRecord model.""" 15 | from .models import Source 16 | 17 | if source is not None: 18 | return source 19 | 20 | organism_record = create_or_get_organism_record(organism, registry) 21 | 22 | entity_name = registry.__get_name_with_module__() 23 | filter_kwargs = {"entity": entity_name} 24 | if isinstance(organism_record, SQLRecord): 25 | filter_kwargs["organism"] = organism_record.name 26 | elif isinstance(organism, str): 27 | filter_kwargs["organism"] = organism 28 | 29 | sources = Source.filter(**filter_kwargs).all() 30 | if len(sources) == 0: 31 | raise ValueError(f"No source record found for filter {filter_kwargs}") 32 | if len(sources) == 1: 33 | return sources.one() 34 | 35 | current_sources = sources.filter(currently_used=True).all() 36 | if len(current_sources) == 1: 37 | return current_sources.first() 38 | elif len(current_sources) > 1: 39 | if organism is None: 40 | # for Organism, in most cases we load from the vertebrates source because ncbitaxon is too big 41 | if entity_name == "bionty.Organism": 42 | current_sources_vertebrates = 
current_sources.filter( 43 | organism="vertebrates" 44 | ).all() 45 | if len(current_sources_vertebrates) > 0: 46 | return current_sources_vertebrates.first() 47 | # return source with organism="all" 48 | current_sources_all = current_sources.filter(organism="all").all() 49 | if len(current_sources_all) > 0: 50 | return current_sources_all.first() 51 | return current_sources.first() 52 | else: # len(current_sources) == 0 53 | sources_all = sources.filter(organism="all").all() 54 | if len(sources_all) > 0: 55 | # return source with organism="all" 56 | return sources_all.first() 57 | return sources.first() 58 | 59 | 60 | def filter_public_df_columns( 61 | model: type[SQLRecord], public_ontology: bt_base.PublicOntology 62 | ) -> pd.DataFrame: 63 | """Filter columns of public ontology to match the model fields.""" 64 | 65 | def _prepare_public_df(model: type[SQLRecord], bionty_df: pd.DataFrame): 66 | """Prepare the bionty DataFrame to match the model fields.""" 67 | if bionty_df.empty: 68 | return bionty_df 69 | if model.__get_name_with_module__() == "bionty.Gene": 70 | # groupby ensembl_gene_id and concat ncbi_gene_ids 71 | groupby_id_col = ( 72 | "ensembl_gene_id" if "ensembl_gene_id" in bionty_df else "stable_id" 73 | ) 74 | if groupby_id_col not in bionty_df: 75 | raise ValueError( 76 | "public df must contain column 'ensembl_gene_id' or 'stable_id'" 77 | ) 78 | bionty_df.drop( 79 | columns=["hgnc_id", "mgi_id", "index"], errors="ignore", inplace=True 80 | ) 81 | agg_kwags = {} 82 | if "ncbi_gene_id" in bionty_df: 83 | bionty_df.drop_duplicates( 84 | [groupby_id_col, "ncbi_gene_id"], inplace=True 85 | ) 86 | bionty_df["ncbi_gene_id"] = bionty_df["ncbi_gene_id"].fillna("") 87 | bionty_df.rename( 88 | columns={"ncbi_gene_id": "ncbi_gene_ids"}, inplace=True 89 | ) 90 | agg_kwags["ncbi_gene_ids"] = "|".join 91 | for col in ["symbol", "biotype", "description", "synonyms"]: 92 | if col in bionty_df: 93 | agg_kwags[col] = "first" # type: ignore 94 | bionty_df = 
bionty_df.groupby(groupby_id_col).agg(agg_kwags).reset_index() 95 | 96 | # rename definition to description for the bionty registry in db 97 | if "definition" in bionty_df: 98 | bionty_df.rename(columns={"definition": "description"}, inplace=True) 99 | return bionty_df 100 | 101 | bionty_df = pd.DataFrame() 102 | if public_ontology is not None: 103 | model_field_names = {i.name for i in model._meta.fields} 104 | # parents needs to be added here as relationships aren't in fields 105 | model_field_names.add("parents") 106 | bionty_df = _prepare_public_df( 107 | model, public_ontology.to_dataframe().reset_index() 108 | ) 109 | bionty_df = bionty_df.loc[:, bionty_df.columns.isin(model_field_names)] 110 | return bionty_df 111 | -------------------------------------------------------------------------------- /bionty/base/__init__.py: -------------------------------------------------------------------------------- 1 | """Access to public ontologies. 2 | 3 | `bionty.base` is the read-only interface for public ontology that underlies bionty and doesn't require a lamindb instance. 4 | 5 | Import the package: 6 | 7 | >>> import bionty.base as bt_base 8 | 9 | Access public ontologies: 10 | 11 | >>> genes = bt_base.Gene() 12 | 13 | Get a DataFrame of all available values: 14 | 15 | >>> genes.to_dataframe() 16 | 17 | Entities 18 | ======== 19 | 20 | Bionty base provides access to several entities, most of which are also supported by Bionty. 21 | 22 | .. autosummary:: 23 | :toctree: . 24 | 25 | Organism 26 | Gene 27 | Protein 28 | CellMarker 29 | CellType 30 | CellLine 31 | Tissue 32 | Disease 33 | Phenotype 34 | Pathway 35 | ExperimentalFactor 36 | DevelopmentalStage 37 | Drug 38 | Ethnicity 39 | BFXPipeline 40 | BioSample 41 | 42 | Base class 43 | ---------- 44 | 45 | `Pronto Ontology objects `__ can be accessed via `{entity}.to_pronto()`. 46 | 47 | .. autosummary:: 48 | :toctree: . 
49 | 50 | PublicOntology 51 | PublicOntologyField 52 | 53 | Ontology sources 54 | ---------------- 55 | 56 | .. autosummary:: 57 | :toctree: . 58 | 59 | display_sources 60 | display_currently_used_sources 61 | settings 62 | 63 | """ 64 | 65 | # dynamic classes 66 | from . import dev 67 | from ._display_sources import display_currently_used_sources, display_sources 68 | 69 | # tools 70 | from ._public_ontology import PublicOntology, PublicOntologyField 71 | from ._settings import settings 72 | 73 | # sources 74 | # from .dev._handle_sources import reset_sources 75 | from .entities._bfxpipeline import BFXPipeline 76 | from .entities._biosample import BioSample 77 | from .entities._cellline import CellLine 78 | from .entities._cellmarker import CellMarker 79 | from .entities._celltype import CellType 80 | from .entities._developmentalstage import DevelopmentalStage 81 | from .entities._disease import Disease 82 | from .entities._drug import Drug 83 | from .entities._ethnicity import Ethnicity 84 | from .entities._experimentalfactor import ExperimentalFactor 85 | from .entities._gene import Gene 86 | from .entities._organism import Organism 87 | from .entities._pathway import Pathway 88 | from .entities._phenotype import Phenotype 89 | from .entities._protein import Protein 90 | from .entities._tissue import Tissue 91 | -------------------------------------------------------------------------------- /bionty/base/_display_sources.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from lamin_utils import logger 3 | from lamindb_setup.core import deprecated 4 | 5 | from bionty.base.dev._handle_sources import LAMINDB_INSTANCE_LOADED 6 | 7 | from ._settings import settings 8 | from .dev._handle_sources import parse_currently_used_sources 9 | 10 | 11 | def display_sources() -> pd.DataFrame: 12 | """Displays all available sources. 
13 | 14 | Example:: 15 | 16 | import bionty.base as bt_base 17 | 18 | bt_base.display_sources() 19 | """ 20 | from .dev._handle_sources import parse_sources_yaml 21 | 22 | return parse_sources_yaml(settings.public_sources).set_index("entity") # type: ignore 23 | 24 | 25 | @deprecated("display_sources") 26 | def display_available_sources() -> pd.DataFrame: 27 | return display_sources() 28 | 29 | 30 | def display_currently_used_sources(mute: bool = False) -> pd.DataFrame: 31 | """Displays all currently used sources. 32 | 33 | Active version is unique for entity + organism. 34 | 35 | Example:: 36 | 37 | import bionty.base as bt_base 38 | 39 | bt_base.display_currently_used_sources() 40 | """ 41 | if LAMINDB_INSTANCE_LOADED(): 42 | if not mute: 43 | logger.error( 44 | "You have a LaminDB instance loaded, please run the following to check default sources:\n" 45 | " → bt.Source.filter(currently_used=True).to_dataframe()" 46 | ) 47 | 48 | versions = parse_currently_used_sources(settings.public_sources) 49 | 50 | df_rows = [] 51 | for bionty_class, bionty_class_data in versions.items(): 52 | for organism, organism_data in bionty_class_data.items(): 53 | for source, version in organism_data.items(): 54 | df_rows.append( 55 | { 56 | "entity": bionty_class, 57 | "organism": organism, 58 | "name": source, 59 | "version": version, 60 | } 61 | ) 62 | 63 | return pd.DataFrame(df_rows).set_index("entity") 64 | -------------------------------------------------------------------------------- /bionty/base/_ontology_url.py: -------------------------------------------------------------------------------- 1 | from functools import lru_cache 2 | 3 | import requests 4 | 5 | 6 | def import_bioregistry(): 7 | """Import bioregistry module if available.""" 8 | try: 9 | import bioregistry 10 | 11 | return bioregistry 12 | except ImportError: 13 | raise ImportError( 14 | "Please install bioregistry with `pip install bioregistry`." 
class OntologyURLError(Exception):
    """Root of all errors raised while resolving an ontology download URL."""


class OntologyNotFoundError(OntologyURLError):
    """Signals that no ontology is known for the requested prefix."""


class OntologyVersionNotFoundError(OntologyURLError):
    """Signals that the requested ontology version has no resolvable URL."""
'{prefix}'" 83 | ) 84 | 85 | 86 | def _prefix_exists(prefix: str) -> bool: 87 | """Check if a prefix exists in any registry.""" 88 | bioregistry = import_bioregistry() 89 | 90 | if bioregistry.normalize_prefix(prefix): 91 | return True 92 | 93 | # Check OLS4 94 | try: 95 | response = requests.head( 96 | f"https://www.ebi.ac.uk/ols4/api/ontologies/{prefix.lower()}", timeout=5 97 | ) 98 | if response.status_code < 400: 99 | return True 100 | except requests.RequestException: 101 | pass 102 | 103 | return False 104 | 105 | 106 | def _url_exists(url: str) -> bool: 107 | """Check if a URL exists and returns a valid response.""" 108 | try: 109 | response = requests.head(url, timeout=5, allow_redirects=True) 110 | return response.status_code >= 200 and response.status_code < 400 111 | except requests.RequestException: 112 | return False 113 | 114 | 115 | def _extract_version_from_iri(version_iri: str | None): 116 | """Extract version from an IRI string by taking the second-to-last path component.""" 117 | if isinstance(version_iri, str): 118 | # If we have at least two parts, return the second-to-last 119 | parts = version_iri.split("/") 120 | if len(parts) >= 2: 121 | return parts[-2].removeprefix("v") 122 | 123 | 124 | def _get_specific_version(prefix: str, version: str) -> tuple[str | None, str | None]: 125 | """Get URL for a specific version of an ontology using standard patterns.""" 126 | bioregistry = import_bioregistry() 127 | 128 | # Clean version string 129 | clean_version = version[1:] if version.startswith("v") else version 130 | obo_prefix = bioregistry.get_obofoundry_prefix(prefix) or prefix 131 | 132 | # Try standard OBO Foundry versioned patterns 133 | standard_patterns = [ 134 | # Direct version path 135 | f"http://purl.obolibrary.org/obo/{obo_prefix.lower()}/{clean_version}/{obo_prefix.lower()}.owl", 136 | # Releases directory 137 | f"http://purl.obolibrary.org/obo/{obo_prefix.lower()}/releases/{clean_version}/{obo_prefix.lower()}.owl", 138 | # Semantic 
def _get_latest_from_ols4(prefix: str) -> tuple[str | None, str | None]:
    """Look up the newest release of an ontology via the OLS4 API.

    Returns:
        `(url, version)`, where `url` is the `versionIri` if it is reachable
        and otherwise the reachable `fileLocation`. `(None, None)` is returned
        when the request fails, OLS4 does not answer with status 200, or
        neither URL is reachable.
    """
    endpoint = f"https://www.ebi.ac.uk/ols4/api/ontologies/{prefix.lower()}"
    try:
        response = requests.get(endpoint, timeout=30)
        if response.status_code != 200:
            return None, None

        config = response.json().get("config", {})
        version = config.get("version")

        # Preferred source: the version IRI, provided it can be reached.
        version_iri = config.get("versionIri")
        if version_iri and _url_exists(version_iri):
            return version_iri, version or _extract_version_from_iri(version_iri)

        # Fallback: the plain file location.
        file_location = config.get("fileLocation")
        if file_location and _url_exists(file_location):
            if version_iri and not version:
                # an unreachable version IRI can still carry the version, for example: pw
                version = _extract_version_from_iri(version_iri)
            return file_location, version

        return None, None
    except requests.RequestException:
        return None, None
class Settings:
    """Filesystem locations used by this package.

    Args:
        datasetdir: Directory for datasets. Defaults to `data/` next to this
            module.
        dynamicdir: Dynamic directory. Defaults to `_dynamic/` next to this
            module.
    """

    def __init__(
        self,
        datasetdir: str | Path = ROOT_DIR / "data/",
        dynamicdir: str | Path = ROOT_DIR / "_dynamic/",
    ):
        # setters convert to Path and resolve:
        self.datasetdir = datasetdir
        self.dynamicdir = dynamicdir

    @property
    def datasetdir(self) -> Path:
        """Directory for datasets."""
        return self._datasetdir

    @datasetdir.setter
    def datasetdir(self, datasetdir: str | Path):
        self._datasetdir = Path(datasetdir).resolve()

    @property
    def dynamicdir(self) -> Path:
        """Dynamic directory (not the dataset directory)."""
        return self._dynamicdir

    @dynamicdir.setter
    def dynamicdir(self, dynamicdir: str | Path):
        self._dynamicdir = Path(dynamicdir).resolve()

    @property
    def public_sources(self) -> Path:
        """Path of the `sources.yaml` file located next to this module."""
        return ROOT_DIR / "sources.yaml"
5 | 6 | InspectResult 7 | """ 8 | 9 | from lamin_utils._inspect import InspectResult 10 | -------------------------------------------------------------------------------- /bionty/base/dev/_doc_util.py: -------------------------------------------------------------------------------- 1 | from textwrap import dedent 2 | 3 | 4 | def _doc_params(**kwds): # pragma: no cover 5 | r"""\ 6 | Docstrings should start with "\" in the first line for proper formatting. 7 | """ 8 | 9 | def dec(obj): 10 | obj.__orig_doc__ = obj.__doc__ 11 | obj.__doc__ = dedent(obj.__doc__).format_map(kwds) 12 | return obj 13 | 14 | return dec 15 | -------------------------------------------------------------------------------- /bionty/base/dev/_handle_sources.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from pathlib import Path 4 | 5 | import pandas as pd 6 | 7 | from bionty.base._settings import settings 8 | from bionty.base.dev._io import load_yaml 9 | 10 | 11 | def LAMINDB_INSTANCE_LOADED(): 12 | is_loaded = False 13 | lnenv_filepath = Path.home() / ".lamin/current_instance.env" 14 | if lnenv_filepath.exists(): 15 | with open(lnenv_filepath.as_posix()) as f: 16 | is_loaded = "bionty" in f.read().split("schema_str=")[-1] 17 | return is_loaded 18 | 19 | 20 | def parse_sources_yaml( 21 | filepath: str | Path = settings.public_sources, 22 | url_pattern: bool = False, 23 | ) -> pd.DataFrame: 24 | """Parse values from sources yaml file into a DataFrame. 25 | 26 | Args: 27 | filepath: Path to the versions yaml file. 
28 | 29 | Returns: 30 | - entity 31 | - name 32 | - organism 33 | - version 34 | - url 35 | - description 36 | - source_website 37 | """ 38 | all_rows = [] 39 | for entity, sources in load_yaml(filepath).items(): 40 | if entity == "version": 41 | continue 42 | for source, organism_source in sources.items(): 43 | name = organism_source.get("name", "") 44 | website = organism_source.get("website", "") 45 | for organism, versions in organism_source.items(): 46 | if organism in ["name", "website"]: 47 | continue 48 | latest_version = str(versions.get("latest-version")) 49 | url = versions.get("url") 50 | if not url_pattern: 51 | url = url.replace("{version}", latest_version) 52 | row = (entity, source, organism, latest_version, url, name, website) 53 | all_rows.append(row) 54 | 55 | return pd.DataFrame( 56 | all_rows, 57 | columns=[ 58 | "entity", 59 | "name", 60 | "organism", 61 | "version", 62 | "url", 63 | "description", 64 | "source_website", 65 | ], 66 | ) 67 | 68 | 69 | def parse_currently_used_sources(yaml: str | Path | list[dict]) -> dict: 70 | """Parse out the most recent versions from yaml.""" 71 | if isinstance(yaml, str | Path): 72 | df = parse_sources_yaml(yaml) 73 | df_current = ( 74 | df[["entity", "name", "organism", "version"]] # type: ignore 75 | .drop_duplicates(["entity", "organism", "name"], keep="first") 76 | .groupby(["entity", "organism", "name"], sort=False) 77 | .max() 78 | ) 79 | records = df_current.reset_index().to_dict(orient="records") 80 | else: 81 | records = yaml 82 | 83 | current_dict: dict = {} 84 | for kwargs in records: 85 | entity, organism, source, version = ( 86 | kwargs["entity"], 87 | kwargs["organism"], 88 | kwargs["name"], 89 | kwargs["version"], 90 | ) 91 | if entity not in current_dict: 92 | current_dict[entity] = {} 93 | if organism not in current_dict[entity]: 94 | current_dict[entity][organism] = {source: version} 95 | return current_dict 96 | 
-------------------------------------------------------------------------------- /bionty/base/dev/_io.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | from pathlib import Path 3 | 4 | import requests # type:ignore 5 | import yaml # type:ignore 6 | from lamindb_setup.core.upath import UPath 7 | from rich.progress import Progress 8 | 9 | from bionty.base._settings import settings 10 | 11 | 12 | def load_yaml(filename: str | Path): # pragma: no cover 13 | with open(filename) as f: 14 | return yaml.safe_load(f) 15 | 16 | 17 | def write_yaml( 18 | data: dict, 19 | filename: str | Path, 20 | sort_keys: bool = False, 21 | default_flow_style: bool = False, 22 | ): # pragma: no cover 23 | with open(filename, "w") as f: 24 | yaml.dump( 25 | data, 26 | f, 27 | sort_keys=sort_keys, 28 | default_flow_style=default_flow_style, 29 | ) 30 | 31 | 32 | def url_download( 33 | url: str, localpath: str | Path | None = None, block_size: int = 1024, **kwargs 34 | ) -> str | Path | None: 35 | """Downloads a file to a specified path. 36 | 37 | Args: 38 | url: The URL to download. 39 | localpath: The path to download the file to. 40 | block_size: Buffer size in bytes for sending a file-like message body. 41 | **kwargs: Keyword arguments are passed to 'requests' 42 | 43 | Returns: 44 | The localpath file is downloaded to 45 | 46 | Raises: 47 | HttpError: If the request response is not 200 and OK. 
48 | """ 49 | if url.startswith("file://"): 50 | url = url.split("file://")[-1] 51 | shutil.copy(url, localpath) 52 | return localpath 53 | try: 54 | response = requests.get(url, stream=True, allow_redirects=True, **kwargs) 55 | response.raise_for_status() 56 | 57 | total_content_length = int(response.headers.get("content-length", 0)) 58 | if localpath is None: 59 | localpath = url.split("/")[-1] 60 | 61 | if total_content_length > 5000000: 62 | with Progress(refresh_per_second=10, transient=True) as progress: 63 | task = progress.add_task( 64 | "[red]downloading...", total=total_content_length 65 | ) 66 | 67 | with open(localpath, "wb") as file: 68 | for data in response.iter_content(block_size): 69 | file.write(data) 70 | progress.update(task, advance=block_size) 71 | # force the progress bar to 100% at the end 72 | progress.update(task, completed=total_content_length, refresh=True) 73 | else: 74 | with open(localpath, "wb") as file: 75 | for data in response.iter_content(block_size): 76 | file.write(data) 77 | 78 | return localpath 79 | 80 | except requests.exceptions.HTTPError as err: 81 | if err.response.status_code == 404: 82 | raise requests.exceptions.HTTPError( 83 | f"URL not found (404): '{url}'. Check for typos." 84 | ) from err 85 | else: 86 | raise requests.exceptions.HTTPError( 87 | f"HTTP error ({err.response.status_code}): {url}." 88 | ) from err 89 | 90 | 91 | def s3_bionty_assets( 92 | filename: str, localpath: Path = None, assets_base_url: str = "s3://bionty-assets" 93 | ): 94 | """Synchronizes a S3 file path with local file storage. 95 | 96 | If the file does not exist locally it gets downloaded to datasetdir/filename or the passed localpath. 97 | If the file does not exist on S3, the file does not get synchronized, no erroring. 98 | 99 | Args: 100 | filename: The suffix of the assets_base_url. 101 | localpath: Local base path of the file to sync. 102 | assets_base_url: The S3 base URL. Prefix of the filename. 
103 | 104 | Returns: 105 | A Path object of the synchronized path. 106 | """ 107 | if localpath is None: 108 | localpath = settings.datasetdir / filename 109 | else: # it errors on reticulate if we pass a directory 110 | if localpath.exists(): 111 | assert localpath.is_file(), ( 112 | f"localpath {localpath} has to be a file path, not a directory" 113 | ) 114 | # this requires s3fs, but it is installed by lamindb 115 | # skip_instance_cache=True to avoid interference with cached filesystems 116 | # especially with their dircache 117 | remote_path = ( 118 | UPath( 119 | assets_base_url, 120 | skip_instance_cache=True, 121 | use_listings_cache=True, 122 | anon=True, 123 | ) 124 | / filename 125 | ) 126 | # check that the remote path exists and is available 127 | try: 128 | remote_stat = remote_path.stat() 129 | except (FileNotFoundError, PermissionError): 130 | return localpath 131 | # this is needed unfortunately because s3://bionty-assets doesn't have ListObjectsV2 for anonymous users. 132 | # Moreover, ListObjectsV2 is triggered inside .synchronize if no cache is present. 
133 | # TODO: check if this is still needed 134 | parent_path = remote_path.parent.path.rstrip("/") 135 | remote_path.fs.dircache[parent_path] = [remote_stat.as_info()] 136 | # synchronize the remote path 137 | if hasattr(remote_path, "synchronize_to"): 138 | remote_path.synchronize_to( 139 | localpath, error_no_origin=False, print_progress=True 140 | ) 141 | else: 142 | # UPath.synchronize is deprecated 143 | remote_path.synchronize(localpath, error_no_origin=False, print_progress=True) 144 | # clean the artificial cache 145 | del remote_path.fs.dircache[parent_path] 146 | 147 | return localpath 148 | -------------------------------------------------------------------------------- /bionty/base/entities/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/laminlabs/bionty/620a707fe3266d49249af3b47bebdac043b36326/bionty/base/entities/__init__.py -------------------------------------------------------------------------------- /bionty/base/entities/_bfxpipeline.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import Literal 4 | 5 | from bionty.base._public_ontology import PublicOntology 6 | from bionty.base.dev._doc_util import _doc_params 7 | 8 | from ._shared_docstrings import doc_entites 9 | 10 | 11 | @_doc_params(doc_entities=doc_entites) 12 | class BFXPipeline(PublicOntology): 13 | """Bioinformatics pipelines. 
@_doc_params(doc_entities=doc_entites)
class CellLine(PublicOntology):
    """Cell line.

    1. Cell Line Ontology
    https://github.com/CLO-ontology/CLO

    2. DepMap
    https://depmap.org

    Args:
        {doc_entities}
    """

    def __init__(
        self,
        organism: Literal["all"] | None = None,
        source: Literal["clo", "depmap"] | None = None,
        version: Literal[
            # Cell Line Ontology
            "2023-03-28",
            # The comma after the next entry is required: without it Python
            # concatenates it with "2024-Q2" into a single bogus literal.
            "2022-03-21",
            # DepMap
            "2024-Q2",
        ]
        | None = None,
        **kwargs,
    ) -> None:
        super().__init__(
            source=source,
            version=version,
            organism=organism,
            include_id_prefixes={"clo": ["CLO:"]},
            **kwargs,
        )
@_doc_params(doc_entities=doc_entites)
class CellType(PublicOntology):
    """Cell type ontologies.

    1. Cell ontology
    https://github.com/obophenotype/cell-ontology

    Args:
        {doc_entities}
    """

    def __init__(
        self,
        organism: Literal["all"] | None = None,
        source: Literal["cl"] | None = None,
        version: Literal[
            "2025-04-10",
            "2024-08-16",
            "2024-05-15",
            "2024-04-05",
            "2024-02-13",
            "2024-01-04",
            "2023-08-24",
            "2023-04-20",
            "2023-02-15",
            "2022-08-16",
        ]
        | None = None,
        **kwargs,
    ) -> None:
        # ID prefixes to include, keyed by source name.
        id_prefixes = {"cl": ["CL:"]}
        super().__init__(
            organism=organism,
            source=source,
            version=version,
            include_id_prefixes=id_prefixes,
            **kwargs,
        )
@_doc_params(doc_entities=doc_entites)
class Disease(PublicOntology):
    """Disease ontologies.

    1. Mondo
    Edits of terms are coordinated and reviewed on:
    https://github.com/monarch-initiative/mondo

    2. Human Disease Ontology
    Edits of terms are coordinated and reviewed on:
    https://github.com/DiseaseOntology/HumanDiseaseOntology

    3. International Classification of Diseases (ICD)
    Edits of terms are coordinated and reviewed on:
    https://www.who.int/standards/classifications/classification-of-diseases

    Args:
        {doc_entities}
    """

    def __init__(
        self,
        organism: Literal["all", "human"] | None = None,
        source: Literal["mondo", "doid", "icd"] | None = None,
        version: Literal[
            # Mondo (each version listed once; "2023-04-04" used to be duplicated)
            "2025-06-03",
            "2024-08-06",
            "2024-06-04",
            "2024-05-08",
            "2024-02-06",
            "2024-01-03",
            "2023-08-02",
            "2023-04-04",
            "2023-02-06",
            "2022-10-11",
            # DOID
            "2025-05-30",
            "2024-05-29",
            "2024-01-31",
            "2023-03-31",
            "2023-01-30",
            # ICD
            "icd-9-2011",
            "icd-10-2020",
            "icd-10-2024",
            "icd-11-2023",
        ]
        | None = None,
        **kwargs,
    ) -> None:
        super().__init__(
            source=source,
            version=version,
            organism=organism,
            include_id_prefixes={"mondo": ["MONDO:"]},
            **kwargs,
        )
@_doc_params(doc_entities=doc_entites)
class Ethnicity(PublicOntology):
    """Ethnicity.

    1. Human Ancestry Ontology
    https://github.com/EBISPOT/hancestro

    Args:
        {doc_entities}
    """

    def __init__(
        self,
        organism: Literal["human"] | None = None,
        source: Literal["hancestro"] | None = None,
        version: Literal["3.0", "2025-04-01"] | None = None,
        **kwargs,
    ) -> None:
        # ID prefixes to include, keyed by source name.
        id_prefixes = {"hancestro": ["HANCESTRO:"]}
        super().__init__(
            organism=organism,
            source=source,
            version=version,
            include_id_prefixes=id_prefixes,
            **kwargs,
        )
@_doc_params(doc_entities=organism_removed)
class Organism(PublicOntology):
    """Organism.

    1. NCBItaxon Ontology
    https://github.com/obophenotype/ncbitaxon

    2. Organism ontology
    https://www.ensembl.org/index.html

    Args:
        {doc_entities}
    """

    def __init__(
        self,
        taxa: (
            Literal["vertebrates", "bacteria", "fungi", "metazoa", "plants", "all"]
            | None
        ) = None,
        source: Literal["ensembl", "ncbitaxon"] | None = None,
        version: (
            Literal[
                # NCBITaxon
                "2025-03-13",
                "2023-06-20",
                # Ensembl
                "release-112",
                "release-57",
            ]
            | None
        ) = None,
        **kwargs,
    ):
        # To support the organism kwarg being passed in getattr access in other parts of the code
        # https://github.com/laminlabs/bionty/issues/163
        if "organism" in kwargs and taxa is None:
            taxa = kwargs.pop("organism")
        super().__init__(organism=taxa, source=source, version=version, **kwargs)

    def _load_df(self) -> pd.DataFrame:
        """Load the organism table.

        For the `ensembl` source the cached parquet file is used when present
        (after trying to fetch it from S3); otherwise the table is downloaded
        from the source URL, normalized and written to the parquet cache.
        Every other source is delegated to the parent class.
        """
        if self.source == "ensembl":
            if not self._local_parquet_path.exists():
                # try to download from s3
                s3_bionty_assets(
                    filename=self._parquet_filename,
                    assets_base_url="s3://bionty-assets",
                    localpath=self._local_parquet_path,
                )

            # try to download from original url
            if not self._local_parquet_path.exists():
                self._url_download(self._url, self._local_ontology_path)  # type:ignore
                df = pd.read_csv(
                    self._local_ontology_path,
                    sep="\t",
                    index_col=False,  # type:ignore
                )
                df.rename(
                    columns={
                        "#name": "name",
                        "species": "scientific_name",
                        "taxonomy_id": "ontology_id",
                    },
                    inplace=True,
                )
                df["name"] = df["name"].str.lower()
                df["ontology_id"] = "NCBITaxon:" + df["ontology_id"].astype(str)
                # Same normalization as for the cached table below.
                df = _standardize_scientific_name(df)
                df["synonyms"] = None
                df.to_parquet(self._local_parquet_path)
                return df
            else:
                df = pd.read_parquet(self._local_parquet_path)
                if "synonyms" not in df.columns:
                    # add synonyms column if it doesn't exist
                    df["synonyms"] = None
                return _standardize_scientific_name(df)
        else:
            return super()._load_df()

    def to_dataframe(self) -> pd.DataFrame:
        """Pandas DataFrame of the ontology.

        Returns:
            A Pandas DataFrame of the ontology, indexed by `name`.

        Example::

            import bionty.base as bt_base

            bt_base.Organism().to_dataframe()
        """
        return self._df.set_index("name")

    @deprecated("to_dataframe")
    def df(self) -> pd.DataFrame:
        return self.to_dataframe()


def _standardize_scientific_name(df: pd.DataFrame) -> pd.DataFrame:
    """Standardize scientific name following NCBITaxon convention.

    homo_sapiens -> Homo sapiens

    The `scientific_name` column is rewritten on the passed frame, which is
    also returned.
    """
    df["scientific_name"] = df["scientific_name"].apply(
        lambda x: " ".join([x.split("_")[0].capitalize()] + x.split("_")[1:])
    )
    return df
@_doc_params(doc_entities=doc_entites)
class Phenotype(PublicOntology):
    """Phenotype.

    1. Human Phenotype Ontology
    https://hpo.jax.org/app/

    2. PATO - the Phenotype And Trait Ontology
    https://github.com/pato-ontology/pato
    https://obofoundry.org/ontology/pato.html

    3. Phecodes ICD10 map
    https://phewascatalog.org/phecodes_icd10

    Args:
        {doc_entities}
    """

    def __init__(
        self,
        organism: Literal["human", "all"] | None = None,
        source: Literal["hp", "phe", "pato"] | None = None,
        version: Literal[
            # HP
            "2025-05-06",
            "2024-04-26",
            "2024-03-06",
            "2023-06-17",
            "2023-04-05",
            "2023-01-27",
            # Pato
            "2025-05-14",
            "2024-03-28",
            "2023-05-18",
            # Phe
            "1.2",
        ]
        | None = None,
        **kwargs,
    ) -> None:
        super().__init__(
            source=source,
            version=version,
            organism=organism,
            # Keyed by source name; each value lists the ID prefixes passed to
            # the parent for that source.
            include_id_prefixes={
                "hp": ["HP:"],
                "mp": ["MP:"],  # mp might require an exclusion prefix for mpath
                "zp": ["ZP:"],
                "pato": ["PATO:"],
            },
            **kwargs,
        )
Uniprot 16 | https://www.uniprot.org/ 17 | 18 | Args: 19 | {doc_entities} 20 | """ 21 | 22 | def __init__( 23 | self, 24 | organism: Literal["human", "mouse"] | None = None, 25 | source: Literal["uniprot"] | None = None, 26 | version: Literal["2024-03", "2023-03", "2023-02"] | None = None, 27 | **kwargs, 28 | ) -> None: 29 | super().__init__( 30 | source=source, 31 | version=version, 32 | organism=organism, 33 | ols_supported=False, 34 | **kwargs, 35 | ) 36 | -------------------------------------------------------------------------------- /bionty/base/entities/_shared_docstrings.py: -------------------------------------------------------------------------------- 1 | doc_entites = """\ 2 | organism: `name` of `Organism` entity. 3 | source: The name of the source in the sources.yaml file. 4 | Get available sources with `.display_available_sources()`. 5 | version: The version of the ontology. Typically a date or an actual version. 6 | Get latest versions with `.display_available_sources()`. 7 | """ 8 | organism_removed_tmp = "\n".join(doc_entites.split("\n")[1:]).split("\n") 9 | organism_removed_tmp[0] = organism_removed_tmp[0].removeprefix(" ") 10 | organism_removed = "\n".join(organism_removed_tmp) 11 | 12 | 13 | doc_curate = """\ 14 | df: DataFrame with a column of identifiers 15 | column: If `column` is `None`, checks the existing index for compliance with 16 | the default identifier. 17 | If `column` denotes an entity identifier, tries to map that identifier 18 | to the default identifier. 19 | field: The type of identifier for mapping. 
@_doc_params(doc_entities=doc_entites)
class Tissue(PublicOntology):
    """Tissue.

    1. Uberon
    https://github.com/obophenotype/uberon

    Args:
        {doc_entities}
    """

    def __init__(
        self,
        organism: Literal["all"] | None = None,
        source: Literal["uberon"] | None = None,
        version: Literal[
            "2025-05-28",
            "2024-08-07",
            "2024-05-13",
            "2024-03-22",
            "2024-02-20",
            "2024-01-18",
            "2023-09-05",
            "2023-04-19",
            "2023-02-14",
            "2022-08-19",
        ]
        | None = None,
        **kwargs,
    ) -> None:
        # Fixed options this entity always hands to the parent constructor.
        tissue_options = {
            "include_id_prefixes": {"uberon": ["UBERON:"]},
            "include_rel": "part_of",
        }
        super().__init__(
            organism=organism,
            source=source,
            version=version,
            **tissue_options,
            **kwargs,
        )
16 | "name": "Cell Ranger v7.1.0", 17 | "versions": "7.1.0", 18 | "reference": "https://support.10xgenomics.com/single-cell-gene-expression/software/downloads/latest" 19 | }, 20 | "cell_ranger_v7_0_1": { 21 | "id": "T3r0zDQQJ97p", 22 | "name": "Cell Ranger v7.0.1", 23 | "versions": "7.0.1", 24 | "reference": "https://support.10xgenomics.com/single-cell-gene-expression/software/downloads/latest" 25 | }, 26 | "cell_ranger_v7_0_0": { 27 | "id": "t28U9XvyjCEw", 28 | "name": "Cell Ranger v7.0.0", 29 | "versions": "7.0.0", 30 | "reference": "https://support.10xgenomics.com/single-cell-gene-expression/software/downloads/latest" 31 | }, 32 | "cell_ranger_v6_1_2": { 33 | "id": "G4R5bC6a9DcX", 34 | "name": "Cell Ranger v6.1.2", 35 | "versions": "6.1.2", 36 | "reference": "https://support.10xgenomics.com/single-cell-gene-expression/software/downloads/6.1" 37 | }, 38 | "cell_ranger_v6_0_0": { 39 | "id": "F6U8dE2tJ3LZ", 40 | "name": "Cell Ranger v6.0.0", 41 | "versions": "6.0.0", 42 | "reference": "https://support.10xgenomics.com/single-cell-gene-expression/software/downloads/6.0" 43 | }, 44 | "spaceranger_v3_0_0": { 45 | "id": "A1B2C3D4E5F6", 46 | "name": "Spaceranger v3.0.0", 47 | "versions": "3.0.0", 48 | "reference": "https://support.10xgenomics.com/spatial-gene-expression/software/downloads/latest" 49 | }, 50 | "spaceranger_v2_1_1": { 51 | "id": "G7H8I9J0K1L2", 52 | "name": "Spaceranger v2.1.1", 53 | "versions": "2.1.1", 54 | "reference": "https://support.10xgenomics.com/spatial-gene-expression/software/downloads/2.1" 55 | }, 56 | "spaceranger_v2_1_0": { 57 | "id": "M3N4O5P6Q7R8", 58 | "name": "Spaceranger v2.1.0", 59 | "versions": "2.1.0", 60 | "reference": "https://support.10xgenomics.com/spatial-gene-expression/software/downloads/2.1" 61 | }, 62 | "spaceranger_v2_0_1": { 63 | "id": "M9V2bR6aK3eD", 64 | "name": "Spaceranger v2.0.1", 65 | "versions": "2.0.1", 66 | "reference": "https://support.10xgenomics.com/spatial-gene-expression/software/downloads/2.0" 67 | }, 68 | 
"xenium_v2_0_0": { 69 | "id": "S1T2U3V4W5X6", 70 | "name": "Xenium v2.0.0", 71 | "versions": "2.0.0", 72 | "reference": "https://www.10xgenomics.com/products/xenium" 73 | }, 74 | "xenium_v1_7_1": { 75 | "id": "Y7Z8A9B0C1D2", 76 | "name": "Xenium v1.7.1", 77 | "versions": "1.7.1", 78 | "reference": "https://www.10xgenomics.com/products/xenium" 79 | }, 80 | "xenium_v1_7_0": { 81 | "id": "E3F4G5H6I7J8", 82 | "name": "Xenium v1.7.0", 83 | "versions": "1.7.0", 84 | "reference": "https://www.10xgenomics.com/products/xenium" 85 | }, 86 | "xenium_v1_6_0": { 87 | "id": "K9L0M1N2O3P4", 88 | "name": "Xenium v1.6.0", 89 | "versions": "1.6.0", 90 | "reference": "https://www.10xgenomics.com/products/xenium" 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /bionty/base/scripts/check_ontologies_reachable.py: -------------------------------------------------------------------------------- 1 | import re 2 | import urllib.request 3 | from http.client import BadStatusLine 4 | from pathlib import Path 5 | from urllib.error import HTTPError, URLError 6 | 7 | import yaml # type:ignore 8 | 9 | VERSIONS_FILE_PATH = Path.cwd() / "bionty_base" / "sources.yaml" 10 | 11 | 12 | def extract_urls_from_yaml(yaml_file): 13 | with open(yaml_file) as file: 14 | yaml_data = yaml.safe_load(file) 15 | urls = [] 16 | 17 | def extract_urls(data): 18 | if isinstance(data, str): 19 | urls.extend( 20 | re.findall( 21 | r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+", 22 | data, 23 | ) 24 | ) 25 | elif isinstance(data, dict): 26 | for value in data.values(): 27 | extract_urls(value) 28 | elif isinstance(data, list): 29 | for item in data: 30 | extract_urls(item) 31 | 32 | extract_urls(yaml_data) 33 | 34 | return urls 35 | 36 | 37 | urls = extract_urls_from_yaml(VERSIONS_FILE_PATH) 38 | 39 | failed_urls = [] 40 | for url in urls: 41 | try: 42 | assert urllib.request.urlopen(url, timeout=1000).getcode() == 200 43 | except 
(URLError, BadStatusLine): 44 | print(f"URL: {url} is currently not accessible.") 45 | pass 46 | except (AssertionError, ValueError, HTTPError) as e: 47 | failed_urls.append([url, e]) 48 | 49 | if len(failed_urls) != 0: 50 | for fail in failed_urls: 51 | print(fail) 52 | raise AssertionError(f"{len(failed_urls)} URLs failed.") 53 | -------------------------------------------------------------------------------- /bionty/base/scripts/generate_bfxpipelines.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import hashlib 3 | import json 4 | import os 5 | import secrets 6 | import string 7 | from pathlib import Path 8 | 9 | import pandas as pd 10 | from github import Github 11 | from rich.progress import track 12 | 13 | BASE_BFX_PIPELINES_PATH = "./scripts/bfxpipelines_info" 14 | 15 | 16 | def base62(n_char: int) -> str: 17 | """Like nanoid without hyphen and underscore.""" 18 | alphabet = string.digits + string.ascii_letters.swapcase() 19 | id = "".join(secrets.choice(alphabet) for i in range(n_char)) 20 | return id 21 | 22 | 23 | def to_b64_str(bstr: bytes) -> str: 24 | b64 = base64.urlsafe_b64encode(bstr).decode().strip("=") 25 | return b64 26 | 27 | 28 | def hash_str(s: str) -> str: 29 | bstr = s.encode("utf-8") 30 | # as we're truncating at a short length, we choose md5 over sha512 31 | return to_b64_str(hashlib.md5(bstr).digest()) 32 | 33 | 34 | def hash_id(input_id: str | None = None, *, n_char: int) -> str: 35 | if input_id is None: 36 | return base62(n_char=n_char) 37 | else: 38 | return hash_str(input_id)[:n_char].replace("_", "0").replace("-", "0") 39 | 40 | 41 | def generate_nf_core_pipelines_info() -> None: 42 | """Generates a json file that contains all required pipelines information by querying the nf-core Github org.""" 43 | gh_login = Github(os.getenv("GITHUB_TOKEN")) 44 | nf_core_org = gh_login.get_organization("nf-core") 45 | blacklist = ["cookiecutter", "tools"] 46 | nf_core_pipelines = {} 47 | 
def merge_json_files(pipelines_folder_path: str | Path, output_path: str) -> None:
    """Merge all JSON files in a folder and write the merged data to a new JSON file.

    Files whose name ends with ``bfxpipelines.json`` (the merged output of a
    previous run) are skipped. The remaining files are merged in sorted path
    order, so when two files define the same key the later file wins and the
    result is reproducible across file systems.

    Args:
        pipelines_folder_path: Path to the folder containing the JSON files.
        output_path: Path to the output JSON file.
    """
    folder = Path(pipelines_folder_path)
    merged: dict = {}

    # Path.glob yields entries in arbitrary, file-system dependent order.
    for file_path in sorted(folder.glob("*.json")):
        if file_path.name.endswith("bfxpipelines.json"):
            continue
        with open(file_path, encoding="utf-8") as f:
            merged.update(json.load(f))

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(merged, f, indent=4)
class Settings:
    """Settings.

    Directly use `bt.settings` rather than instantiating this class yourself.
    """

    def __init__(self):
        # Kept as a plain name until the property is first read.
        self._organism = "human"

    @property
    def organism(self) -> Organism | None:
        """Default organism argument (default `"human"`).

        Default organism to use in cases of ambiguity. For instance, gene symbols are duplicated across organisms and need to be disambiguated.

        Examples:

            ::

                bionty.settings.organism = "mouse"
        """
        pending = self._organism
        if isinstance(pending, str):
            # Still a name: route it through the setter, which replaces it
            # with an Organism record (or raises).
            self.organism = pending  # type: ignore
        return self._organism

    @organism.setter
    def organism(self, name: str | Organism):
        # A record is stored as given.
        if isinstance(name, Organism):
            self._organism = name
            return

        import lamindb as ln

        matches = Organism.from_values([name], mute=True)
        if len(matches) == 0:
            raise ValueError(
                f"No organism with name='{name}' is found, please create a organism record!"
            )

        record = matches[0]
        # Save the record first when it has not been saved yet.
        if record._state.adding:  # type:ignore
            record.save()  # type:ignore
        self._organism = record


settings = Settings()
settings.__doc__ = """Global :class:`~bionty.core.Settings`."""
field=models.ManyToManyField(related_name="+", to="lamindb.run"), 16 | ), 17 | migrations.AlterField( 18 | model_name="cellmarker", 19 | name="previous_runs", 20 | field=models.ManyToManyField(related_name="+", to="lamindb.run"), 21 | ), 22 | migrations.AlterField( 23 | model_name="celltype", 24 | name="previous_runs", 25 | field=models.ManyToManyField(related_name="+", to="lamindb.run"), 26 | ), 27 | migrations.AlterField( 28 | model_name="developmentalstage", 29 | name="previous_runs", 30 | field=models.ManyToManyField(related_name="+", to="lamindb.run"), 31 | ), 32 | migrations.AlterField( 33 | model_name="disease", 34 | name="previous_runs", 35 | field=models.ManyToManyField(related_name="+", to="lamindb.run"), 36 | ), 37 | migrations.AlterField( 38 | model_name="ethnicity", 39 | name="previous_runs", 40 | field=models.ManyToManyField(related_name="+", to="lamindb.run"), 41 | ), 42 | migrations.AlterField( 43 | model_name="experimentalfactor", 44 | name="previous_runs", 45 | field=models.ManyToManyField(related_name="+", to="lamindb.run"), 46 | ), 47 | migrations.AlterField( 48 | model_name="gene", 49 | name="previous_runs", 50 | field=models.ManyToManyField(related_name="+", to="lamindb.run"), 51 | ), 52 | migrations.AlterField( 53 | model_name="organism", 54 | name="previous_runs", 55 | field=models.ManyToManyField(related_name="+", to="lamindb.run"), 56 | ), 57 | migrations.AlterField( 58 | model_name="pathway", 59 | name="previous_runs", 60 | field=models.ManyToManyField(related_name="+", to="lamindb.run"), 61 | ), 62 | migrations.AlterField( 63 | model_name="phenotype", 64 | name="previous_runs", 65 | field=models.ManyToManyField(related_name="+", to="lamindb.run"), 66 | ), 67 | migrations.AlterField( 68 | model_name="protein", 69 | name="previous_runs", 70 | field=models.ManyToManyField(related_name="+", to="lamindb.run"), 71 | ), 72 | migrations.AlterField( 73 | model_name="publicsource", 74 | name="previous_runs", 75 | 
def prepend_bionty_to_entity(apps, schema_editor):
    """Prefix ``Source.entity`` values with ``bionty.`` and re-encode their uid.

    Runs as the last operation of this migration, i.e. after the field renames
    below, which is why ``source.name`` can be read here.

    Args:
        apps: App registry passed in by ``RunPython``; used to look up the
            ``Source`` model.
        schema_editor: Passed in by ``RunPython``; not used.
    """
    # presumably the names of the BioRecord models defined in bionty;
    # verify against list_biorecord_models
    bionty_models = list_biorecord_models(bionty)
    Source = apps.get_model("bionty", "Source")
    for source in Source.objects.all():
        # Only rows whose entity is in that list and is not yet prefixed.
        if source.entity in bionty_models and not source.entity.startswith("bionty."):
            # prefix entity with bionty
            source.entity = f"bionty.{source.entity}"
            # re-encode uid
            source.uid = encode_uid(
                Source,
                {
                    "entity": source.entity,
                    "name": source.name,
                    "organism": source.organism,
                    "version": source.version,
                },
            )["uid"]
            source.save()


class Migration(migrations.Migration):
    # Renames three Source fields, alters the artifact relations and the
    # entity/uid columns, then runs the data migration defined above.
    dependencies = [
        ("bionty", "0031_alter_cellmarker_name_and_more"),
    ]

    operations = [
        migrations.RenameField(
            model_name="source",
            old_name="source_name",
            new_name="description",
        ),
        migrations.RenameField(
            model_name="source",
            old_name="source",
            new_name="name",
        ),
        migrations.RenameField(
            model_name="source",
            old_name="df",
            new_name="dataframe_artifact",
        ),
        migrations.AlterField(
            model_name="source",
            name="dataframe_artifact",
            field=models.ForeignKey(
                default=None,
                null=True,
                on_delete=django.db.models.deletion.PROTECT,
                related_name="source_dataframe_of",
                to="lamindb.artifact",
            ),
        ),
        migrations.AlterField(
            model_name="source",
            name="artifacts",
            field=models.ManyToManyField(
                related_name="source_artifact_of", to="lamindb.artifact"
            ),
        ),
        migrations.AlterField(
            model_name="source",
            name="entity",
            field=models.CharField(db_index=True, max_length=256),
        ),
        migrations.AlterField(
            model_name="source",
            name="uid",
            field=models.CharField(
                default=bionty.ids.source, max_length=4, unique=True
            ),
        ),
        # Data migration; no reverse function is supplied.
        migrations.RunPython(prepend_bionty_to_entity),
    ]
migrations.AlterUniqueTogether( 15 | name="source", unique_together=(("entity", "name", "organism", "version"),) 16 | ), 17 | ] 18 | -------------------------------------------------------------------------------- /bionty/migrations/0035_alter_protein_gene_symbol.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2024-08-09 08:49 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("bionty", "0034_alter_source_unique_together"), 9 | ] 10 | 11 | operations = [ 12 | migrations.AlterField( 13 | model_name="protein", 14 | name="gene_symbol", 15 | field=models.CharField( 16 | db_index=True, default=None, max_length=256, null=True 17 | ), 18 | ), 19 | ] 20 | -------------------------------------------------------------------------------- /bionty/migrations/0036_alter_source_artifacts_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2024-08-09 10:13 2 | 3 | import django.db.models.deletion 4 | from django.db import migrations, models 5 | 6 | 7 | class Migration(migrations.Migration): 8 | dependencies = [ 9 | ("bionty", "0035_alter_protein_gene_symbol"), 10 | ] 11 | 12 | operations = [ 13 | migrations.AlterField( 14 | model_name="source", 15 | name="artifacts", 16 | field=models.ManyToManyField( 17 | related_name="_source_artifact_of", to="lamindb.artifact" 18 | ), 19 | ), 20 | migrations.AlterField( 21 | model_name="source", 22 | name="dataframe_artifact", 23 | field=models.ForeignKey( 24 | default=None, 25 | null=True, 26 | on_delete=django.db.models.deletion.PROTECT, 27 | related_name="_source_dataframe_of", 28 | to="lamindb.artifact", 29 | ), 30 | ), 31 | ] 32 | -------------------------------------------------------------------------------- /bionty/migrations/0037_alter_cellline_source_alter_cellmarker_source_and_more.py: 
-------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2024-08-27 09:42 2 | 3 | import django.db.models.deletion 4 | from django.db import migrations, models 5 | 6 | 7 | class Migration(migrations.Migration): 8 | dependencies = [ 9 | ("bionty", "0036_alter_source_artifacts_and_more"), 10 | ] 11 | 12 | operations = [ 13 | migrations.AlterField( 14 | model_name="cellline", 15 | name="source", 16 | field=models.ForeignKey( 17 | null=True, 18 | on_delete=django.db.models.deletion.PROTECT, 19 | to="bionty.source", 20 | ), 21 | ), 22 | migrations.AlterField( 23 | model_name="cellmarker", 24 | name="source", 25 | field=models.ForeignKey( 26 | null=True, 27 | on_delete=django.db.models.deletion.PROTECT, 28 | to="bionty.source", 29 | ), 30 | ), 31 | migrations.AlterField( 32 | model_name="celltype", 33 | name="source", 34 | field=models.ForeignKey( 35 | null=True, 36 | on_delete=django.db.models.deletion.PROTECT, 37 | to="bionty.source", 38 | ), 39 | ), 40 | migrations.AlterField( 41 | model_name="developmentalstage", 42 | name="source", 43 | field=models.ForeignKey( 44 | null=True, 45 | on_delete=django.db.models.deletion.PROTECT, 46 | to="bionty.source", 47 | ), 48 | ), 49 | migrations.AlterField( 50 | model_name="disease", 51 | name="source", 52 | field=models.ForeignKey( 53 | null=True, 54 | on_delete=django.db.models.deletion.PROTECT, 55 | to="bionty.source", 56 | ), 57 | ), 58 | migrations.AlterField( 59 | model_name="ethnicity", 60 | name="source", 61 | field=models.ForeignKey( 62 | null=True, 63 | on_delete=django.db.models.deletion.PROTECT, 64 | to="bionty.source", 65 | ), 66 | ), 67 | migrations.AlterField( 68 | model_name="experimentalfactor", 69 | name="source", 70 | field=models.ForeignKey( 71 | null=True, 72 | on_delete=django.db.models.deletion.PROTECT, 73 | to="bionty.source", 74 | ), 75 | ), 76 | migrations.AlterField( 77 | model_name="gene", 78 | name="source", 79 | field=models.ForeignKey( 80 | 
class Migration(migrations.Migration):
    """Migration 0039: re-declare the ``source`` foreign key on 13 models.

    Every model listed below receives the same field definition: a nullable,
    delete-protected link to ``bionty.source`` declared with
    ``related_name="+"``.
    """

    dependencies = [("bionty", "0038_alter_artifactcellline_created_by_and_more")]

    # One AlterField per model, emitted in the originally generated order.
    operations = [
        migrations.AlterField(
            model_name=target,
            name="source",
            field=models.ForeignKey(
                null=True,
                on_delete=django.db.models.deletion.PROTECT,
                related_name="+",
                to="bionty.source",
            ),
        )
        for target in (
            "cellline",
            "cellmarker",
            "celltype",
            "developmentalstage",
            "disease",
            "ethnicity",
            "experimentalfactor",
            "gene",
            "organism",
            "pathway",
            "phenotype",
            "protein",
            "tissue",
        )
    ]
29 | ), 30 | migrations.AlterUniqueTogether( 31 | name="artifactcelltype", 32 | unique_together={("artifact", "celltype", "feature")}, 33 | ), 34 | migrations.AlterUniqueTogether( 35 | name="artifactdevelopmentalstage", 36 | unique_together={("artifact", "developmentalstage", "feature")}, 37 | ), 38 | migrations.AlterUniqueTogether( 39 | name="artifactdisease", 40 | unique_together={("artifact", "disease", "feature")}, 41 | ), 42 | migrations.AlterUniqueTogether( 43 | name="artifactethnicity", 44 | unique_together={("artifact", "ethnicity", "feature")}, 45 | ), 46 | migrations.AlterUniqueTogether( 47 | name="artifactexperimentalfactor", 48 | unique_together={("artifact", "experimentalfactor", "feature")}, 49 | ), 50 | migrations.AlterUniqueTogether( 51 | name="artifactgene", 52 | unique_together={("artifact", "gene", "feature")}, 53 | ), 54 | migrations.AlterUniqueTogether( 55 | name="artifactorganism", 56 | unique_together={("artifact", "organism", "feature")}, 57 | ), 58 | migrations.AlterUniqueTogether( 59 | name="artifactpathway", 60 | unique_together={("artifact", "pathway", "feature")}, 61 | ), 62 | migrations.AlterUniqueTogether( 63 | name="artifactphenotype", 64 | unique_together={("artifact", "phenotype", "feature")}, 65 | ), 66 | migrations.AlterUniqueTogether( 67 | name="artifactprotein", 68 | unique_together={("artifact", "protein", "feature")}, 69 | ), 70 | migrations.AlterUniqueTogether( 71 | name="artifacttissue", 72 | unique_together={("artifact", "tissue", "feature")}, 73 | ), 74 | migrations.AlterUniqueTogether( 75 | name="featuresetcellmarker", 76 | unique_together={("featureset", "cellmarker")}, 77 | ), 78 | migrations.AlterUniqueTogether( 79 | name="featuresetgene", 80 | unique_together={("featureset", "gene")}, 81 | ), 82 | migrations.AlterUniqueTogether( 83 | name="featuresetpathway", 84 | unique_together={("featureset", "pathway")}, 85 | ), 86 | migrations.AlterUniqueTogether( 87 | name="featuresetprotein", 88 | 
class Migration(migrations.Migration):
    """Migration 0043: add a ``space`` foreign key to 14 bionty models.

    The new column is declared identically everywhere: ``blank=True``,
    ``default=1``, delete-protected, pointing at ``lamindb.space``.
    """

    dependencies = [("bionty", "0042_lamindbv1")]

    # One AddField per model, emitted in the originally generated order.
    operations = [
        migrations.AddField(
            model_name=target,
            name="space",
            field=lamindb.base.fields.ForeignKey(
                blank=True,
                default=1,
                on_delete=django.db.models.deletion.PROTECT,
                to="lamindb.space",
            ),
        )
        for target in (
            "cellline",
            "cellmarker",
            "celltype",
            "developmentalstage",
            "disease",
            "ethnicity",
            "experimentalfactor",
            "gene",
            "organism",
            "pathway",
            "phenotype",
            "protein",
            "source",
            "tissue",
        )
    ]
model_name="ethnicity", 72 | name="space", 73 | field=lamindb.base.fields.ForeignKey( 74 | blank=True, 75 | db_default=1, 76 | default=1, 77 | on_delete=django.db.models.deletion.PROTECT, 78 | to="lamindb.space", 79 | ), 80 | ), 81 | migrations.AlterField( 82 | model_name="experimentalfactor", 83 | name="space", 84 | field=lamindb.base.fields.ForeignKey( 85 | blank=True, 86 | db_default=1, 87 | default=1, 88 | on_delete=django.db.models.deletion.PROTECT, 89 | to="lamindb.space", 90 | ), 91 | ), 92 | migrations.AlterField( 93 | model_name="gene", 94 | name="space", 95 | field=lamindb.base.fields.ForeignKey( 96 | blank=True, 97 | db_default=1, 98 | default=1, 99 | on_delete=django.db.models.deletion.PROTECT, 100 | to="lamindb.space", 101 | ), 102 | ), 103 | migrations.AlterField( 104 | model_name="organism", 105 | name="space", 106 | field=lamindb.base.fields.ForeignKey( 107 | blank=True, 108 | db_default=1, 109 | default=1, 110 | on_delete=django.db.models.deletion.PROTECT, 111 | to="lamindb.space", 112 | ), 113 | ), 114 | migrations.AlterField( 115 | model_name="pathway", 116 | name="space", 117 | field=lamindb.base.fields.ForeignKey( 118 | blank=True, 119 | db_default=1, 120 | default=1, 121 | on_delete=django.db.models.deletion.PROTECT, 122 | to="lamindb.space", 123 | ), 124 | ), 125 | migrations.AlterField( 126 | model_name="phenotype", 127 | name="space", 128 | field=lamindb.base.fields.ForeignKey( 129 | blank=True, 130 | db_default=1, 131 | default=1, 132 | on_delete=django.db.models.deletion.PROTECT, 133 | to="lamindb.space", 134 | ), 135 | ), 136 | migrations.AlterField( 137 | model_name="protein", 138 | name="space", 139 | field=lamindb.base.fields.ForeignKey( 140 | blank=True, 141 | db_default=1, 142 | default=1, 143 | on_delete=django.db.models.deletion.PROTECT, 144 | to="lamindb.space", 145 | ), 146 | ), 147 | migrations.AlterField( 148 | model_name="source", 149 | name="space", 150 | field=lamindb.base.fields.ForeignKey( 151 | blank=True, 152 | 
class Migration(migrations.Migration):
    """Migration 0045: rename the ``aux`` field to ``_aux`` on 14 models."""

    dependencies = [
        ("bionty", "0044_alter_cellline_space_alter_cellmarker_space_and_more"),
    ]

    # One RenameField per model, emitted in the originally generated order.
    operations = [
        migrations.RenameField(model_name=target, old_name="aux", new_name="_aux")
        for target in (
            "cellline",
            "cellmarker",
            "celltype",
            "developmentalstage",
            "disease",
            "ethnicity",
            "experimentalfactor",
            "gene",
            "organism",
            "pathway",
            "phenotype",
            "protein",
            "source",
            "tissue",
        )
    ]
migrations.AlterField( 49 | model_name="ethnicity", 50 | name="_aux", 51 | field=lamindb.base.fields.JSONField( 52 | blank=True, db_default=None, default=None, null=True 53 | ), 54 | ), 55 | migrations.AlterField( 56 | model_name="experimentalfactor", 57 | name="_aux", 58 | field=lamindb.base.fields.JSONField( 59 | blank=True, db_default=None, default=None, null=True 60 | ), 61 | ), 62 | migrations.AlterField( 63 | model_name="gene", 64 | name="_aux", 65 | field=lamindb.base.fields.JSONField( 66 | blank=True, db_default=None, default=None, null=True 67 | ), 68 | ), 69 | migrations.AlterField( 70 | model_name="organism", 71 | name="_aux", 72 | field=lamindb.base.fields.JSONField( 73 | blank=True, db_default=None, default=None, null=True 74 | ), 75 | ), 76 | migrations.AlterField( 77 | model_name="pathway", 78 | name="_aux", 79 | field=lamindb.base.fields.JSONField( 80 | blank=True, db_default=None, default=None, null=True 81 | ), 82 | ), 83 | migrations.AlterField( 84 | model_name="phenotype", 85 | name="_aux", 86 | field=lamindb.base.fields.JSONField( 87 | blank=True, db_default=None, default=None, null=True 88 | ), 89 | ), 90 | migrations.AlterField( 91 | model_name="protein", 92 | name="_aux", 93 | field=lamindb.base.fields.JSONField( 94 | blank=True, db_default=None, default=None, null=True 95 | ), 96 | ), 97 | migrations.AlterField( 98 | model_name="source", 99 | name="_aux", 100 | field=lamindb.base.fields.JSONField( 101 | blank=True, db_default=None, default=None, null=True 102 | ), 103 | ), 104 | migrations.AlterField( 105 | model_name="tissue", 106 | name="_aux", 107 | field=lamindb.base.fields.JSONField( 108 | blank=True, db_default=None, default=None, null=True 109 | ), 110 | ), 111 | ] 112 | -------------------------------------------------------------------------------- /bionty/migrations/0047_lamindbv1_part5.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.2 on 2025-01-13 16:08 2 | 3 | 
class Migration(migrations.Migration):
    """Migration 0047: rename the ``FeatureSet*`` link models to ``Schema*``.

    The operations run in five groups, in this order:

    1. rename the four link models,
    2. rename ``feature_sets`` to ``schemas`` on the four linked models,
    3. rename ``featureset`` to ``schema`` on the four link models,
    4. re-declare each ``schemas`` many-to-many with its new ``through`` model,
    5. re-declare each link model's ``schema`` foreign key.
    """

    dependencies = [
        ("bionty", "0046_alter_cellline__aux_alter_cellmarker__aux_and_more"),
        ("lamindb", "0075_lamindbv1_part5"),
    ]

    operations = [
        # 1. Link models: FeatureSetX -> SchemaX.
        *(
            migrations.RenameModel(old_name=before, new_name=after)
            for before, after in (
                ("FeatureSetCellMarker", "SchemaCellMarker"),
                ("FeatureSetGene", "SchemaGene"),
                ("FeatureSetPathway", "SchemaPathway"),
                ("FeatureSetProtein", "SchemaProtein"),
            )
        ),
        # 2. Many-to-many accessor on the linked models (original order kept).
        *(
            migrations.RenameField(
                model_name=target, old_name="feature_sets", new_name="schemas"
            )
            for target in ("gene", "protein", "cellmarker", "pathway")
        ),
        # 3. Foreign-key name on the link models.
        *(
            migrations.RenameField(
                model_name=link, old_name="featureset", new_name="schema"
            )
            for link in (
                "schemacellmarker",
                "schemagene",
                "schemapathway",
                "schemaprotein",
            )
        ),
        # 4. Many-to-many definitions now routed through the renamed models.
        *(
            migrations.AlterField(
                model_name=target,
                name="schemas",
                field=models.ManyToManyField(
                    related_name=reverse_name,
                    through=through_model,
                    to="lamindb.schema",
                ),
            )
            for target, reverse_name, through_model in (
                ("cellmarker", "cell_markers", "bionty.SchemaCellMarker"),
                ("gene", "genes", "bionty.SchemaGene"),
                ("pathway", "pathways", "bionty.SchemaPathway"),
                ("protein", "proteins", "bionty.SchemaProtein"),
            )
        ),
        # 5. Link-model foreign keys to lamindb.schema.
        *(
            migrations.AlterField(
                model_name=link,
                name="schema",
                field=lamindb.base.fields.ForeignKey(
                    blank=True,
                    on_delete=django.db.models.deletion.CASCADE,
                    related_name="+",
                    to="lamindb.schema",
                ),
            )
            for link in (
                "schemacellmarker",
                "schemagene",
                "schemapathway",
                "schemaprotein",
            )
        ),
    ]
django.db.models.deletion 4 | import lamindb.base.fields 5 | from django.db import migrations 6 | 7 | 8 | class Migration(migrations.Migration): 9 | dependencies = [ 10 | ("bionty", "0048_lamindbv1_part6"), 11 | ] 12 | 13 | operations = [ 14 | migrations.AlterField( 15 | model_name="schemacellmarker", 16 | name="cellmarker", 17 | field=lamindb.base.fields.ForeignKey( 18 | blank=True, 19 | on_delete=django.db.models.deletion.PROTECT, 20 | related_name="links_schema", 21 | to="bionty.cellmarker", 22 | ), 23 | ), 24 | migrations.AlterField( 25 | model_name="schemacellmarker", 26 | name="schema", 27 | field=lamindb.base.fields.ForeignKey( 28 | blank=True, 29 | on_delete=django.db.models.deletion.CASCADE, 30 | related_name="links_cellmarker", 31 | to="lamindb.schema", 32 | ), 33 | ), 34 | migrations.AlterField( 35 | model_name="schemagene", 36 | name="gene", 37 | field=lamindb.base.fields.ForeignKey( 38 | blank=True, 39 | on_delete=django.db.models.deletion.PROTECT, 40 | related_name="links_schema", 41 | to="bionty.gene", 42 | ), 43 | ), 44 | migrations.AlterField( 45 | model_name="schemagene", 46 | name="schema", 47 | field=lamindb.base.fields.ForeignKey( 48 | blank=True, 49 | on_delete=django.db.models.deletion.CASCADE, 50 | related_name="links_gene", 51 | to="lamindb.schema", 52 | ), 53 | ), 54 | migrations.AlterField( 55 | model_name="schemapathway", 56 | name="pathway", 57 | field=lamindb.base.fields.ForeignKey( 58 | blank=True, 59 | on_delete=django.db.models.deletion.PROTECT, 60 | related_name="links_schema", 61 | to="bionty.pathway", 62 | ), 63 | ), 64 | migrations.AlterField( 65 | model_name="schemapathway", 66 | name="schema", 67 | field=lamindb.base.fields.ForeignKey( 68 | blank=True, 69 | on_delete=django.db.models.deletion.CASCADE, 70 | related_name="links_pathway", 71 | to="lamindb.schema", 72 | ), 73 | ), 74 | migrations.AlterField( 75 | model_name="schemaprotein", 76 | name="protein", 77 | field=lamindb.base.fields.ForeignKey( 78 | blank=True, 79 | 
def populate_uids(apps, schema_editor):
    """Recompute and persist the ``uid`` of every existing ``Source`` row.

    For each row, all concrete field values except ``uid`` are collected into
    a dict and handed to ``encode_uid``; the ``"uid"`` entry of its result is
    written back and the row is saved.

    Args:
        apps: Migration app registry; used to look up the ``Source`` model
            as it exists at this point in the migration history.
        schema_editor: Unused; present because ``RunPython`` passes it.
    """
    # NOTE(review): encode_uid is imported from application code
    # (bionty._biorecord), so this migration follows whatever that helper
    # does at the time the migration runs.
    source_model = apps.get_model("bionty", "Source")
    for record in source_model.objects.all():
        field_values = {}
        for field in record._meta.fields:
            if field.name == "uid":
                # The uid is the value being regenerated; leave it out.
                continue
            field_values[field.name] = getattr(record, field.name)
        record.uid = encode_uid(registry=source_model, kwargs=field_values)["uid"]
        record.save()


class Migration(migrations.Migration):
    """Migration 0050: redefine ``Source.uid``, then regenerate stored uids."""

    dependencies = [("bionty", "0049_alter_schemacellmarker_cellmarker_and_more")]

    operations = [
        # Column definition first: 8 characters, unique, default from bionty.ids.
        migrations.AlterField(
            model_name="source",
            name="uid",
            field=lamindb.base.fields.CharField(
                blank=True,
                default=bionty.ids.source,
                max_length=8,
                unique=True,
            ),
        ),
        # Then the data step; reversing this migration leaves the uids as they are.
        migrations.RunPython(populate_uids, reverse_code=migrations.RunPython.noop),
    ]
class Migration(migrations.Migration):
    """Migration 0051: turn ``_branch_code`` into a foreign key on 14 models.

    Each model gets the same definition: a delete-protected link to
    ``lamindb.branch`` stored in the existing ``_branch_code`` column, with
    both the Python-side and database-side default set to ``1``.
    """

    dependencies = [
        ("bionty", "0050_alter_source_uid"),
        ("lamindb", "0100_branch_alter_artifact__branch_code_and_more"),
    ]

    # One AlterField per model, emitted in the originally generated order.
    operations = [
        migrations.AlterField(
            model_name=target,
            name="_branch_code",
            field=lamindb.base.fields.ForeignKey(
                blank=True,
                db_column="_branch_code",
                db_default=1,
                default=1,
                on_delete=django.db.models.deletion.PROTECT,
                to="lamindb.branch",
            ),
        )
        for target in (
            "cellline",
            "cellmarker",
            "celltype",
            "developmentalstage",
            "disease",
            "ethnicity",
            "experimentalfactor",
            "gene",
            "organism",
            "pathway",
            "phenotype",
            "protein",
            "source",
            "tissue",
        )
    ]
class Migration(migrations.Migration):
    """Migration 0056: re-declare ``recordtissue.record``.

    The field becomes a cascading foreign key to ``lamindb.record`` whose
    reverse accessor is named ``values_tissue``.
    """

    dependencies = [("bionty", "0055_rename_cellline_recordcellline_value_and_more")]

    operations = [
        # Positional arguments are (model_name, name, field).
        migrations.AlterField(
            "recordtissue",
            "record",
            lamindb.base.fields.ForeignKey(
                to="lamindb.record",
                related_name="values_tissue",
                on_delete=django.db.models.deletion.CASCADE,
                blank=True,
            ),
        ),
    ]
class Migration(migrations.Migration):
    """Add an ``is_locked`` boolean field (default ``False``) to each listed bionty model."""

    dependencies = [
        ("bionty", "0058_cellline_page_cellmarker_page_celltype_page_and_more"),
    ]

    # A fresh field instance is built for every model, as in the generated migration.
    operations = [
        migrations.AddField(
            model_name=model_name,
            name="is_locked",
            field=lamindb.base.fields.BooleanField(blank=True, default=False),
        )
        for model_name in (
            "cellline",
            "cellmarker",
            "celltype",
            "developmentalstage",
            "disease",
            "ethnicity",
            "experimentalfactor",
            "gene",
            "organism",
            "pathway",
            "phenotype",
            "protein",
            "source",
            "tissue",
        )
    ]
23 | field=lamindb.base.fields.BooleanField( 24 | blank=True, db_default=False, default=False 25 | ), 26 | ), 27 | migrations.AlterField( 28 | model_name="celltype", 29 | name="is_locked", 30 | field=lamindb.base.fields.BooleanField( 31 | blank=True, db_default=False, default=False 32 | ), 33 | ), 34 | migrations.AlterField( 35 | model_name="developmentalstage", 36 | name="is_locked", 37 | field=lamindb.base.fields.BooleanField( 38 | blank=True, db_default=False, default=False 39 | ), 40 | ), 41 | migrations.AlterField( 42 | model_name="disease", 43 | name="is_locked", 44 | field=lamindb.base.fields.BooleanField( 45 | blank=True, db_default=False, default=False 46 | ), 47 | ), 48 | migrations.AlterField( 49 | model_name="ethnicity", 50 | name="is_locked", 51 | field=lamindb.base.fields.BooleanField( 52 | blank=True, db_default=False, default=False 53 | ), 54 | ), 55 | migrations.AlterField( 56 | model_name="experimentalfactor", 57 | name="is_locked", 58 | field=lamindb.base.fields.BooleanField( 59 | blank=True, db_default=False, default=False 60 | ), 61 | ), 62 | migrations.AlterField( 63 | model_name="gene", 64 | name="is_locked", 65 | field=lamindb.base.fields.BooleanField( 66 | blank=True, db_default=False, default=False 67 | ), 68 | ), 69 | migrations.AlterField( 70 | model_name="organism", 71 | name="is_locked", 72 | field=lamindb.base.fields.BooleanField( 73 | blank=True, db_default=False, default=False 74 | ), 75 | ), 76 | migrations.AlterField( 77 | model_name="pathway", 78 | name="is_locked", 79 | field=lamindb.base.fields.BooleanField( 80 | blank=True, db_default=False, default=False 81 | ), 82 | ), 83 | migrations.AlterField( 84 | model_name="phenotype", 85 | name="is_locked", 86 | field=lamindb.base.fields.BooleanField( 87 | blank=True, db_default=False, default=False 88 | ), 89 | ), 90 | migrations.AlterField( 91 | model_name="protein", 92 | name="is_locked", 93 | field=lamindb.base.fields.BooleanField( 94 | blank=True, db_default=False, default=False 
95 | ), 96 | ), 97 | migrations.AlterField( 98 | model_name="source", 99 | name="is_locked", 100 | field=lamindb.base.fields.BooleanField( 101 | blank=True, db_default=False, default=False 102 | ), 103 | ), 104 | migrations.AlterField( 105 | model_name="tissue", 106 | name="is_locked", 107 | field=lamindb.base.fields.BooleanField( 108 | blank=True, db_default=False, default=False 109 | ), 110 | ), 111 | ] 112 | -------------------------------------------------------------------------------- /bionty/migrations/0061_remove_cellline_page_remove_cellmarker_page_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.1.12 on 2025-10-01 14:39 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("bionty", "0060_alter_cellline_is_locked_alter_cellmarker_is_locked_and_more"), 9 | ] 10 | 11 | operations = [] # type: ignore 12 | -------------------------------------------------------------------------------- /bionty/migrations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/laminlabs/bionty/620a707fe3266d49249af3b47bebdac043b36326/bionty/migrations/__init__.py -------------------------------------------------------------------------------- /bionty/uids.py: -------------------------------------------------------------------------------- 1 | """UIDs. 2 | 3 | Entity-related generators: 4 | 5 | .. autosummary:: 6 | :toctree: . 
7 | 8 | gene 9 | protein 10 | cellmarker 11 | ontology 12 | source 13 | 14 | """ 15 | 16 | import hashlib 17 | import secrets 18 | import string 19 | 20 | 21 | def base62(n_char: int) -> str: 22 | """Random Base62 string.""" 23 | alphabet = string.digits + string.ascii_letters.swapcase() 24 | id = "".join(secrets.choice(alphabet) for i in range(n_char)) 25 | return id 26 | 27 | 28 | def encode_base62(s: str) -> str: 29 | from lamin_utils._base62 import encodebytes 30 | 31 | return encodebytes(hashlib.md5(s.encode()).digest()) 32 | 33 | 34 | def hash_id(input_id: str | None = None, *, n_char: int) -> str: 35 | if input_id is None: 36 | return base62(n_char=n_char) 37 | else: 38 | return encode_base62(input_id)[:n_char] 39 | 40 | 41 | def gene(input_id: str | None = None) -> str: 42 | """12 base62.""" 43 | return hash_id(input_id, n_char=12) 44 | 45 | 46 | def protein(input_id: str | None = None) -> str: 47 | """12 base62.""" 48 | return hash_id(input_id, n_char=12) 49 | 50 | 51 | def cellmarker(input_id: str | None = None) -> str: 52 | """12 base62.""" 53 | return hash_id(input_id, n_char=12) 54 | 55 | 56 | def ontology(input_id: str | None = None): 57 | """8 base62.""" 58 | return hash_id(input_id, n_char=8) 59 | 60 | 61 | def source(input_id: str | None = None): 62 | """8 base62.""" 63 | return hash_id(input_id, n_char=8) 64 | 65 | 66 | def encode_uid(registry: type, kwargs: dict): 67 | """The type passed needs to be a subclass of BioRecord.""" 68 | from lamindb.models import SQLRecord 69 | 70 | from . 
import ids 71 | 72 | if kwargs.get("uid") is not None: 73 | # if uid is passed, no encoding is needed 74 | return kwargs 75 | name = registry.__name__.lower() 76 | if hasattr(registry, "organism_id"): 77 | organism = kwargs.get("organism") 78 | if organism is None: 79 | if kwargs.get("organism_id") is not None: 80 | from .models import Organism 81 | 82 | organism = Organism.get(kwargs.get("organism_id")).name 83 | elif isinstance(organism, SQLRecord): 84 | organism = organism.name 85 | else: 86 | organism = "" 87 | 88 | if hasattr(registry, "_ontology_id_field"): 89 | ontology_id_field = registry._ontology_id_field 90 | else: 91 | ontology_id_field = "ontology_id" 92 | if hasattr(registry, "_name_field"): 93 | name_field = registry._name_field 94 | else: 95 | name_field = "name" 96 | 97 | str_to_encode = None 98 | if name == "source": 99 | str_to_encode = f"{kwargs.get('entity', '')}{kwargs.get('name', '')}{kwargs.get('organism', '')}{kwargs.get('version', '')}" 100 | elif name == "gene": # gene has multiple id fields 101 | str_to_encode = kwargs.get(ontology_id_field) 102 | if str_to_encode is None or str_to_encode == "": 103 | str_to_encode = kwargs.get("stable_id") 104 | if str_to_encode is None or str_to_encode == "": 105 | str_to_encode = f"{kwargs.get(name_field)}{organism}" # name + organism 106 | if str_to_encode is None or str_to_encode == "": 107 | raise AssertionError( 108 | f"must provide {ontology_id_field}, stable_id or {name_field}" 109 | ) 110 | else: 111 | str_to_encode = kwargs.get(ontology_id_field) 112 | if str_to_encode is None or str_to_encode == "": 113 | str_to_encode = f"{kwargs.get(name_field)}{organism}" # name + organism 114 | if str_to_encode is None or str_to_encode == "": 115 | raise AssertionError(f"must provide {ontology_id_field} or {name_field}") 116 | 117 | if str_to_encode is not None and len(str_to_encode) > 0: 118 | try: 119 | id_encoder = getattr(ids, name) 120 | except Exception: 121 | if ontology_id_field == "ontology_id": 
def encode_uid_for_hub(registry_name: str, registry_schema_json: dict, kwargs: dict):
    """Encode the uid for the hub.

    Fills ``kwargs["uid"]`` from identifying fields unless a uid is already
    present, and returns ``kwargs`` (mutated in place).

    Note that the `organism` record must be passed in kwargs instead of
    `organism_id`.

    Args:
        registry_name: Registry name; lower-cased to pick the branch below and
            the encoder looked up on the ``ids`` module.
        registry_schema_json: Mapping optionally providing ``_ontology_id_field``
            and ``_name_field``; they default to ``"ontology_id"`` and ``"name"``.
        kwargs: Field values of the record being created.

    Raises:
        AssertionError: If no identifying value could be assembled.
    """
    from . import ids

    # if uid is passed, no encoding is needed
    if kwargs.get("uid") is not None:
        return kwargs

    name = registry_name.lower()

    # here we need to pass the organism record, not organism_id
    organism = kwargs.get("organism", "")
    if organism:
        organism = organism.get("name", "")

    # default to ontology_id
    ontology_id_field = registry_schema_json.get("_ontology_id_field", "ontology_id")
    name_field = registry_schema_json.get("_name_field", "name")

    if name == "source":
        str_to_encode = f"{kwargs.get('entity', '')}{kwargs.get('name', '')}{kwargs.get('organism', '')}{kwargs.get('version', '')}"
    else:
        is_gene = name == "gene"
        # gene has multiple id fields
        id_fields = (ontology_id_field, "stable_id") if is_gene else (ontology_id_field,)

        str_to_encode = None
        for id_field in id_fields:
            if str_to_encode is None or str_to_encode == "":
                str_to_encode = kwargs.get(id_field)

        if str_to_encode is None or str_to_encode == "":
            str_to_encode = f"{kwargs.get(name_field)}{organism}"  # name + organism

        # NOTE(review): the f-string above renders a missing name as "None", so
        # this check only fires when both the name and organism render as empty
        # strings — confirm whether a missing name should be rejected here.
        if str_to_encode is None or str_to_encode == "":
            if is_gene:
                raise AssertionError(
                    f"must provide {ontology_id_field}, stable_id or {name_field}"
                )
            raise AssertionError(f"must provide {ontology_id_field} or {name_field}")

    if str_to_encode is None or len(str_to_encode) == 0:
        return kwargs

    try:
        id_encoder = getattr(ids, name)
    except Exception:
        # No dedicated encoder for this registry: only records keyed by the
        # default ontology_id field fall back to the generic ontology encoder.
        if ontology_id_field != "ontology_id":
            return kwargs
        id_encoder = ids.ontology

    kwargs["uid"] = id_encoder(str_to_encode)
    return kwargs
Every table is versioned & has a tracked reference source (typically, a public ontology). 18 | 3. Most tables have a children column that allows mapping hierarchies. 19 | 4. Adding user-defined records amounts to managing ontologies through Bionty's SQL models. 20 | -------------------------------------------------------------------------------- /docs/guide/config.md: -------------------------------------------------------------------------------- 1 | # Configuration 2 | 3 | ## Public bionty sources 4 | 5 | Bionty maintains a [sources.yaml](https://raw.githubusercontent.com/laminlabs/bionty/main/bionty/base/sources.yaml) listing public sources of each entity. 6 | These sources are curated ([bionty-assets](https://github.com/laminlabs/bionty-assets)) and stored in a [bionty-assets instance](https://lamin.ai/laminlabs/bionty-assets/) to provide fast and reliable access. 7 | Cached source files are stored in your local `bionty/base/_dynamic/` directory. 8 | 9 | ## Display public sources 10 | 11 | The available and currently used ontologies can also be printed with 12 | `bionty.base.display_available_sources` or `bionty.base.display_currently_used_sources`. 13 | 14 | ## Structure of the sources.yaml 15 | 16 | ```yaml 17 | entity: # Bionty entity class name, e.g. CellType 18 | source: # short name of the source, (CURIE prefix for ontologies) e.g. cl 19 | organism: # organism common name, (if none applied, use 'all') e.g. human 20 | version: # version of the source 21 | url: # "link to the source file" 22 | ``` 23 | 24 | ## Default ontologies and versions in sources.yaml 25 | 26 | For each entity, the **first source** and its **maximum version** defined in [sources.yaml](https://raw.githubusercontent.com/laminlabs/bionty/main/bionty/base/sources.yaml) are used as the default. 27 | To set your own default ontology and version, shift the order of entries.
28 | For example, in the following, "doid" is used when "organism" is specified as "human": 29 | 30 | (highlighted sources are considered the default) 31 | 32 | ```{code-block} yaml 33 | --- 34 | emphasize-lines: 2-6,12-16 35 | --- 36 | Disease: 37 | mondo: 38 | all: 39 | 2023-02-06: 40 | source: http://purl.obolibrary.org/obo/mondo/releases/2023-02-06/mondo.owl 41 | 2022-10-11: 42 | source: http://purl.obolibrary.org/obo/mondo/releases/2022-10-11/mondo.owl 43 | name: Mondo Disease Ontology 44 | website: https://mondo.monarchinitiative.org/ 45 | doid: 46 | human: 47 | 2023-01-30: 48 | source: http://purl.obolibrary.org/obo/doid/releases/2023-01-30/doid.obo 49 | name: Human Disease Ontology 50 | website: https://disease-ontology.org/ 51 | inhouse_diseases: 52 | human: 53 | 2000-01-01: 54 | source: http://download-my-diseases.com/releases/2000-01-01/mydiseases.owl 55 | name: My in-house Disease Ontology 56 | website: http://my-website.com 57 | ``` 58 | 59 |
def test_notebooks():
    """Hand the directory containing this file to ``nbproject_test.execute_notebooks``."""
    test.execute_notebooks(Path(__file__).parent, write=True)
@nox.session
def lint(session: nox.Session) -> None:
    """Delegate linting to laminci's ``run_pre_commit``."""
    run_pre_commit(session)


@nox.session
@nox.parametrize("group", ["bionty-base", "bionty-core", "bionty-docs"])
def build(session: nox.Session, group: str):
    """Install dependencies and run the pytest target for one CI group.

    Args:
        session: The active nox session.
        group: One of "bionty-base", "bionty-core" or "bionty-docs". The docs
            group additionally runs ``lamin init`` and ``build_docs``.
    """
    # point to "main" for PRs, to "release" for main
    # NOTE(review): both arms currently evaluate to "main" — confirm whether
    # "release" was intended for pushes.
    branch = "main" if IS_PR else "main"
    install_lamindb(session, branch=branch)
    run(session, "uv pip install --system wetlab")
    session.run(*"uv pip install --system -e .[dev]".split())

    coverage_args = "--cov=bionty --cov-append --cov-report=term-missing"
    pytest_commands = {
        "bionty-base": f"pytest {coverage_args} ./tests/base",
        "bionty-core": f"pytest {coverage_args} ./tests/core",
        "bionty-docs": f"pytest -s {coverage_args} ./docs/guide",
    }
    pytest_command = pytest_commands.get(group)
    if pytest_command is not None:
        session.run(*pytest_command.split())

    if group == "bionty-docs":
        run(session, "lamin init --storage ./docsbuild --modules bionty")
        build_docs(session, strict=True)
License", 13 | "Programming Language :: Python :: 3.10", 14 | "Programming Language :: Python :: 3.11", 15 | "Programming Language :: Python :: 3.12", 16 | "Programming Language :: Python :: 3.13", 17 | ] 18 | dependencies = [ 19 | "lamindb>=1.12a1", 20 | "lamindb_setup>=0.81.2", 21 | "lamin_utils>=0.13.9", 22 | "requests", 23 | "pyyaml", 24 | ] 25 | 26 | [project.urls] 27 | Home = "https://github.com/laminlabs/bionty" 28 | 29 | [project.optional-dependencies] 30 | dev = [ 31 | "laminci", 32 | "pre-commit", 33 | "pytest>=6.0", 34 | "pytest-cov", 35 | "nbproject-test", 36 | "pronto", 37 | "pymysql", 38 | "bioregistry", 39 | ] 40 | 41 | [tool.pytest.ini_options] 42 | testpaths = [ 43 | "tests", 44 | ] 45 | filterwarnings = [ 46 | "ignore::DeprecationWarning:botocore.*", 47 | "ignore::pronto.utils.warnings.SyntaxWarning", 48 | "ignore::pronto.utils.warnings.NotImplementedWarning", 49 | ] 50 | 51 | [tool.coverage.run] 52 | omit = [ 53 | "bionty/core/*", 54 | "bionty/migrations/*", 55 | "bionty/models.py", 56 | "bionty/ids.py", 57 | ] 58 | 59 | [tool.ruff] 60 | src = ["src"] 61 | line-length = 88 62 | lint.select = [ 63 | "F", # Errors detected by Pyflakes 64 | "E", # Error detected by Pycodestyle 65 | "W", # Warning detected by Pycodestyle 66 | "I", # isort 67 | "D", # pydocstyle 68 | "B", # flake8-bugbear 69 | "TID", # flake8-tidy-imports 70 | "C4", # flake8-comprehensions 71 | "BLE", # flake8-blind-except 72 | "UP", # pyupgrade 73 | "RUF100", # Report unused noqa directives 74 | "TCH", # Typing imports 75 | "NPY", # Numpy specific rules 76 | "PTH" # Use pathlib 77 | ] 78 | lint.ignore = [ 79 | # Do not catch blind exception: `Exception` 80 | "BLE001", 81 | # Errors from function calls in argument defaults. These are fine when the result is immutable. 
82 | "B008", 83 | # line too long -> we accept long comment lines; black gets rid of long code lines 84 | "E501", 85 | # Do not assign a lambda expression, use a def -> lambda expression assignments are convenient 86 | "E731", 87 | # allow I, O, l as variable names -> I is the identity matrix 88 | "E741", 89 | # Missing docstring in public module 90 | "D100", 91 | # undocumented-public-class 92 | "D101", 93 | # Missing docstring in public method 94 | "D102", 95 | # Missing docstring in public function 96 | "D103", 97 | # Missing docstring in public package 98 | "D104", 99 | # __magic__ methods are often self-explanatory, allow missing docstrings 100 | "D105", 101 | # Missing docstring in public nested class 102 | "D106", 103 | # Missing docstring in __init__ 104 | "D107", 105 | ## Disable one in each pair of mutually incompatible rules 106 | # We don’t want a blank line before a class docstring 107 | "D203", 108 | # 1 blank line required after class docstring 109 | "D204", 110 | # first line should end with a period [Bug: doesn't work with single-line docstrings] 111 | # We want docstrings to start immediately after the opening triple quote 112 | "D213", 113 | # blank line required between summary line and description 114 | "D205", 115 | # Section underline is over-indented ("{name}") 116 | "D215", 117 | # First line should end with a period 118 | "D400", 119 | # First line should be in imperative mood; try rephrasing 120 | "D401", 121 | # First word of the first line should be capitalized: {} -> {} 122 | "D403", 123 | # First word of the docstring should not be "This" 124 | "D404", 125 | # Section name should end with a newline ("{name}") 126 | "D406", 127 | # Missing dashed underline after section ("{name}") 128 | "D407", 129 | # Section underline should be in the line following the section's name ("{name}") 130 | "D408", 131 | # Section underline should match the length of its name ("{name}") 132 | "D409", 133 | # No blank lines allowed between a section
header and its content ("{name}") 134 | "D412", 135 | # Missing blank line after last section ("{name}") 136 | "D413", 137 | # Missing argument description 138 | "D417", 139 | # Imports unused 140 | "F401", 141 | # camcelcase imported as lowercase 142 | "N813", 143 | # module import not at top level of file 144 | "E402", 145 | # open()` should be replaced by `Path.open() 146 | "PTH123", 147 | ] 148 | 149 | [tool.ruff.lint.pydocstyle] 150 | convention = "google" 151 | 152 | [tool.ruff.lint.per-file-ignores] 153 | "docs/*" = ["I"] 154 | "tests/*" = ["D"] 155 | "*/__init__.py" = ["F401"] 156 | -------------------------------------------------------------------------------- /scripts/update_ontology_sources.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "32bd32d7", 6 | "metadata": {}, 7 | "source": [ 8 | "# Update ontology sources" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "id": "66ec8c1e", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "!lamin connect laminlabs/bionty-assets" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "id": "905819d0", 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import bionty as bt\n", 29 | "import lamindb as ln\n", 30 | "from bionty.base._ontology_url import get_ontology_url\n", 31 | "from bionty.core._source import register_source_in_bionty_assets\n", 32 | "from lamin_utils import logger\n", 33 | "\n", 34 | "ln.settings.verbosity = \"hint\"\n", 35 | "\n", 36 | "ln.track(\"7extigZj6QNG\")" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "id": "cff430f5", 42 | "metadata": {}, 43 | "source": [ 44 | "All entities that are not listed in the following matrix must be curated manually as they require intervention.\n", 45 | "Consult https://bionty-assets-gczz.netlify.app/ingest/ for guidance." 
46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "id": "321c31b9", 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "configs = [\n", 56 | " (\"Disease\", \"mondo\", \"all\"),\n", 57 | " (\"CellType\", \"cl\", \"all\"),\n", 58 | " (\"Organism\", \"ncbitaxon\", \"all\"),\n", 59 | " (\n", 60 | " \"Tissue\",\n", 61 | " \"uberon\",\n", 62 | " \"all\",\n", 63 | " ), # This may take a long time due to parsing, set verbosity to hint to see progress updates\n", 64 | " (\"Disease\", \"doid\", \"human\"),\n", 65 | " (\"ExperimentalFactor\", \"efo\", \"all\"),\n", 66 | " (\"Phenotype\", \"pato\", \"all\"),\n", 67 | " (\"Phenotype\", \"hp\", \"human\"),\n", 68 | " (\"Pathway\", \"go\", \"all\"),\n", 69 | " # (\"Pathway\", \"pw\", \"all\"), Currently leads to a URL error - upstream issue\n", 70 | " (\"DevelopmentalStage\", \"hsapdv\", \"human\"),\n", 71 | " (\"DevelopmentalStage\", \"mmusdv\", \"mouse\"),\n", 72 | " (\"Ethnicity\", \"hancestro\", \"human\"),\n", 73 | " # (\"Drug\", \"dron\", \"all\"), Not a Bionty entity (yet)\n", 74 | "]" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "id": "c51bf23b", 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "for i, config in enumerate(configs, 1):\n", 85 | " entity, source_name, organism, *version = config\n", 86 | " config_id = f\"{entity}_{source_name}_{organism}\"\n", 87 | " logger.info(f\"[{i}/{len(configs)}] Processing {config_id}\")\n", 88 | "\n", 89 | " try:\n", 90 | " *_, version_to_use = get_ontology_url(\n", 91 | " prefix=source_name, version=version[0] if version else None\n", 92 | " )\n", 93 | "\n", 94 | " new_df = getattr(bt.base, entity)(\n", 95 | " source=source_name, version=version_to_use\n", 96 | " ).to_dataframe()\n", 97 | "\n", 98 | " if new_df.empty or not {\"name\", \"synonyms\"}.issubset(new_df.columns):\n", 99 | " logger.warning(f\"{config_id} failed validation. 
Skipping...\")\n", 100 | " continue\n", 101 | "\n", 102 | " try:\n", 103 | " current_source = bt.Source.filter(\n", 104 | " entity=f\"bionty.{entity}\",\n", 105 | " name=source_name,\n", 106 | " organism=organism,\n", 107 | " currently_used=True,\n", 108 | " ).one_or_none()\n", 109 | " if current_source:\n", 110 | " current_df = getattr(bt.base, entity)(\n", 111 | " source=current_source\n", 112 | " ).to_dataframe()\n", 113 | " if new_df.shape[0] < current_df.shape[0]:\n", 114 | " logger.warning(\n", 115 | " f\"{config_id} has fewer rows than current. Skipping...\"\n", 116 | " )\n", 117 | " continue\n", 118 | " except ValueError as e:\n", 119 | " if \"No source url is available\" in str(e):\n", 120 | " pass # This occurs during testing in local instances\n", 121 | " else:\n", 122 | " raise\n", 123 | " except Exception:\n", 124 | " pass\n", 125 | "\n", 126 | " source_rec = getattr(bt, entity).add_source(\n", 127 | " source=source_name, version=version_to_use\n", 128 | " )\n", 129 | " register_source_in_bionty_assets(\n", 130 | " f\"{bt.base.settings.dynamicdir}/df_{organism}__{source_name}__{version_to_use}__{entity}.parquet\",\n", 131 | " source=source_rec,\n", 132 | " is_dataframe=True,\n", 133 | " )\n", 134 | " register_source_in_bionty_assets(\n", 135 | " f\"{bt.base.settings.dynamicdir}/ontology_{organism}__{source_name}__{version_to_use}__{entity}\",\n", 136 | " source=source_rec,\n", 137 | " is_dataframe=False,\n", 138 | " )\n", 139 | "\n", 140 | " logger.info(f\"registered {config_id} version {version_to_use}\")\n", 141 | "\n", 142 | " except ValueError as e:\n", 143 | " if \"artifact already exists\" in str(e):\n", 144 | " logger.warning(f\"{config_id} already registered. Skipping...\")\n", 145 | " else:\n", 146 | " logger.error(f\"{config_id} failed: {e}\")\n", 147 | " except FileNotFoundError:\n", 148 | " logger.warning(f\"{config_id} files not found. 
Skipping...\")\n", 149 | " except Exception as e:\n", 150 | " logger.error(\n", 151 | " f\"[{i}/{len(configs)}] {config_id} failed: {type(e).__name__}: {str(e)}\"\n", 152 | " )\n", 153 | " continue" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "id": "f93f3b4a", 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": [ 163 | "ln.finish()" 164 | ] 165 | } 166 | ], 167 | "metadata": { 168 | "kernelspec": { 169 | "display_name": "lamindb", 170 | "language": "python", 171 | "name": "python3" 172 | }, 173 | "language_info": { 174 | "codemirror_mode": { 175 | "name": "ipython", 176 | "version": 3 177 | }, 178 | "file_extension": ".py", 179 | "mimetype": "text/x-python", 180 | "name": "python", 181 | "nbconvert_exporter": "python", 182 | "pygments_lexer": "ipython3", 183 | "version": "3.12.8" 184 | } 185 | }, 186 | "nbformat": 4, 187 | "nbformat_minor": 5 188 | } 189 | -------------------------------------------------------------------------------- /tests/base/dev/test_handle_sources.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | from pathlib import Path 3 | 4 | import pytest 5 | from bionty.base.dev._handle_sources import ( 6 | parse_currently_used_sources, 7 | parse_sources_yaml, 8 | ) 9 | 10 | 11 | @pytest.fixture(scope="function") 12 | def versions_yaml_replica(): 13 | input_file_content = """ 14 | version: "0.3.0" 15 | Organism: 16 | ensembl: 17 | vertebrates: 18 | latest-version: release-112 19 | url: https://ftp.ensembl.org/pub/{version}/species_EnsemblVertebrates.txt 20 | name: Ensembl 21 | website: https://www.ensembl.org/index.html 22 | Gene: 23 | ensembl: 24 | human: 25 | latest-version: release-112 26 | url: s3://bionty-assets/df_human__ensembl__{version}__Gene.parquet 27 | mouse: 28 | latest-version: release-112 29 | url: s3://bionty-assets/df_mouse__ensembl__{version}__Gene.parquet 30 | name: Ensembl 31 | website: https://www.ensembl.org/index.html 32 
| CellType: 33 | cl: 34 | all: 35 | latest-version: 2024-08-16 36 | url: http://purl.obolibrary.org/obo/cl/releases/{version}/cl.owl 37 | name: Cell Ontology 38 | website: https://obophenotype.github.io/cell-ontology 39 | """ 40 | with tempfile.NamedTemporaryFile(mode="w+", delete=False) as f: 41 | f.write(input_file_content) 42 | f.flush() 43 | yield f.name 44 | 45 | Path(f.name).unlink() 46 | 47 | 48 | def test_parse_versions_yaml(versions_yaml_replica): 49 | parsed_df = parse_sources_yaml(versions_yaml_replica) 50 | assert parsed_df.shape == (4, 7) 51 | assert all(parsed_df["entity"].values == ["Organism", "Gene", "Gene", "CellType"]) 52 | assert all(parsed_df["organism"].values == ["vertebrates", "human", "mouse", "all"]) 53 | assert all(parsed_df["name"].values == ["ensembl", "ensembl", "ensembl", "cl"]) 54 | 55 | 56 | def test_parse_current_versions(versions_yaml_replica): 57 | expected = { 58 | "Organism": {"vertebrates": {"ensembl": "release-112"}}, 59 | "Gene": { 60 | "human": {"ensembl": "release-112"}, 61 | "mouse": {"ensembl": "release-112"}, 62 | }, 63 | "CellType": {"all": {"cl": "2024-08-16"}}, 64 | } 65 | 66 | assert parse_currently_used_sources(versions_yaml_replica) == expected 67 | -------------------------------------------------------------------------------- /tests/base/dev/test_io.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | from pathlib import Path 3 | 4 | import pytest 5 | from bionty.base.dev._io import url_download 6 | 7 | 8 | @pytest.fixture 9 | def local(tmp_path): 10 | url = "https://bionty-assets.s3.amazonaws.com/bfxpipelines.json" 11 | localpath = tmp_path / Path(url).name 12 | yield localpath, url 13 | if localpath.exists(): 14 | localpath.unlink() 15 | 16 | 17 | def test_url_download(local): 18 | localpath = local[0] 19 | url = local[1] 20 | assert not localpath.exists() 21 | 22 | downloaded_path = Path(url_download(url=url, localpath=localpath)) 23 | assert 
downloaded_path.exists() 24 | 25 | 26 | def test_local_file(): 27 | with tempfile.TemporaryDirectory() as temp_dir: 28 | local_file = Path(temp_dir) / "test.txt" 29 | target_file = Path(temp_dir) / "downloaded.txt" 30 | test_content = "temporary file" 31 | 32 | local_file.write_text(test_content) 33 | assert local_file.exists(), "Test file was not created" 34 | 35 | downloaded_path = Path( 36 | url_download(url=f"file://{local_file}", localpath=target_file) 37 | ) 38 | 39 | assert downloaded_path.exists(), "Downloaded file not found" 40 | assert downloaded_path.read_text() == test_content, "Content mismatch" 41 | 42 | if downloaded_path.exists(): 43 | downloaded_path.unlink() 44 | -------------------------------------------------------------------------------- /tests/base/entities/test_bfxpipeline.py: -------------------------------------------------------------------------------- 1 | import bionty.base as bt_base 2 | import pandas as pd 3 | 4 | 5 | def test_lamin_bfxpipeline_inspect_name(): 6 | df = pd.DataFrame( 7 | index=[ 8 | "nf-core methylseq v2.6.0", 9 | "Cell Ranger v8.0.0", 10 | "This bfx pipeline does not exist", 11 | ] 12 | ) 13 | 14 | bfxp = bt_base.BFXPipeline(source="lamin") 15 | inspected_df = bfxp.inspect(df.index, field=bfxp.name, return_df=True) 16 | 17 | inspect = inspected_df["__validated__"].reset_index(drop=True) 18 | expected_series = pd.Series([True, True, False]) 19 | 20 | assert inspect.equals(expected_series) 21 | -------------------------------------------------------------------------------- /tests/base/entities/test_biosample.py: -------------------------------------------------------------------------------- 1 | import bionty.base as bt_base 2 | 3 | 4 | def test_ncbi_biosample(): 5 | bs = bt_base.BioSample(source="ncbi") 6 | df = bs.to_dataframe() 7 | assert "edta_inhibitor_tested" in df.abbr.tolist() 8 | -------------------------------------------------------------------------------- /tests/base/entities/test_cellline.py: 
-------------------------------------------------------------------------------- 1 | import bionty.base as bt_base 2 | import pandas as pd 3 | 4 | 5 | def test_clo_cellline_inspect_name(): 6 | df = pd.DataFrame( 7 | index=[ 8 | "253D cell", 9 | "HEK293", 10 | "2C1H7 cell", 11 | "283TAg cell", 12 | "This cell line does not exist", 13 | ] 14 | ) 15 | 16 | cl = bt_base.CellLine(source="clo") 17 | inspected_df = cl.inspect(df.index, field=cl.name, return_df=True) 18 | 19 | inspect = inspected_df["__validated__"].reset_index(drop=True) 20 | expected_series = pd.Series([True, True, True, True, False]) 21 | 22 | assert inspect.equals(expected_series) 23 | 24 | 25 | def test_depmap_cellline_inspect_name(): 26 | df = pd.DataFrame( 27 | index=[ 28 | "NIH:OVCAR-3", 29 | "NRH-LMS1", 30 | "HEL", 31 | "HEL 92.1.7", 32 | "This cell line does not exist", 33 | ] 34 | ) 35 | 36 | cl = bt_base.CellLine(source="depmap") 37 | inspected_df = cl.inspect(df.index, field=cl.name, return_df=True) 38 | 39 | inspect = inspected_df["__validated__"].reset_index(drop=True) 40 | expected_series = pd.Series([True, True, True, True, False]) 41 | 42 | assert inspect.equals(expected_series) 43 | -------------------------------------------------------------------------------- /tests/base/entities/test_cellmarker.py: -------------------------------------------------------------------------------- 1 | import bionty.base as bt_base 2 | import pandas as pd 3 | 4 | 5 | def test_cellmarker_cellmarker_inspect_name_human(): 6 | df = pd.DataFrame( 7 | index=["CCR7", "CD69", "CD8", "CD45RA", "This protein does not exist"] 8 | ) 9 | 10 | cm = bt_base.CellMarker(source="cellmarker") 11 | curated = cm.inspect(df.index, field=cm.name) 12 | 13 | assert curated["validated"] == ["CD69", "CD8", "CD45RA"] 14 | assert curated["non_validated"] == ["CCR7", "This protein does not exist"] 15 | 16 | 17 | def test_cellmarker_cellmarker_inspect_name_mouse(): 18 | df = pd.DataFrame( 19 | index=["Tcf4", "Cd36", "Cd34", "Lgr6", 
"This protein does not exist"] 20 | ) 21 | 22 | cm = bt_base.CellMarker(source="cellmarker", organism="mouse") 23 | inspected_df = cm.inspect(df.index, field=cm.name, return_df=True) 24 | 25 | inspect = inspected_df["__validated__"].reset_index(drop=True) 26 | expected_series = pd.Series([True, False, True, True, False]) 27 | 28 | assert inspect.equals(expected_series) 29 | -------------------------------------------------------------------------------- /tests/base/entities/test_celltype.py: -------------------------------------------------------------------------------- 1 | import bionty.base as bt_base 2 | import pandas as pd 3 | 4 | 5 | def test_cl_celltype_inspect_name(): 6 | df = pd.DataFrame( 7 | index=[ 8 | "Boettcher cell", 9 | "bone marrow cell", 10 | "interstitial cell of ovary", 11 | "pancreatic ductal cell", 12 | "This cell type does not exist", 13 | ] 14 | ) 15 | 16 | ct = bt_base.CellType(source="cl") 17 | inspected_df = ct.inspect(df.index, field=ct.name, return_df=True) 18 | 19 | inspect = inspected_df["__validated__"].reset_index(drop=True) 20 | expected_series = pd.Series([True, True, True, True, False]) 21 | 22 | assert inspect.equals(expected_series) 23 | 24 | 25 | def test_cl_celltype_version(): 26 | # old version, not in s3://bionty-assets 27 | ct = bt_base.CellType(version="2020-05-20") 28 | assert ct.to_dataframe().shape[0] == 2355 29 | -------------------------------------------------------------------------------- /tests/base/entities/test_developmentalstage.py: -------------------------------------------------------------------------------- 1 | import bionty.base as bt_base 2 | import pandas as pd 3 | 4 | 5 | def test_hsapdv_developmentalstage_inspect_name(): 6 | df = pd.DataFrame( 7 | index=[ 8 | "blastula stage", 9 | "Carnegie stage 03", 10 | "neurula stage", 11 | "organogenesis stage", 12 | "This developmental stage does not exist", 13 | ] 14 | ) 15 | 16 | ds = bt_base.DevelopmentalStage(source="hsapdv") 17 | inspected_df = 
ds.inspect(df.index, field=ds.name, return_df=True) 18 | 19 | inspect = inspected_df["__validated__"].reset_index(drop=True) 20 | expected_series = pd.Series([True, True, True, True, False]) 21 | 22 | assert inspect.equals(expected_series) 23 | -------------------------------------------------------------------------------- /tests/base/entities/test_disease.py: -------------------------------------------------------------------------------- 1 | import bionty.base as bt_base 2 | import pandas as pd 3 | 4 | 5 | def test_mondo_disease_inspect_name(): 6 | df = pd.DataFrame( 7 | index=[ 8 | "supraglottis cancer", 9 | "alexia", 10 | "trigonitis", 11 | "paranasal sinus disorder", 12 | "This disease does not exist", 13 | ] 14 | ) 15 | 16 | ds = bt_base.Disease(source="mondo", version="2025-06-03") 17 | inspected_df = ds.inspect(df.index, field=ds.name, return_df=True) 18 | 19 | inspect = inspected_df["__validated__"].reset_index(drop=True) 20 | expected_series = pd.Series([True, True, True, True, False]) 21 | 22 | assert inspect.equals(expected_series) 23 | 24 | 25 | def test_doid_disease_inspect_ontology_id(): 26 | df = pd.DataFrame( 27 | index=[ 28 | "DOID:0001816", 29 | "DOID:0002116", 30 | "DOID:5547", 31 | "DOID:5551", 32 | "This disease does not exist", 33 | ] 34 | ) 35 | 36 | ds = bt_base.Disease(source="doid") 37 | inspected_df = ds.inspect(df.index, field=ds.ontology_id, return_df=True) 38 | 39 | inspect = inspected_df["__validated__"].reset_index(drop=True) 40 | expected_series = pd.Series([True, True, True, True, False]) 41 | 42 | assert inspect.equals(expected_series) 43 | 44 | 45 | def test_icd_9_disease_inspect_name(): 46 | df = pd.DataFrame( 47 | index=[ 48 | "Cholera d/t vib cholerae", 49 | "Typhoid fever", 50 | "Mult gest-plac/sac NOS", 51 | "Paratyphoid fever a", 52 | "This disease does not exist", 53 | ] 54 | ) 55 | 56 | ds = bt_base.Disease(source="icd", version="icd-9-2011") 57 | inspected_df = ds.inspect(df.index, field=ds.name, return_df=True) 58 | 
59 | inspect = inspected_df["__validated__"].reset_index(drop=True) 60 | expected_series = pd.Series([True, True, True, True, False]) 61 | 62 | assert inspect.equals(expected_series) 63 | 64 | 65 | def test_icd_10_disease_inspect_name(): 66 | df = pd.DataFrame( 67 | index=[ 68 | "Vaping-related disorder", 69 | "COVID-19", 70 | "Typhoid fever with heart involvement", 71 | "Typhoid fever, unspecified", 72 | "This disease does not exist", 73 | ] 74 | ) 75 | 76 | ds = bt_base.Disease(source="icd", version="icd-10-2020") 77 | inspected_df = ds.inspect(df.index, field=ds.name, return_df=True) 78 | 79 | inspect = inspected_df["__validated__"].reset_index(drop=True) 80 | expected_series = pd.Series([True, True, True, True, False]) 81 | 82 | assert inspect.equals(expected_series) 83 | 84 | 85 | def test_icd_11_disease_inspect_name(): 86 | df = pd.DataFrame( 87 | index=[ 88 | "Certain infectious or parasitic diseases", 89 | "Cholera", 90 | "Intestinal infection due to other Vibrio", 91 | "Gastroenteritis or colitis of infectious origin", 92 | "This disease does not exist", 93 | ] 94 | ) 95 | 96 | ds = bt_base.Disease(source="icd", version="icd-11-2023") 97 | inspected_df = ds.inspect(df.index, field=ds.name, return_df=True) 98 | 99 | inspect = inspected_df["__validated__"].reset_index(drop=True) 100 | expected_series = pd.Series([True, True, True, True, False]) 101 | 102 | assert inspect.equals(expected_series) 103 | -------------------------------------------------------------------------------- /tests/base/entities/test_drug.py: -------------------------------------------------------------------------------- 1 | import bionty.base as bt_base 2 | import pandas as pd 3 | 4 | 5 | def test_dron_drug_inspect_name(): 6 | df = pd.DataFrame( 7 | index=[ 8 | "triflusal", 9 | "citrus bioflavonoids", 10 | "Candida albicans", 11 | "Hyoscyamus extract", 12 | "This drug does not exist", 13 | ] 14 | ) 15 | 16 | dt = bt_base.Drug(source="dron") 17 | inspected_df = dt.inspect(df.index, 
field=dt.name, return_df=True) 18 | 19 | inspect = inspected_df["__validated__"].reset_index(drop=True) 20 | expected_series = pd.Series([True, True, True, True, False]) 21 | 22 | assert inspect.equals(expected_series) 23 | 24 | 25 | def test_chebi_drug_inspect_name(): 26 | df = pd.DataFrame( 27 | index=[ 28 | "navitoclax", 29 | "Vismione D", 30 | "(+)-Atherospermoline", 31 | "N-[(2R,3S,6R)-2-(hydroxymethyl)-6-[2-[[oxo-[4-(trifluoromethyl)anilino]methyl]amino]ethyl]-3-oxanyl]-3-pyridinecarboxamide", 32 | "This drug does not exist", 33 | ] 34 | ) 35 | 36 | dt = bt_base.Drug(source="chebi") 37 | inspected_df = dt.inspect(df.index, field=dt.name, return_df=True) 38 | 39 | inspect = inspected_df["__validated__"].reset_index(drop=True) 40 | expected_series = pd.Series([True, True, True, True, False]) 41 | 42 | assert inspect.equals(expected_series) 43 | 44 | 45 | def test_chebi_chembl_id(): 46 | dt = bt_base.Drug(source="chebi") 47 | assert "CHEMBL500609" in dt.to_dataframe()["chembl_id"].values 48 | -------------------------------------------------------------------------------- /tests/base/entities/test_ethnicity.py: -------------------------------------------------------------------------------- 1 | import bionty.base as bt_base 2 | import pandas as pd 3 | 4 | 5 | def test_hancestro_ethnicity_inspect_name(): 6 | df = pd.DataFrame( 7 | index=[ 8 | "Mende", 9 | "European", 10 | "South Asian", 11 | "Arab", 12 | "This ethnicity does not exist", 13 | ] 14 | ) 15 | 16 | et = bt_base.Ethnicity(source="hancestro") 17 | df.index = et.standardize(df.index) 18 | inspected_df = et.inspect(df.index, field=et.name, return_df=True) 19 | 20 | inspect = inspected_df["__validated__"].reset_index(drop=True) 21 | expected_series = pd.Series([True, True, True, True, False]) 22 | 23 | assert inspect.equals(expected_series) 24 | -------------------------------------------------------------------------------- /tests/base/entities/test_experimentalfactor.py: 
-------------------------------------------------------------------------------- 1 | import bionty.base as bt_base 2 | import pandas as pd 3 | 4 | 5 | def test_efo_experimental_factor_inspect_ontology_id(): 6 | df = pd.DataFrame( 7 | index=[ 8 | "EFO:1002048", 9 | "EFO:1002050", 10 | "EFO:1002047", 11 | "EFO:1002049", 12 | "This readout does not exist", 13 | ] 14 | ) 15 | 16 | ro = bt_base.ExperimentalFactor(source="efo") 17 | inspected_df = ro.inspect(df.index, ro.ontology_id, return_df=True) 18 | 19 | inspect = inspected_df["__validated__"].reset_index(drop=True) 20 | expected_series = pd.Series([True, True, True, True, False]) 21 | 22 | assert inspect.equals(expected_series) 23 | 24 | 25 | def test_efo_shape(): 26 | """We observed issues with new EFO versions not including all records.""" 27 | # 3.78.0 is the latest version where we had initially observed this issue 28 | # If this works well, we may unpin the fixed version 29 | assert bt_base.ExperimentalFactor(version="3.78.0").to_dataframe().shape[0] > 18000 30 | -------------------------------------------------------------------------------- /tests/base/entities/test_gene.py: -------------------------------------------------------------------------------- 1 | import bionty.base as bt_base 2 | import pandas as pd 3 | import pytest 4 | from bionty.base.entities._gene import MappingResult 5 | 6 | 7 | @pytest.fixture(scope="module") 8 | def genes(): 9 | data = { 10 | "gene symbol": ["A1CF", "A1BG", "FANCD1", "corrupted"], 11 | "ncbi id": ["29974", "1", "5133", "corrupted"], 12 | "ensembl_gene_id": [ 13 | "ENSG00000148584", 14 | "ENSG00000121410", 15 | "ENSG00000188389", 16 | "ENSG0000corrupted", 17 | ], 18 | } 19 | df = pd.DataFrame(data).set_index("ensembl_gene_id") 20 | 21 | gn = bt_base.Gene(source="ensembl") 22 | 23 | return df, gn 24 | 25 | 26 | def test_gene_ensembl_inspect_hgnc_id(genes): 27 | df, gn = genes 28 | 29 | inspected_df = gn.inspect(df["ncbi id"], field=gn.ncbi_gene_id, return_df=True) 30 | 31 | 
inspect = inspected_df["__validated__"].reset_index(drop=True) 32 | expected_series = pd.Series([True, True, True, False]) 33 | 34 | assert inspect.equals(expected_series) 35 | 36 | 37 | def test_ensemblgene_download(): 38 | from bionty.base.entities._gene import EnsemblGene 39 | 40 | ensembl_gene = EnsemblGene(organism="human", version="release-110") 41 | assert ensembl_gene._organism.name == "human" 42 | 43 | external_df = ensembl_gene.external_dbs() 44 | assert external_df.shape[0] > 1 45 | 46 | df = ensembl_gene.download_df(external_db_names={"HGNC": "hgnc_id"}) 47 | assert df.shape[0] > 6000 48 | assert "hgnc_id" in df.columns 49 | 50 | 51 | def test_ensemblgene_map_legacy_ids(): 52 | gn = bt_base.Gene(organism="human", version="release-110") 53 | legacy_genes = [ 54 | "ENSG00000280710", 55 | "ENSG00000261490", 56 | "ENSG00000203812", 57 | "ENSG00000204092", 58 | "ENSG00000215271", 59 | ] 60 | result = gn.map_legacy_ids(legacy_genes) 61 | assert result == MappingResult( 62 | mapped={ 63 | "ENSG00000204092": "ENSG00000226070", 64 | "ENSG00000215271": "ENSG00000290292", 65 | "ENSG00000261490": "ENSG00000071127", 66 | "ENSG00000280710": "ENSG00000125304", 67 | }, 68 | ambiguous={"ENSG00000203812": ["ENSG00000288859", "ENSG00000288825"]}, 69 | unmapped=[], 70 | ) 71 | 72 | result = gn.map_legacy_ids("ENSG00000280710") 73 | assert result == MappingResult( 74 | mapped={"ENSG00000280710": "ENSG00000125304"}, 75 | ambiguous={}, 76 | unmapped=[], 77 | ) 78 | 79 | result = gn.map_legacy_ids(["ENSG00000280710"]) 80 | assert result == MappingResult( 81 | mapped={"ENSG00000280710": "ENSG00000125304"}, 82 | ambiguous={}, 83 | unmapped=[], 84 | ) 85 | 86 | 87 | def test_old_ensembl_version(): 88 | gene_ontology_102 = bt_base.Gene( 89 | source="ensembl", organism="mouse", version="release-102" 90 | ) 91 | assert ( 92 | "ENSMUSG00000021745" 93 | in gene_ontology_102.to_dataframe()["ensembl_gene_id"].values 94 | ) 95 | 96 | gene_ontology_112 = bt_base.Gene( 97 | 
source="ensembl", organism="mouse", version="release-112" 98 | ) 99 | assert ( 100 | "ENSMUSG00000021745" 101 | not in gene_ontology_112.to_dataframe()["ensembl_gene_id"].values 102 | ) 103 | -------------------------------------------------------------------------------- /tests/base/entities/test_organism.py: -------------------------------------------------------------------------------- 1 | import bionty.base as bt_base 2 | import pandas as pd 3 | 4 | 5 | def test_ensembl_organism_inspect_name(): 6 | df = pd.DataFrame( 7 | index=[ 8 | "spiny chromis", 9 | "silver-eye", 10 | "platyfish", 11 | "california sea lion", 12 | "This organism does not exist", 13 | ] 14 | ) 15 | 16 | sp = bt_base.Organism(source="ensembl") 17 | inspected_df = sp.inspect(df.index, field=sp.name, return_df=True) 18 | 19 | inspect = inspected_df["__validated__"].reset_index(drop=True) 20 | expected_series = pd.Series([True, True, True, True, False]) 21 | 22 | assert inspect.equals(expected_series) 23 | 24 | 25 | def test_ncbitaxon_organism_inspect_name(): 26 | df = pd.DataFrame( 27 | index=[ 28 | "human", 29 | "ancylobacter aquaticus", 30 | "microbacterium sp. 
6.11-vpa", 31 | "calamagrostis varia", 32 | "This organism does not exist", 33 | ] 34 | ) 35 | sp = bt_base.Organism(source="ncbitaxon") 36 | inspected_df = sp.inspect(df.index, field=sp.name, return_df=True) 37 | 38 | inspect = inspected_df["__validated__"].reset_index(drop=True) 39 | expected_series = pd.Series([True, True, True, True, False]) 40 | 41 | assert inspect.equals(expected_series) 42 | 43 | 44 | def test_ensembl_organism_version(): 45 | df = bt_base.Organism(version="release-108").to_dataframe() 46 | assert df.shape[0] == 315 47 | 48 | 49 | def test_ensembl_organism_taxa(): 50 | for sp in ["bacteria", "plants", "fungi", "metazoa"]: 51 | df = bt_base.Organism(taxa=sp).to_dataframe() 52 | assert df.shape[0] > 10 53 | 54 | 55 | def test_ncbitaxon_organism(): 56 | df = bt_base.Organism(source="ncbitaxon").to_dataframe() 57 | assert df.shape[0] > 10 58 | -------------------------------------------------------------------------------- /tests/base/entities/test_pathway.py: -------------------------------------------------------------------------------- 1 | import bionty.base as bt_base 2 | import pandas as pd 3 | 4 | 5 | def test_pw_go_inspect_ontology_id(): 6 | df = pd.DataFrame( 7 | index=[ 8 | "GO:1905210", 9 | "GO:1905211", 10 | "GO:1905212", 11 | "GO:1905208", 12 | "This pathway does not exist", 13 | ] 14 | ) 15 | 16 | pw = bt_base.Pathway(source="go") 17 | inspected_df = pw.inspect(df.index, pw.ontology_id, return_df=True) 18 | 19 | inspect = inspected_df["__validated__"].reset_index(drop=True) 20 | expected_series = pd.Series([True, True, True, True, False]) 21 | 22 | assert inspect.equals(expected_series) 23 | 24 | 25 | def test_pw_pathway_inspect_name(): 26 | df = pd.DataFrame( 27 | index=[ 28 | "Toll-like receptor 9 signaling pathway", 29 | "Toll-like receptor TLR1:TLR2 signaling pathway", 30 | "classic metabolic pathway", 31 | "regulatory pathway", 32 | "This pathway does not exist", 33 | ] 34 | ) 35 | 36 | pw = bt_base.Pathway(source="pw") 37 | 
inspected_df = pw.inspect(df.index, field=pw.name, return_df=True) 38 | 39 | inspect = inspected_df["__validated__"].reset_index(drop=True) 40 | expected_series = pd.Series([True, True, True, True, False]) 41 | 42 | assert inspect.equals(expected_series) 43 | -------------------------------------------------------------------------------- /tests/base/entities/test_phenotype.py: -------------------------------------------------------------------------------- 1 | import bionty.base as bt_base 2 | import pandas as pd 3 | 4 | 5 | def test_hp_phenotype_inspect_name(): 6 | df = pd.DataFrame( 7 | index=[ 8 | "Specific learning disability", 9 | "Dystonia", 10 | "Cerebral hemorrhage", 11 | "Slurred speech", 12 | "This phenotype does not exist", 13 | ] 14 | ) 15 | 16 | pt = bt_base.Phenotype(source="hp") 17 | inspected_df = pt.inspect(df.index, field=pt.name, return_df=True) 18 | 19 | inspect = inspected_df["__validated__"].reset_index(drop=True) 20 | expected_series = pd.Series([True, True, True, True, False]) 21 | 22 | assert inspect.equals(expected_series) 23 | 24 | 25 | def test_mp_phenotype_inspect_name(): 26 | df = pd.DataFrame( 27 | index=[ 28 | "decreased ovary apoptosis", 29 | "abnormal Ebner's gland morphology", 30 | "abnormal lacrimal sac morphology", 31 | "abnormal nictitating membrane morphology", 32 | "This phenotype does not exist", 33 | ] 34 | ) 35 | 36 | pt = bt_base.Phenotype(source="mp") 37 | inspected_df = pt.inspect(df.index, field=pt.name, return_df=True) 38 | 39 | inspect = inspected_df["__validated__"].reset_index(drop=True) 40 | expected_series = pd.Series([True, True, True, True, False]) 41 | 42 | assert inspect.equals(expected_series) 43 | 44 | 45 | def test_zp_phenotype_inspect_name(): 46 | df = pd.DataFrame( 47 | index=[ 48 | "somitogenesis disrupted, abnormal", 49 | "somite specification disrupted, abnormal", 50 | "liver has extra parts of type collagen trimer liver, abnormal", 51 | "neuromast hair cell normal process quality apoptotic process, 
abnormal", 52 | "This phenotype does not exist", 53 | ] 54 | ) 55 | 56 | pt = bt_base.Phenotype(source="zp") 57 | inspected_df = pt.inspect(df.index, field=pt.name, return_df=True) 58 | 59 | inspect = inspected_df["__validated__"].reset_index(drop=True) 60 | expected_series = pd.Series([True, True, True, True, False]) 61 | 62 | assert inspect.equals(expected_series) 63 | 64 | 65 | def test_phe_phenotype_inspect_name(): 66 | df = pd.DataFrame( 67 | index=[ 68 | "Intestinal infection due to C. difficile", 69 | "Sepsis and SIRS", 70 | "Systemic inflammatory response syndrome (SIRS)", 71 | "Septic shock", 72 | "This phenotype does not exist", 73 | ] 74 | ) 75 | 76 | pt = bt_base.Phenotype(source="phe") 77 | inspected_df = pt.inspect(df.index, field=pt.name, return_df=True) 78 | 79 | inspect = inspected_df["__validated__"].reset_index(drop=True) 80 | expected_series = pd.Series([True, True, True, True, False]) 81 | 82 | assert inspect.equals(expected_series) 83 | 84 | 85 | def test_path_phenotype(): 86 | df = pd.DataFrame( 87 | index=[ 88 | "nocturnal", 89 | "male", 90 | "female", 91 | "mobility", 92 | "This phenotype does not exist", 93 | ] 94 | ) 95 | 96 | pt = bt_base.Phenotype(source="pato") 97 | inspected_df = pt.inspect(df.index, field=pt.name, return_df=True) 98 | inspect = inspected_df["__validated__"].reset_index(drop=True) 99 | expected_series = pd.Series([True, True, True, True, False]) 100 | 101 | assert inspect.equals(expected_series) 102 | -------------------------------------------------------------------------------- /tests/base/entities/test_protein.py: -------------------------------------------------------------------------------- 1 | import bionty.base as bt_base 2 | import pandas as pd 3 | 4 | 5 | def test_uniprot_protein_inspect_uniprotkb_id(): 6 | df = pd.DataFrame( 7 | index=[ 8 | "A0A024QZ08", # no longer exists in the 2024-03 version 9 | "X6RLV5", 10 | "X6RM24", 11 | "A0A024QZQ1", # no longer exists in the 2024-03 version 12 | "This protein does not 
exist", 13 | ] 14 | ) 15 | 16 | pr = bt_base.Protein(source="uniprot") 17 | inspected_df = pr.inspect(df.index, pr.uniprotkb_id, return_df=True) 18 | 19 | inspect = inspected_df["__validated__"].reset_index(drop=True) 20 | expected_series = pd.Series([False, True, True, False, False]) 21 | 22 | assert inspect.equals(expected_series) 23 | -------------------------------------------------------------------------------- /tests/base/entities/test_tissue.py: -------------------------------------------------------------------------------- 1 | import bionty.base as bt_base 2 | import pandas as pd 3 | 4 | 5 | def test_uberon_tissue_inspect_ontology_id(): 6 | df = pd.DataFrame( 7 | index=[ 8 | "UBERON:0000000", 9 | "UBERON:0000005", 10 | "UBERON:8600001", 11 | "UBERON:8600002", 12 | "This tissue does not exist", 13 | ] 14 | ) 15 | 16 | ts = bt_base.Tissue(source="uberon", version="2025-05-28") 17 | inspected_df = ts.inspect(df.index, ts.ontology_id, return_df=True) 18 | 19 | inspect = inspected_df["__validated__"].reset_index(drop=True) 20 | expected_series = pd.Series([True, True, True, True, False]) 21 | 22 | assert inspect.equals(expected_series) 23 | -------------------------------------------------------------------------------- /tests/base/test_bionty.py: -------------------------------------------------------------------------------- 1 | import bionty.base as bt_base 2 | import pytest 3 | 4 | 5 | def test_unavailable_sources(): 6 | with pytest.raises(ValueError): 7 | bt_base.CellType(source="random") 8 | 9 | 10 | def test_diff_successful(): 11 | disease_bt_1 = bt_base.Disease(source="mondo", version="2023-04-04") 12 | disease_bt_2 = bt_base.Disease(source="mondo", version="2023-02-06") 13 | 14 | new_entries, modified_entries = disease_bt_1.diff(disease_bt_2) 15 | assert len(new_entries) == 819 16 | assert len(modified_entries) == 249 17 | 18 | 19 | def test_diff_value_errors(): 20 | # Two different PublicOntology object types 21 | disease_bt = bt_base.Disease() 22 | 
phenotype_bt = bt_base.Phenotype() 23 | with pytest.raises(ValueError): 24 | disease_bt.diff(phenotype_bt) 25 | 26 | # Different sources 27 | disease_bt_1 = bt_base.Disease(source="mondo") 28 | disease_bt_2 = bt_base.Disease(source="doid") 29 | with pytest.raises(ValueError): 30 | disease_bt_1.diff(disease_bt_2) 31 | 32 | # Same version 33 | disease_bt_3 = bt_base.Disease(source="mondo", version="2023-04-04") 34 | disease_bt_4 = bt_base.Disease(source="mondo", version="2023-04-04") 35 | with pytest.raises(ValueError): 36 | disease_bt_3.diff(disease_bt_4) 37 | -------------------------------------------------------------------------------- /tests/base/test_ontology.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from bionty.base._ontology import Ontology 4 | from bionty.base.dev._io import s3_bionty_assets 5 | 6 | 7 | def test_ontology(): 8 | localpath = s3_bionty_assets("ontology_all__pw__7.79__Pathway") 9 | 10 | try: 11 | onto = Ontology(localpath) 12 | assert onto.get_term("PW:0000014").name == "neurodegenerative pathway" 13 | 14 | df = onto.to_df(source="pw", include_id_prefixes={"pw": ["PW"]}) 15 | assert df.shape == (2647, 4) 16 | assert df.index.name == "ontology_id" 17 | 18 | finally: 19 | if Path(localpath).exists(): 20 | Path(localpath).unlink() 21 | -------------------------------------------------------------------------------- /tests/base/test_ontology_url.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from bionty.base._ontology_url import ( 3 | OntologyNotFoundError, 4 | OntologyVersionNotFoundError, 5 | get_ontology_url, 6 | ) 7 | 8 | 9 | def test_get_ontology_url(): 10 | # Test with a known prefix and version 11 | url, ver = get_ontology_url("OBA", "2022-05-11") 12 | assert url == "http://purl.obolibrary.org/obo/oba/releases/2022-05-11/oba.owl" 13 | assert ver == "2022-05-11" 14 | 15 | # Test with a known prefix and no 
version 16 | prefix = "OBA" 17 | url, ver = get_ontology_url(prefix) 18 | assert url is not None 19 | assert ver is not None 20 | 21 | # A wrong version 22 | with pytest.raises(OntologyVersionNotFoundError): 23 | get_ontology_url("OBA", "wrong_version") 24 | 25 | # Test with an unknown prefix 26 | with pytest.raises(OntologyNotFoundError): 27 | get_ontology_url("UNKNOWN_PREFIX") 28 | 29 | prefixes = [ 30 | "ncbitaxon", 31 | "clo", 32 | "cl", 33 | "uberon", 34 | "mondo", 35 | "doid", 36 | "efo", 37 | "pato", 38 | "hp", 39 | "mp", 40 | "zp", 41 | "go", 42 | "pw", 43 | "dron", 44 | "hsapdv", 45 | "mmusdv", 46 | "hancestro", 47 | ] 48 | 49 | for prefix in prefixes: 50 | url, ver = get_ontology_url(prefix) 51 | assert url is not None 52 | assert ver is not None 53 | -------------------------------------------------------------------------------- /tests/core/conftest.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | 3 | import lamindb_setup as ln_setup 4 | import pytest 5 | 6 | 7 | def pytest_sessionstart(): 8 | ln_setup.init(storage="./testdb", modules="bionty,wetlab") 9 | 10 | 11 | def pytest_sessionfinish(session: pytest.Session): 12 | shutil.rmtree("./testdb") 13 | ln_setup.delete("testdb", force=True) 14 | -------------------------------------------------------------------------------- /tests/core/test_models.py: -------------------------------------------------------------------------------- 1 | import bionty as bt 2 | 3 | 4 | def test_public_synonym_mapping(): 5 | bt_result = bt.Gene.public(organism="human").inspect( 6 | ["ABC1", "TNFRSF4"], field="symbol" 7 | ) 8 | assert bt_result.synonyms_mapper == {"ABC1": "HEATR6"} 9 | 10 | bt_result = bt.Gene.public(organism="human").inspect( 11 | ["ABC1", "TNFRSF4"], field="symbol", inspect_synonyms=False 12 | ) 13 | assert bt_result.synonyms_mapper == {} 14 | 15 | 16 | def test_encode_uids(): 17 | cell_type = bt.CellType( 18 | ontology_id="CL:0000084", 19 | 
_skip_validation=True, 20 | ) 21 | assert cell_type.uid == "22LvKd01" 22 | 23 | organism = bt.Organism( 24 | ontology_id="NCBITaxon:9606", 25 | name="human", 26 | _skip_validation=True, 27 | ) 28 | assert organism.uid == "1dpCL6Td" 29 | 30 | bt.settings.organism = "human" 31 | cell_marker = bt.CellMarker( 32 | name="test", 33 | organism=bt.settings.organism, 34 | _skip_validation=True, 35 | ) 36 | assert cell_marker.uid == "2dZ52W9noUDK" 37 | 38 | gene = bt.Gene( 39 | ensembl_gene_id="ENSG00000081059", 40 | symbol="TCF7", 41 | organism=bt.settings.organism, # required 42 | _skip_validation=True, 43 | ) 44 | assert gene.uid == "7IkHKPl0ScQR" 45 | 46 | disease = bt.Source( 47 | entity="bionty.Disease", 48 | name="mondo", 49 | version="2023-04-04", 50 | organism="all", 51 | _skip_validation=True, 52 | ) 53 | assert disease.uid == "Hgw08Vk3" 54 | 55 | phenotype = bt.Source( 56 | entity="bionty.Phenotype", 57 | name="hp", 58 | version="2023-06-17", 59 | organism="human", 60 | _skip_validation=True, 61 | ) 62 | assert phenotype.uid == "451W7iJS" 63 | -------------------------------------------------------------------------------- /tests/core/test_organism_requirement.py: -------------------------------------------------------------------------------- 1 | import bionty as bt 2 | import pytest 3 | from bionty._organism import OrganismNotSet 4 | 5 | 6 | def test_from_values_organism(): 7 | bt.settings._organism = None 8 | with pytest.raises(OrganismNotSet): 9 | bt.Gene.from_values(["ABC1"], bt.Gene.symbol) 10 | # no organism is needed if the values are ensembl gene ids 11 | result = bt.Gene.from_values(["ENSG00000068097"], bt.Gene.ensembl_gene_id) 12 | assert len(result) == 1 13 | result = bt.Gene.from_values( 14 | ["ENSMUSG00000102862", "ENSMUSG00000084826"], field=bt.Gene.ensembl_gene_id 15 | ) 16 | assert len(result) == 2 17 | result = bt.Gene.from_values( 18 | ["HRA1", "ETS1-1"], field=bt.Gene.stable_id, organism="saccharomyces cerevisiae" 19 | ) 20 | assert len(result) 
== 2 21 | 22 | bt.settings.organism = "human" 23 | values = ["ABC1"] 24 | standardized_values = bt.Gene.public().standardize(values) 25 | records = bt.Gene.from_values(standardized_values, bt.Gene.symbol) 26 | assert records[0].ensembl_gene_id == "ENSG00000068097" 27 | 28 | # TODO: Gene.public() should raise error if organism is not provided 29 | standardized_values = bt.Gene.public(organism="mouse").standardize(values) 30 | records = bt.Gene.from_values(standardized_values, bt.Gene.symbol, organism="mouse") 31 | assert records[0].ensembl_gene_id == "ENSMUSG00000015243" 32 | 33 | # clean up 34 | bt.settings._organism = None 35 | bt.Organism.filter().delete(permanent=True) 36 | bt.Gene.filter().delete(permanent=True) 37 | --------------------------------------------------------------------------------