├── .github
    └── workflows
    │   ├── build.yml
    │   ├── doc-changes.yml
    │   └── update_ontologies.yml
├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE
├── README.md
├── bionty
    ├── __init__.py
    ├── _biorecord.py
    ├── _organism.py
    ├── _shared_docstrings.py
    ├── _source.py
    ├── base
    │   ├── __init__.py
    │   ├── _display_sources.py
    │   ├── _ontology.py
    │   ├── _ontology_url.py
    │   ├── _public_ontology.py
    │   ├── _settings.py
    │   ├── dev
    │   │   ├── __init__.py
    │   │   ├── _doc_util.py
    │   │   ├── _handle_sources.py
    │   │   └── _io.py
    │   ├── entities
    │   │   ├── __init__.py
    │   │   ├── _bfxpipeline.py
    │   │   ├── _biosample.py
    │   │   ├── _cellline.py
    │   │   ├── _cellmarker.py
    │   │   ├── _celltype.py
    │   │   ├── _developmentalstage.py
    │   │   ├── _disease.py
    │   │   ├── _drug.py
    │   │   ├── _ethnicity.py
    │   │   ├── _experimentalfactor.py
    │   │   ├── _gene.py
    │   │   ├── _organism.py
    │   │   ├── _pathway.py
    │   │   ├── _phenotype.py
    │   │   ├── _protein.py
    │   │   ├── _shared_docstrings.py
    │   │   └── _tissue.py
    │   ├── scripts
    │   │   ├── bfxpipelines_info
    │   │   │   └── custom_pipelines.json
    │   │   ├── check_ontologies_reachable.py
    │   │   ├── generate_bfxpipelines.py
    │   │   └── update_new_ontologies.py
    │   └── sources.yaml
    ├── core
    │   ├── __init__.py
    │   ├── _add_ontology.py
    │   ├── _settings.py
    │   └── _source.py
    ├── ids.py
    ├── migrations
    │   ├── 0028_artifactcellline_created_at_and_more.py
    │   ├── 0029_alter_cellline_previous_runs_and_more.py
    │   ├── 0030_rename_publicsource_source_and_more.py
    │   ├── 0031_alter_cellmarker_name_and_more.py
    │   ├── 0032_rename_source_name_source_description_and_more.py
    │   ├── 0033_alter_artifactcellline_artifact_and_more.py
    │   ├── 0034_alter_source_unique_together.py
    │   ├── 0035_alter_protein_gene_symbol.py
    │   ├── 0036_alter_source_artifacts_and_more.py
    │   ├── 0037_alter_cellline_source_alter_cellmarker_source_and_more.py
    │   ├── 0038_alter_artifactcellline_created_by_and_more.py
    │   ├── 0039_alter_cellline_source_alter_cellmarker_source_and_more.py
    │   ├── 0040_rename_feature_ref_is_symbol_artifactgene_feature_ref_is_name_and_more.py
    │   ├── 0041_alter_artifactcellline_artifact_and_more.py
    │   ├── 0042_lamindbv1.py
    │   ├── 0043_lamindbv2_part2.py
    │   ├── 0044_alter_cellline_space_alter_cellmarker_space_and_more.py
    │   ├── 0045_rename_aux_cellline__aux_rename_aux_cellmarker__aux_and_more.py
    │   ├── 0046_alter_cellline__aux_alter_cellmarker__aux_and_more.py
    │   ├── 0047_lamindbv1_part5.py
    │   ├── 0048_lamindbv1_part6.py
    │   ├── 0049_alter_schemacellmarker_cellmarker_and_more.py
    │   ├── 0050_alter_source_uid.py
    │   ├── 0051_alter_cellline__branch_code_and_more.py
    │   ├── 0052_rename__branch_code_cellline_branch_and_more.py
    │   ├── 0053_recordcellline_cellline_records_recordcellmarker_and_more.py
    │   ├── 0054_alter_cellline_branch_alter_cellline_space_and_more.py
    │   ├── 0055_rename_cellline_recordcellline_value_and_more.py
    │   ├── 0056_alter_recordtissue_record.py
    │   ├── 0057_alter_cellline_description_alter_cellline_synonyms_and_more.py
    │   ├── 0058_cellline_page_cellmarker_page_celltype_page_and_more.py
    │   ├── 0059_cellline_is_locked_cellmarker_is_locked_and_more.py
    │   ├── 0060_alter_cellline_is_locked_alter_cellmarker_is_locked_and_more.py
    │   ├── 0061_remove_cellline_page_remove_cellmarker_page_and_more.py
    │   ├── 0061_squashed.py
    │   └── __init__.py
    ├── models.py
    └── uids.py
├── docs
    ├── guide.md
    ├── guide
    │   ├── concepts.md
    │   ├── config.md
    │   └── test_notebooks.py
    ├── index.md
    └── reference.md
├── noxfile.py
├── pyproject.toml
├── scripts
    └── update_ontology_sources.ipynb
└── tests
    ├── base
        ├── dev
        │   ├── test_handle_sources.py
        │   └── test_io.py
        ├── entities
        │   ├── test_bfxpipeline.py
        │   ├── test_biosample.py
        │   ├── test_cellline.py
        │   ├── test_cellmarker.py
        │   ├── test_celltype.py
        │   ├── test_developmentalstage.py
        │   ├── test_disease.py
        │   ├── test_drug.py
        │   ├── test_ethnicity.py
        │   ├── test_experimentalfactor.py
        │   ├── test_gene.py
        │   ├── test_organism.py
        │   ├── test_pathway.py
        │   ├── test_phenotype.py
        │   ├── test_protein.py
        │   └── test_tissue.py
        ├── test_bionty.py
        ├── test_ontology.py
        └── test_ontology_url.py
    └── core
        ├── conftest.py
        ├── test_models.py
        ├── test_organism_requirement.py
        └── test_source.py


/.github/workflows/build.yml:
--------------------------------------------------------------------------------
  1 | name: build
  2 | on:
  3 |   push:
  4 |     branches: [main]
  5 |   pull_request:
  6 |     branches: [main, staging]
  7 |   workflow_dispatch:
  8 | 
  9 | jobs:
 10 |   pre-filter:
 11 |     runs-on: ubuntu-latest
 12 |     outputs:
 13 |       matrix: ${{ steps.set-matrix.outputs.matrix }}
 14 |     steps:
 15 |       - uses: actions/checkout@v4
 16 |         with:
 17 |           fetch-depth: 0
 18 | 
 19 |       - uses: dorny/paths-filter@v3
 20 |         id: changes
 21 |         if: github.event_name != 'push'
 22 |         with:
 23 |           filters: |
 24 |             base:
 25 |               - 'bionty/base/**'
 26 |               - 'tests/base/**'
 27 | 
 28 |       - id: set-matrix
 29 |         shell: bash
 30 |         run: |
 31 |           BASE_GROUPS=$(jq -n -c '["bionty-core", "bionty-docs"]')
 32 | 
 33 |           if [[ "${{ github.event_name }}" == "push" || "${{ steps.changes.outputs.base }}" == "true" ]]; then
 34 |             # Run everything on push or when base paths change
 35 |             MATRIX=$(jq -n -c --argjson groups "$BASE_GROUPS" '{group: ($groups + ["bionty-base"])}')
 36 |           else
 37 |             # Otherwise run only the default groups from BASE_GROUPS (bionty-core, bionty-docs); bionty-base is skipped
 38 |             MATRIX=$(jq -n -c --argjson groups "$BASE_GROUPS" '{group: $groups}')
 39 |           fi
 40 | 
 41 |           # Output as single line for GitHub Actions
 42 |           echo "matrix=$(echo "$MATRIX" | jq -c .)" >> $GITHUB_OUTPUT
 43 | 
 44 |           # Pretty print for debugging
 45 |           echo "Generated matrix:"
 46 |           echo "$MATRIX" | jq .
 47 | 
 48 |   test:
 49 |     needs: pre-filter
 50 |     runs-on: ubuntu-latest
 51 |     strategy:
 52 |       fail-fast: false
 53 |       matrix: ${{fromJson(needs.pre-filter.outputs.matrix)}}
 54 |     timeout-minutes: 12
 55 |     steps:
 56 |       - uses: actions/checkout@v4
 57 |         with:
 58 |           submodules: recursive
 59 |           fetch-depth: 0
 60 | 
 61 |       - uses: actions/setup-python@v5
 62 |         with:
 63 |           python-version: "3.12"
 64 | 
 65 |       - name: cache pre-commit
 66 |         uses: actions/cache@v4
 67 |         with:
 68 |           path: ~/.cache/pre-commit
 69 |           key: pre-commit-${{ runner.os }}-${{ hashFiles('.pre-commit-config.yaml') }}
 70 | 
 71 |       - name: Install laminci
 72 |         run: pip install "laminci@git+https://x-access-token:${{ secrets.LAMIN_BUILD_DOCS }}@github.com/laminlabs/laminci"
 73 | 
 74 |       - name: Run lint
 75 |         if: matrix.group == 'bionty-core'
 76 |         run: nox -s lint
 77 | 
 78 |       - uses: aws-actions/configure-aws-credentials@v4
 79 |         if: ${{ github.event_name == 'push' || github.event.pull_request.head.repo.full_name == github.repository }}
 80 |         with:
 81 |           aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
 82 |           aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
 83 |           aws-region: eu-central-1
 84 | 
 85 |       - name: checkout lndocs
 86 |         uses: actions/checkout@v4
 87 |         if: matrix.group == 'bionty-docs'
 88 |         with:
 89 |           repository: laminlabs/lndocs
 90 |           ssh-key: ${{ secrets.READ_LNDOCS }}
 91 |           path: lndocs
 92 |           ref: main
 93 | 
 94 |       - name: Run build
 95 |         run: nox -s "build(group='${{ matrix.group }}')"
 96 | 
 97 |       - uses: actions/upload-artifact@v4
 98 |         with:
 99 |           name: coverage--${{ matrix.group }}
100 |           path: .coverage
101 |           include-hidden-files: true
102 | 
103 |       - uses: nwtgck/actions-netlify@v1.2
104 |         if: ${{ matrix.group == 'bionty-docs' && !(github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name != github.repository) }}
105 |         with:
106 |           publish-dir: "_build/html"
107 |           production-deploy: ${{ github.event_name == 'push' }}
108 |           github-token: ${{ secrets.GITHUB_TOKEN }}
109 |           enable-commit-comment: false
110 |         env:
111 |           NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }}
112 |           NETLIFY_SITE_ID: ${{ secrets.NETLIFY_SITE_ID }}
113 | 
114 |   coverage:
115 |     needs: test
116 |     runs-on: ubuntu-latest
117 |     steps:
118 |       - uses: actions/checkout@v4
119 | 
120 |       - uses: actions/setup-python@v5
121 |         with:
122 |           python-version: "3.13"
123 | 
124 |       - name: Install coverage dependencies
125 |         run: |
126 |           pip install -U pip uv
127 |           uv pip install --system coverage[toml]
128 |           uv pip install --system --no-deps .
129 | 
130 |       - uses: actions/download-artifact@v4
131 | 
132 |       - name: Run coverage
133 |         run: |
134 |           coverage combine coverage--*/.coverage*
135 |           coverage report --fail-under=0
136 |           coverage xml
137 | 
138 |       - uses: codecov/codecov-action@v2
139 |         with:
140 |           token: ${{ secrets.CODECOV_TOKEN }}
141 | 


--------------------------------------------------------------------------------
/.github/workflows/doc-changes.yml:
--------------------------------------------------------------------------------
 1 | name: doc-changes
 2 | 
 3 | on:
 4 |   pull_request_target:
 5 |     branches:
 6 |       - main
 7 |     types:
 8 |       - closed
 9 | 
10 | jobs:
11 |   latest-changes:
12 |     runs-on: ubuntu-latest
13 |     steps:
14 |       - uses: actions/checkout@v4
15 |       - uses: actions/setup-python@v5
16 |         with:
17 |           python-version: "3.11"
18 |       - run: pip install "laminci[doc-changes]@git+https://x-access-token:${{ secrets.LAMIN_BUILD_DOCS }}@github.com/laminlabs/laminci"
19 |       - run: laminci doc-changes
20 |         env:
21 |           repo_token: ${{ secrets.GITHUB_TOKEN }}
22 |           docs_token: ${{ secrets.LAMIN_BUILD_DOCS }}
23 |           changelog_file: lamin-docs/docs/changelog/soon/bionty.md
24 | 


--------------------------------------------------------------------------------
/.github/workflows/update_ontologies.yml:
--------------------------------------------------------------------------------
 1 | name: Update ontologies
 2 | 
 3 | on:
 4 |   schedule:
 5 |     - cron: "0 0 1 */3 *" # runs at 00:00 UTC on the first day of every 3rd month
 6 |   workflow_dispatch:
 7 | 
 8 | jobs:
 9 |   test-urls:
10 |     runs-on: ubuntu-latest
11 | 
12 |     steps:
13 |       - name: checkout repository
14 |         uses: actions/checkout@v4
15 | 
16 |       - name: setup python
17 |         uses: actions/setup-python@v5
18 |         with:
19 |           python-version: "3.13"
20 | 
21 |       - name: Install Bionty
22 |         run: pip install .[dev]
23 | 
24 |       - name: check URLs
25 |         run: python ./bionty/base/scripts/update_new_ontologies.py
26 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # bionty data
  2 | data/
  3 | sources/
  4 | versions/
  5 | 
  6 | # macOS
  7 | .DS_Store
  8 | .AppleDouble
  9 | .LSOverride
 10 | 
 11 | # Byte-compiled / optimized / DLL files
 12 | __pycache__/
 13 | *.py[cod]
 14 | *$py.class
 15 | 
 16 | # C extensions
 17 | *.so
 18 | 
 19 | # Distribution / packaging
 20 | .Python
 21 | env/
 22 | build/
 23 | develop-eggs/
 24 | dist/
 25 | downloads/
 26 | eggs/
 27 | .eggs/
 28 | lib/
 29 | lib64/
 30 | parts/
 31 | sdist/
 32 | var/
 33 | wheels/
 34 | *.egg-info/
 35 | .installed.cfg
 36 | *.egg
 37 | 
 38 | # PyInstaller
 39 | #  Usually these files are written by a python script from a template
 40 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 41 | *.manifest
 42 | *.spec
 43 | 
 44 | # Installer logs
 45 | pip-log.txt
 46 | pip-delete-this-directory.txt
 47 | 
 48 | # Unit test / coverage reports
 49 | htmlcov/
 50 | .tox/
 51 | .coverage
 52 | .coverage.*
 53 | .cache
 54 | nosetests.xml
 55 | coverage.xml
 56 | *.cover
 57 | .hypothesis/
 58 | .pytest_cache/
 59 | 
 60 | # Translations
 61 | *.mo
 62 | *.pot
 63 | 
 64 | # Django stuff:
 65 | *.log
 66 | local_settings.py
 67 | 
 68 | # Flask stuff:
 69 | instance/
 70 | .webassets-cache
 71 | 
 72 | # Scrapy stuff:
 73 | .scrapy
 74 | 
 75 | # Sphinx documentation
 76 | docs/_build/
 77 | 
 78 | # PyBuilder
 79 | target/
 80 | 
 81 | # Jupyter Notebook
 82 | .ipynb_checkpoints
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # celery beat schedule file
 88 | celerybeat-schedule
 89 | 
 90 | # SageMath parsed files
 91 | *.sage.py
 92 | 
 93 | # dotenv
 94 | .env
 95 | 
 96 | # virtualenv
 97 | .venv
 98 | venv/
 99 | ENV/
100 | 
101 | # mypy
102 | .mypy_cache/
103 | 
104 | # IDE settings
105 | .vscode/
106 | .idea/
107 | 
108 | # Lamin
109 | _build
110 | docs/bionty.*
111 | lamin_sphinx
112 | docs/conf.py
113 | _docs_tmp*
114 | _dynamic/
115 | 
116 | # Convenience
117 | test.ipynb
118 | run-tests
119 | registration_template.ipynb
120 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | fail_fast: false
 2 | default_language_version:
 3 |   python: python3
 4 | default_stages:
 5 |   - pre-commit
 6 |   - pre-push
 7 | minimum_pre_commit_version: 2.12.0
 8 | repos:
 9 |   - repo: https://github.com/pre-commit/mirrors-prettier
10 |     rev: v4.0.0-alpha.4
11 |     hooks:
12 |       - id: prettier
13 |         exclude: |
14 |           (?x)(
15 |             docs/changelog.md
16 |           )
17 |   - repo: https://github.com/kynan/nbstripout
18 |     rev: 0.6.1
19 |     hooks:
20 |       - id: nbstripout
21 |         exclude: |
22 |           (?x)(
23 |               docs/examples/|
24 |               docs/notes/
25 |           )
26 |   - repo: https://github.com/astral-sh/ruff-pre-commit
27 |     rev: v0.9.4
28 |     hooks:
29 |       - id: ruff
30 |         args: [--fix, --exit-non-zero-on-fix, --unsafe-fixes]
31 |       - id: ruff-format
32 |   - repo: https://github.com/pre-commit/pre-commit-hooks
33 |     rev: v4.5.0
34 |     hooks:
35 |       - id: detect-private-key
36 |       - id: check-ast
37 |       - id: end-of-file-fixer
38 |         exclude: |
39 |           (?x)(
40 |               .github/workflows/latest-changes.jinja2|
41 |               bionty/base/data
42 |             )
43 |       - id: mixed-line-ending
44 |         args: [--fix=lf]
45 |       - id: trailing-whitespace
46 |       - id: check-case-conflict
47 |   - repo: https://github.com/pre-commit/mirrors-mypy
48 |     rev: v1.14.1
49 |     hooks:
50 |       - id: mypy
51 |         args: [--no-strict-optional, --ignore-missing-imports]
52 |         additional_dependencies: ["types-requests", "types-attrs"]
53 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | [![Stars](https://img.shields.io/github/stars/laminlabs/bionty?logo=GitHub&color=yellow)](https://github.com/laminlabs/bionty)
 2 | [![pypi](https://img.shields.io/pypi/v/bionty?color=blue&label=pypi%20package)](https://pypi.org/project/bionty)
 3 | 
 4 | # bionty: Registries for basic biological entities, coupled to public ontologies
 5 | 
 6 | - Access >20 public ontologies such as Gene, Protein, CellMarker, ExperimentalFactor, CellType, CellLine, Tissue, …
 7 | - Create records from entries in public ontologies using `.from_source()`.
 8 | - Access full underlying public ontologies via `.public()` to search & bulk-create records.
 9 | - Create in-house ontologies by extending public ontologies using hierarchical relationships among records (`.parents`).
10 | - Use `.synonyms` and `.abbr` to manage synonyms.
11 | - Safeguards against typos & duplications.
12 | - Ontology versioning via the `bionty.Source` registry.
13 | 
14 | Read the [docs](https://docs.lamin.ai/bionty).
15 | 


--------------------------------------------------------------------------------
/bionty/__init__.py:
--------------------------------------------------------------------------------
  1 | """Basic biological entities, coupled to public ontologies [`source <https://github.com/laminlabs/bionty/blob/main/bionty/models.py>`__].
  2 | 
  3 | - Create records from public ontologies using `.from_source()`.
  4 | - Access public ontologies via `.public()` to search & bulk-create records.
  5 | - Use hierarchical relationships among records (`.parents`).
  6 | - Use `.synonyms` and `.abbr` to manage synonyms.
  7 | - Manage ontology versions.
  8 | 
  9 | Install and mount `bionty` in a new instance:
 10 | 
 11 | >>> pip install 'bionty'
 12 | >>> lamin init --storage <path_to_storage_location> --modules bionty
 13 | 
 14 | Import the package:
 15 | 
 16 | >>> import bionty as bt
 17 | 
 18 | Access public ontologies:
 19 | 
 20 | >>> genes = bt.Gene.public()
 21 | >>> genes.validate(["BRCA1", "TCF7"], field="symbol")
 22 | 
 23 | Create records from public ontologies:
 24 | 
 25 | >>> cell_type = bt.CellType.from_source(ontology_id="CL:0000037")
 26 | >>> cell_type.save()
 27 | 
 28 | View ontological hierarchy:
 29 | 
 30 | >>> cell_type.view_parents()
 31 | 
 32 | Create in-house ontologies:
 33 | 
 34 | >>> cell_type_new = bt.CellType(name="my new cell type")
 35 | >>> cell_type_new.save()
 36 | >>> cell_type_new.parents.add(cell_type)
 37 | >>> cell_type_new.view_parents()
 38 | 
 39 | Manage synonyms:
 40 | 
 41 | >>> cell_type_new.add_synonyms(["my cell type", "my cell"])
 42 | >>> cell_type_new.set_abbr("MCT")
 43 | 
 44 | Detailed guides:
 45 | 
 46 | - :doc:`docs:public-ontologies`
 47 | - :doc:`docs:manage-ontologies`
 48 | 
 49 | Registries:
 50 | 
 51 | .. autosummary::
 52 |    :toctree: .
 53 | 
 54 |    Organism
 55 |    Gene
 56 |    Protein
 57 |    CellMarker
 58 |    CellType
 59 |    CellLine
 60 |    Tissue
 61 |    Disease
 62 |    Pathway
 63 |    Phenotype
 64 |    ExperimentalFactor
 65 |    DevelopmentalStage
 66 |    Ethnicity
 67 | 
 68 | Settings:
 69 | 
 70 | .. autosummary::
 71 |    :toctree: .
 72 | 
 73 |    settings
 74 | 
 75 | Ontology versions:
 76 | 
 77 | .. autosummary::
 78 |    :toctree: .
 79 | 
 80 |    Source
 81 | 
 82 | Submodules:
 83 | 
 84 | .. autosummary::
 85 |    :toctree: .
 86 | 
 87 |    core
 88 |    base
 89 |    uids
 90 | 
 91 | """
 92 | 
# Version string of the package.
__version__ = "1.8.1"

from lamindb_setup._check_setup import _check_instance_setup

from . import _biorecord, base, uids

# NOTE(review): the check is called before the settings/model imports below;
# presumably those imports rely on it having run first -- confirm before
# reordering these statements.
_check_instance_setup(from_module="bionty")

from .core._settings import settings
from .models import (
    CellLine,
    CellMarker,
    CellType,
    DevelopmentalStage,
    Disease,
    Ethnicity,
    ExperimentalFactor,
    Gene,
    Organism,
    Pathway,
    Phenotype,
    Protein,
    Source,
    Tissue,
)

# Names exported by `from bionty import *`.
# `core` is not imported by name in this module; it is bound as a package
# attribute as a side effect of `from .core._settings import settings` above.
__all__ = [
    # registries
    "CellLine",
    "CellMarker",
    "CellType",
    "DevelopmentalStage",
    "Disease",
    "Ethnicity",
    "ExperimentalFactor",
    "Gene",
    "Organism",
    "Pathway",
    "Phenotype",
    "Protein",
    "Source",
    "Tissue",
    # modules
    "settings",
    "base",
    "core",
    "uids",
]

# `bionty.ids` is kept as an alias of `bionty.uids`.
ids = uids  # backward compat
143 | 


--------------------------------------------------------------------------------
/bionty/_biorecord.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from typing import TYPE_CHECKING
 4 | 
 5 | from .uids import encode_uid
 6 | 
 7 | if TYPE_CHECKING:
 8 |     from types import ModuleType
 9 | 
10 |     from .models import BioRecord
11 | 
12 | 
def lookup2kwargs(record: BioRecord, *args, **kwargs) -> dict:
    """Turn a bionty search/lookup result into keyword arguments for a record.

    Args:
        record: Record whose class acts as the registry; its model fields
            decide which keys of the lookup result are kept.
        *args: Only the first positional argument is read. It is either a
            tuple exposing `_asdict()` or a sequence whose first element
            exposes `_asdict()`.
        **kwargs: Only the optional `organism` and `source` entries are read.

    Returns:
        The result of `encode_uid` for the registry and the filtered kwargs.
    """
    from ._organism import create_or_get_organism_record
    from ._source import get_source_record

    registry = record.__class__

    lookup_result = args[0]
    # a single result is itself a tuple; anything else is unwrapped first
    entry = lookup_result if isinstance(lookup_result, tuple) else lookup_result[0]
    bionty_kwargs = entry._asdict()  # type:ignore

    if len(bionty_kwargs) > 0:
        # attach organism (only when one is resolved) and source
        organism_record = create_or_get_organism_record(
            registry=registry, organism=kwargs.get("organism")
        )
        if organism_record is not None:
            bionty_kwargs["organism"] = organism_record
        bionty_kwargs["source"] = get_source_record(
            registry=registry,
            organism=organism_record,
            source=kwargs.get("source"),
        )

        # keep only keys that are model fields, plus the "parents" relation
        allowed_names = {model_field.name for model_field in record._meta.fields}
        allowed_names.add("parents")
        bionty_kwargs = {
            key: value
            for key, value in bionty_kwargs.items()
            if key in allowed_names
        }

    return encode_uid(registry=registry, kwargs=bionty_kwargs)
43 | 
44 | 
def list_biorecord_models(schema_module: ModuleType):
    """Return the names of all BioRecord subclasses in `schema_module.models`.

    Args:
        schema_module: Module object that exposes a `models` attribute.

    Returns:
        A list of attribute names (strings), in `dir()` order, whose values
        are classes deriving from `BioRecord` (including `BioRecord` itself
        if it is exposed there).
    """
    import inspect

    import lamindb as ln  # needed here

    from .models import BioRecord

    models_namespace = schema_module.models
    model_names = []
    for attr_name in dir(models_namespace):
        candidate = getattr(models_namespace, attr_name)
        if inspect.isclass(candidate) and issubclass(candidate, BioRecord):
            model_names.append(attr_name)
    return model_names
59 | 


--------------------------------------------------------------------------------
/bionty/_organism.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | 
  3 | import pandas as pd
  4 | from django.core.exceptions import FieldDoesNotExist, ObjectDoesNotExist
  5 | from lamin_utils import logger
  6 | 
  7 | from .models import BioRecord, Organism
  8 | 
  9 | 
 10 | class OrganismNotSet(SystemExit):
 11 |     """The `organism` parameter was not passed or is not globally set."""
 12 | 
 13 |     pass
 14 | 
 15 | 
 16 | def create_or_get_organism_record(
 17 |     organism: str | Organism | None, registry: type[BioRecord], field: str | None = None
 18 | ) -> Organism | None:
 19 |     """Create or get an organism record from the given organism name."""
 20 |     # return None if a registry doesn't require organism field
 21 |     organism_record = None
 22 |     if is_organism_required(registry):
 23 |         # using global setting of organism
 24 |         from .core._settings import settings
 25 |         from .models import Organism
 26 | 
 27 |         if organism is None and settings.organism is not None:
 28 |             logger.debug(f"using default organism = {settings.organism.name}")
 29 |             return settings.organism
 30 | 
 31 |         if isinstance(organism, Organism):
 32 |             organism_record = organism
 33 |         elif isinstance(organism, str):
 34 |             try:
 35 |                 # existing organism record
 36 |                 organism_record = Organism.objects.get(name=organism)
 37 |             except ObjectDoesNotExist:
 38 |                 try:
 39 |                     # create a organism record from bionty reference
 40 |                     organisms = Organism.from_values([organism])
 41 |                     if len(organisms) == 0:
 42 |                         raise ValueError(
 43 |                             f"Organism {organism} can't be created from the bionty reference, check your spelling or create it manually."
 44 |                         )
 45 |                     organism_record = organisms[0].save()  # type:ignore
 46 |                 except KeyError:
 47 |                     # no such organism is found in bionty reference
 48 |                     organism_record = None
 49 | 
 50 |         if organism_record is None:
 51 |             if hasattr(registry, "_ontology_id_field") and field in {
 52 |                 registry._ontology_id_field,
 53 |                 "uid",
 54 |             }:
 55 |                 return None
 56 |             raise OrganismNotSet(
 57 |                 f"{registry.__name__} requires to specify a organism name via `organism=` or `bionty.settings.organism=`!"
 58 |             )
 59 | 
 60 |     return organism_record
 61 | 
 62 | 
 63 | def is_organism_required(registry: type[BioRecord]) -> bool:
 64 |     """Check if the registry has an organism field and is required.
 65 | 
 66 |     Returns:
 67 |         True if the registry has an organism field and is required, False otherwise.
 68 |     """
 69 |     try:
 70 |         organism_field = registry._meta.get_field("organism")
 71 |         # organism is not required or not a relation
 72 |         if organism_field.null or not organism_field.is_relation:
 73 |             return False
 74 |         else:
 75 |             return True
 76 |     except FieldDoesNotExist:
 77 |         return False
 78 | 
 79 | 
 80 | def organism_from_ensembl_id(id: str, using_key: str | None) -> Organism | None:
 81 |     """Get organism record from ensembl id."""
 82 |     import bionty as bt
 83 |     from bionty.base.dev._io import s3_bionty_assets
 84 | 
 85 |     # below has to consume a file path and NOT a directory because otherwise it fails on reticulate
 86 |     localpath = s3_bionty_assets(
 87 |         ".lamindb/0QeqXlKq9aqW8aqe0000.parquet",
 88 |         bt.base.settings.dynamicdir / "ensembl_prefix.parquet",
 89 |     )
 90 |     ensembl_prefixes = pd.read_parquet(localpath).set_index("gene_prefix")
 91 | 
 92 |     prefix = (
 93 |         re.search(r"^[A-Za-z]+", id).group(0) if re.search(r"^[A-Za-z]+", id) else id
 94 |     )
 95 |     if prefix in ensembl_prefixes.index:
 96 |         organism_name = ensembl_prefixes.loc[prefix, "name"].lower()
 97 | 
 98 |         using_key = None if using_key == "default" else using_key
 99 | 
100 |         organism_record = (
101 |             bt.Organism.using(using_key).filter(name=organism_name).one_or_none()
102 |         )
103 |         if organism_record is None:
104 |             organisms = bt.Organism.from_values([organism_name])
105 |             if len(organisms) > 0:
106 |                 organism_record = organisms[0]
107 |                 organism_record.save(using=using_key)
108 |             else:
109 |                 raise OrganismNotSet(
110 |                     f"Organism {organism_name} can't be created from the source, check your spelling or create it manually."
111 |                 )
112 |         return organism_record
113 |     return None
114 | 


--------------------------------------------------------------------------------
/bionty/_shared_docstrings.py:
--------------------------------------------------------------------------------
# Shared docstring fragment describing the `source` and `mute` parameters.
# The differing leading whitespace of the two lines is part of the value and
# is preserved exactly.
# NOTE(review): presumably substituted into other docstrings -- confirm at the call sites.
doc_from_source = (
    "    source: Optional Source record to use\n"
    "            mute: Whether to suppress logging\n"
)
5 | 


--------------------------------------------------------------------------------
/bionty/_source.py:
--------------------------------------------------------------------------------
  1 | import pandas as pd
  2 | from lamindb.models import SQLRecord
  3 | 
  4 | import bionty.base as bt_base
  5 | 
  6 | from ._organism import create_or_get_organism_record
  7 | 
  8 | 
  9 | def get_source_record(
 10 |     registry: type[SQLRecord],
 11 |     organism: str | SQLRecord | None = None,
 12 |     source: SQLRecord | None = None,
 13 | ) -> SQLRecord:
 14 |     """Get a Source record for a given BioRecord model."""
 15 |     from .models import Source
 16 | 
 17 |     if source is not None:
 18 |         return source
 19 | 
 20 |     organism_record = create_or_get_organism_record(organism, registry)
 21 | 
 22 |     entity_name = registry.__get_name_with_module__()
 23 |     filter_kwargs = {"entity": entity_name}
 24 |     if isinstance(organism_record, SQLRecord):
 25 |         filter_kwargs["organism"] = organism_record.name
 26 |     elif isinstance(organism, str):
 27 |         filter_kwargs["organism"] = organism
 28 | 
 29 |     sources = Source.filter(**filter_kwargs).all()
 30 |     if len(sources) == 0:
 31 |         raise ValueError(f"No source record found for filter {filter_kwargs}")
 32 |     if len(sources) == 1:
 33 |         return sources.one()
 34 | 
 35 |     current_sources = sources.filter(currently_used=True).all()
 36 |     if len(current_sources) == 1:
 37 |         return current_sources.first()
 38 |     elif len(current_sources) > 1:
 39 |         if organism is None:
 40 |             # for Organism, in most cases we load from the vertebrates source because ncbitaxon is too big
 41 |             if entity_name == "bionty.Organism":
 42 |                 current_sources_vertebrates = current_sources.filter(
 43 |                     organism="vertebrates"
 44 |                 ).all()
 45 |                 if len(current_sources_vertebrates) > 0:
 46 |                     return current_sources_vertebrates.first()
 47 |             # return source with organism="all"
 48 |             current_sources_all = current_sources.filter(organism="all").all()
 49 |             if len(current_sources_all) > 0:
 50 |                 return current_sources_all.first()
 51 |         return current_sources.first()
 52 |     else:  # len(current_sources) == 0
 53 |         sources_all = sources.filter(organism="all").all()
 54 |         if len(sources_all) > 0:
 55 |             # return source with organism="all"
 56 |             return sources_all.first()
 57 |         return sources.first()
 58 | 
 59 | 
 60 | def filter_public_df_columns(
 61 |     model: type[SQLRecord], public_ontology: bt_base.PublicOntology
 62 | ) -> pd.DataFrame:
 63 |     """Filter columns of public ontology to match the model fields."""
 64 | 
 65 |     def _prepare_public_df(model: type[SQLRecord], bionty_df: pd.DataFrame):
 66 |         """Prepare the bionty DataFrame to match the model fields."""
 67 |         if bionty_df.empty:
 68 |             return bionty_df
 69 |         if model.__get_name_with_module__() == "bionty.Gene":
 70 |             # groupby ensembl_gene_id and concat ncbi_gene_ids
 71 |             groupby_id_col = (
 72 |                 "ensembl_gene_id" if "ensembl_gene_id" in bionty_df else "stable_id"
 73 |             )
 74 |             if groupby_id_col not in bionty_df:
 75 |                 raise ValueError(
 76 |                     "public df must contain column 'ensembl_gene_id' or 'stable_id'"
 77 |                 )
 78 |             bionty_df.drop(
 79 |                 columns=["hgnc_id", "mgi_id", "index"], errors="ignore", inplace=True
 80 |             )
 81 |             agg_kwags = {}
 82 |             if "ncbi_gene_id" in bionty_df:
 83 |                 bionty_df.drop_duplicates(
 84 |                     [groupby_id_col, "ncbi_gene_id"], inplace=True
 85 |                 )
 86 |                 bionty_df["ncbi_gene_id"] = bionty_df["ncbi_gene_id"].fillna("")
 87 |                 bionty_df.rename(
 88 |                     columns={"ncbi_gene_id": "ncbi_gene_ids"}, inplace=True
 89 |                 )
 90 |                 agg_kwags["ncbi_gene_ids"] = "|".join
 91 |             for col in ["symbol", "biotype", "description", "synonyms"]:
 92 |                 if col in bionty_df:
 93 |                     agg_kwags[col] = "first"  # type: ignore
 94 |             bionty_df = bionty_df.groupby(groupby_id_col).agg(agg_kwags).reset_index()
 95 | 
 96 |         # rename definition to description for the bionty registry in db
 97 |         if "definition" in bionty_df:
 98 |             bionty_df.rename(columns={"definition": "description"}, inplace=True)
 99 |         return bionty_df
100 | 
101 |     bionty_df = pd.DataFrame()
102 |     if public_ontology is not None:
103 |         model_field_names = {i.name for i in model._meta.fields}
104 |         # parents needs to be added here as relationships aren't in fields
105 |         model_field_names.add("parents")
106 |         bionty_df = _prepare_public_df(
107 |             model, public_ontology.to_dataframe().reset_index()
108 |         )
109 |         bionty_df = bionty_df.loc[:, bionty_df.columns.isin(model_field_names)]
110 |     return bionty_df
111 | 


--------------------------------------------------------------------------------
/bionty/base/__init__.py:
--------------------------------------------------------------------------------
 1 | """Access to public ontologies.
 2 | 
  3 | `bionty.base` is the read-only interface to public ontologies that underlies bionty; it doesn't require a lamindb instance.
 4 | 
 5 | Import the package:
 6 | 
 7 | >>> import bionty.base as bt_base
 8 | 
 9 | Access public ontologies:
10 | 
11 | >>> genes = bt_base.Gene()
12 | 
13 | Get a DataFrame of all available values:
14 | 
15 | >>> genes.to_dataframe()
16 | 
17 | Entities
18 | ========
19 | 
20 | Bionty base provides access to several entities, most of which are also supported by Bionty.
21 | 
22 | .. autosummary::
23 |    :toctree: .
24 | 
25 |    Organism
26 |    Gene
27 |    Protein
28 |    CellMarker
29 |    CellType
30 |    CellLine
31 |    Tissue
32 |    Disease
33 |    Phenotype
34 |    Pathway
35 |    ExperimentalFactor
36 |    DevelopmentalStage
37 |    Drug
38 |    Ethnicity
39 |    BFXPipeline
40 |    BioSample
41 | 
42 | Base class
43 | ----------
44 | 
45 | `Pronto Ontology objects <https://pronto.readthedocs.io/en/stable/api/pronto.Ontology.html>`__ can be accessed via `{entity}.to_pronto()`.
46 | 
47 | .. autosummary::
48 |    :toctree: .
49 | 
50 |    PublicOntology
51 |    PublicOntologyField
52 | 
53 | Ontology sources
54 | ----------------
55 | 
56 | .. autosummary::
57 |    :toctree: .
58 | 
59 |    display_sources
60 |    display_currently_used_sources
61 |    settings
62 | 
63 | """
64 | 
65 | # dynamic classes
66 | from . import dev
67 | from ._display_sources import display_currently_used_sources, display_sources
68 | 
69 | # tools
70 | from ._public_ontology import PublicOntology, PublicOntologyField
71 | from ._settings import settings
72 | 
73 | # sources
74 | # from .dev._handle_sources import reset_sources
75 | from .entities._bfxpipeline import BFXPipeline
76 | from .entities._biosample import BioSample
77 | from .entities._cellline import CellLine
78 | from .entities._cellmarker import CellMarker
79 | from .entities._celltype import CellType
80 | from .entities._developmentalstage import DevelopmentalStage
81 | from .entities._disease import Disease
82 | from .entities._drug import Drug
83 | from .entities._ethnicity import Ethnicity
84 | from .entities._experimentalfactor import ExperimentalFactor
85 | from .entities._gene import Gene
86 | from .entities._organism import Organism
87 | from .entities._pathway import Pathway
88 | from .entities._phenotype import Phenotype
89 | from .entities._protein import Protein
90 | from .entities._tissue import Tissue
91 | 


--------------------------------------------------------------------------------
/bionty/base/_display_sources.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | from lamin_utils import logger
 3 | from lamindb_setup.core import deprecated
 4 | 
 5 | from bionty.base.dev._handle_sources import LAMINDB_INSTANCE_LOADED
 6 | 
 7 | from ._settings import settings
 8 | from .dev._handle_sources import parse_currently_used_sources
 9 | 
10 | 
def display_sources() -> pd.DataFrame:
    """Displays all available sources.

    Parses the file at ``settings.public_sources`` with ``parse_sources_yaml``
    and returns the resulting table indexed by ``entity``.

    Example::

        import bionty.base as bt_base

        bt_base.display_sources()
    """
    # imported inside the function rather than at module level
    from .dev._handle_sources import parse_sources_yaml

    return parse_sources_yaml(settings.public_sources).set_index("entity")  # type: ignore
23 | 
24 | 
@deprecated("display_sources")
def display_available_sources() -> pd.DataFrame:
    """Return the same table as :func:`display_sources`.

    Kept under the old name and marked with ``@deprecated("display_sources")``.
    """
    return display_sources()
28 | 
29 | 
def display_currently_used_sources(mute: bool = False) -> pd.DataFrame:
    """Displays all currently used sources.

    Active version is unique for entity + organism.

    Args:
        mute: If ``True``, do not log the hint that is otherwise emitted when a
            LaminDB instance is detected.

    Returns:
        A DataFrame indexed by ``entity`` with the columns ``organism``,
        ``name`` and ``version``.

    Example::

        import bionty.base as bt_base

        bt_base.display_currently_used_sources()
    """
    if LAMINDB_INSTANCE_LOADED() and not mute:
        logger.error(
            "You have a LaminDB instance loaded, please run the following to check default sources:\n"
            "    → bt.Source.filter(currently_used=True).to_dataframe()"
        )

    # nested mapping: entity -> organism -> {source name: version}
    used_sources = parse_currently_used_sources(settings.public_sources)

    rows = [
        {
            "entity": entity,
            "organism": organism,
            "name": source_name,
            "version": version,
        }
        for entity, per_organism in used_sources.items()
        for organism, per_source in per_organism.items()
        for source_name, version in per_source.items()
    ]
    return pd.DataFrame(rows).set_index("entity")
64 | 


--------------------------------------------------------------------------------
/bionty/base/_ontology_url.py:
--------------------------------------------------------------------------------
  1 | from functools import lru_cache
  2 | 
  3 | import requests
  4 | 
  5 | 
  6 | def import_bioregistry():
  7 |     """Import bioregistry module if available."""
  8 |     try:
  9 |         import bioregistry
 10 | 
 11 |         return bioregistry
 12 |     except ImportError:
 13 |         raise ImportError(
 14 |             "Please install bioregistry with `pip install bioregistry`."
 15 |         ) from None
 16 | 
 17 | 
 18 | class OntologyURLError(Exception):
 19 |     """Base exception for ontology resolver errors."""
 20 | 
 21 |     pass
 22 | 
 23 | 
 24 | class OntologyNotFoundError(OntologyURLError):
 25 |     """Raised when an ontology cannot be found."""
 26 | 
 27 |     pass
 28 | 
 29 | 
 30 | class OntologyVersionNotFoundError(OntologyURLError):
 31 |     """Raised when a specific version of an ontology cannot be found."""
 32 | 
 33 |     pass
 34 | 
 35 | 
 36 | @lru_cache(maxsize=128)
 37 | def get_ontology_url(prefix: str, version: str | None = None) -> tuple[str, str]:
 38 |     """Get a versioned download URL for an ontology based on its CURIE prefix.
 39 | 
 40 |     Args:
 41 |         prefix: The CURIE prefix (e.g., 'GO', 'MONDO', 'HP')
 42 |         version: Optional version string (e.g., '2023-01-01')
 43 |                 If None, the latest version will be determined.
 44 | 
 45 |     Returns:
 46 |         Tuple of (download_url, version_string)
 47 | 
 48 |     Raises:
 49 |         OntologyNotFoundError: If the ontology cannot be found
 50 |         OntologyVersionNotFoundError: If no versioned URL can be found
 51 |     """
 52 |     bioregistry = import_bioregistry()
 53 | 
 54 |     if not prefix:
 55 |         raise ValueError("please provide a prefix")
 56 | 
 57 |     # Normalize the prefix
 58 |     normalized = bioregistry.normalize_prefix(prefix) or prefix
 59 | 
 60 |     # Check if the prefix exists at all
 61 |     if not _prefix_exists(normalized):
 62 |         raise OntologyNotFoundError(f"ontology with prefix '{prefix}' not found")
 63 | 
 64 |     # If specific version requested, try to get it
 65 |     if version:
 66 |         # try standard versioned URL patterns
 67 |         url, ver = _get_specific_version(normalized, version)
 68 |         if url:
 69 |             return url, ver
 70 | 
 71 |         raise OntologyVersionNotFoundError(
 72 |             f"version '{version}' of ontology '{prefix}' not found"
 73 |         )
 74 | 
 75 |     # For latest version
 76 |     url, ver = _get_latest_from_ols4(normalized)
 77 |     if url:
 78 |         return url, ver
 79 | 
 80 |     # If we get here, no versioned URL was found
 81 |     raise OntologyVersionNotFoundError(
 82 |         f"no versioned URL found for ontology '{prefix}'"
 83 |     )
 84 | 
 85 | 
 86 | def _prefix_exists(prefix: str) -> bool:
 87 |     """Check if a prefix exists in any registry."""
 88 |     bioregistry = import_bioregistry()
 89 | 
 90 |     if bioregistry.normalize_prefix(prefix):
 91 |         return True
 92 | 
 93 |     # Check OLS4
 94 |     try:
 95 |         response = requests.head(
 96 |             f"https://www.ebi.ac.uk/ols4/api/ontologies/{prefix.lower()}", timeout=5
 97 |         )
 98 |         if response.status_code < 400:
 99 |             return True
100 |     except requests.RequestException:
101 |         pass
102 | 
103 |     return False
104 | 
105 | 
def _url_exists(url: str) -> bool:
    """Report whether a HEAD request to ``url`` succeeds.

    Redirects are followed; any final status in the range 200-399 counts as
    existing. A request that raises ``requests.RequestException`` yields ``False``.
    """
    try:
        status = requests.head(url, timeout=5, allow_redirects=True).status_code
    except requests.RequestException:
        return False
    return 200 <= status < 400
113 | 
114 | 
115 | def _extract_version_from_iri(version_iri: str | None):
116 |     """Extract version from an IRI string by taking the second-to-last path component."""
117 |     if isinstance(version_iri, str):
118 |         # If we have at least two parts, return the second-to-last
119 |         parts = version_iri.split("/")
120 |         if len(parts) >= 2:
121 |             return parts[-2].removeprefix("v")
122 | 
123 | 
def _get_specific_version(prefix: str, version: str) -> tuple[str | None, str | None]:
    """Probe the standard OBO PURL layouts for one version of an ontology.

    Args:
        prefix: Ontology prefix; replaced by the OBO Foundry prefix when
            bioregistry provides one.
        version: Requested version; a leading ``v`` is ignored.

    Returns:
        ``(url, version_without_v)`` for the first layout whose URL exists,
        otherwise ``(None, None)``.
    """
    registry = import_bioregistry()

    bare_version = version.removeprefix("v")
    slug = (registry.get_obofoundry_prefix(prefix) or prefix).lower()

    # tried in order: direct version path, releases directory, "v"-prefixed version
    release_dirs = (bare_version, f"releases/{bare_version}", f"v{bare_version}")
    for release_dir in release_dirs:
        candidate = f"http://purl.obolibrary.org/obo/{slug}/{release_dir}/{slug}.owl"
        if _url_exists(candidate):
            return candidate, bare_version

    return None, None
147 | 
148 | 
def _get_latest_from_ols4(prefix: str) -> tuple[str | None, str | None]:
    """Look up the latest download URL and version of an ontology in OLS4.

    The ``config`` section of the OLS4 response is inspected: a reachable
    ``versionIri`` is preferred, a reachable ``fileLocation`` is the fallback.

    Returns:
        ``(url, version)``; ``(None, None)`` when the request fails, the status
        is not 200, or neither URL is reachable. ``version`` itself may be
        ``None`` when OLS4 reports none and none can be derived from the IRI.
    """
    endpoint = f"https://www.ebi.ac.uk/ols4/api/ontologies/{prefix.lower()}"
    try:
        response = requests.get(endpoint, timeout=30)
        if response.status_code != 200:
            return None, None
        config = response.json().get("config", {})
    except requests.RequestException:
        return None, None

    version = config.get("version")
    version_iri = config.get("versionIri")

    # preferred source: the version IRI itself, if it can be reached
    if version_iri and _url_exists(version_iri):
        return version_iri, version or _extract_version_from_iri(version_iri)

    # fallback: the plain file location
    file_location = config.get("fileLocation")
    if file_location and _url_exists(file_location):
        if version_iri and not version:
            # an unreachable version IRI can still carry the version, for example: pw
            version = _extract_version_from_iri(version_iri)
        return file_location, version

    return None, None
186 | 


--------------------------------------------------------------------------------
/bionty/base/_settings.py:
--------------------------------------------------------------------------------
 1 | from functools import wraps
 2 | from pathlib import Path
 3 | 
 4 | HOME_DIR = Path(f"{Path.home()}/.lamin/bionty").resolve()
 5 | ROOT_DIR = Path(__file__).parent.resolve()
 6 | 
 7 | 
 8 | def check_datasetdir_exists(f):
 9 |     @wraps(f)
10 |     def wrapper(*args, **kwargs):
11 |         settings.datasetdir.mkdir(exist_ok=True)
12 |         return f(*args, **kwargs)
13 | 
14 |     return wrapper
15 | 
16 | 
def check_dynamicdir_exists(f):
    """Decorate ``f`` so that ``settings.dynamicdir`` is created before each call."""

    @wraps(f)
    def ensure_dir_then_call(*args, **kwargs):
        # mkdir is called without parents=True: only the last path component is created
        settings.dynamicdir.mkdir(exist_ok=True)
        return f(*args, **kwargs)

    return ensure_dir_then_call
24 | 
25 | 
class Settings:
    """Directory and source-file locations used by ``bionty.base``.

    Args:
        datasetdir: Directory for datasets; defaults to ``data/`` next to this module.
        dynamicdir: Value of the ``dynamicdir`` setting; defaults to ``_dynamic/``
            next to this module.
    """

    def __init__(
        self,
        datasetdir: str | Path = ROOT_DIR / "data/",
        dynamicdir: str | Path = ROOT_DIR / "_dynamic/",
    ):
        # both assignments go through the property setters, which store resolved Paths
        self.datasetdir = datasetdir
        self.dynamicdir = dynamicdir

    @property
    def datasetdir(self):
        """Directory for datasets, as a resolved ``Path``."""
        return self._datasetdir

    @datasetdir.setter
    def datasetdir(self, path: str | Path):
        self._datasetdir = Path(path).resolve()

    @property
    def dynamicdir(self):
        """Directory configured as ``dynamicdir``, as a resolved ``Path``."""
        return self._dynamicdir

    @dynamicdir.setter
    def dynamicdir(self, path: str | Path):
        self._dynamicdir = Path(path).resolve()

    @property
    def public_sources(self):
        """Path of the ``sources.yaml`` file located next to this module."""
        return ROOT_DIR.joinpath("sources.yaml")
57 | 
58 | 
# Module-level instance built with the default directories; the decorators in
# this module read it and other modules import it as `settings`.
settings = Settings()
60 | 


--------------------------------------------------------------------------------
/bionty/base/dev/__init__.py:
--------------------------------------------------------------------------------
 1 | """Dev.
 2 | 
 3 | .. autosummary::
 4 |    :toctree: .
 5 | 
 6 |    InspectResult
 7 | """
 8 | 
 9 | from lamin_utils._inspect import InspectResult
10 | 


--------------------------------------------------------------------------------
/bionty/base/dev/_doc_util.py:
--------------------------------------------------------------------------------
 1 | from textwrap import dedent
 2 | 
 3 | 
 4 | def _doc_params(**kwds):  # pragma: no cover
 5 |     r"""\
 6 |     Docstrings should start with "\" in the first line for proper formatting.
 7 |     """
 8 | 
 9 |     def dec(obj):
10 |         obj.__orig_doc__ = obj.__doc__
11 |         obj.__doc__ = dedent(obj.__doc__).format_map(kwds)
12 |         return obj
13 | 
14 |     return dec
15 | 


--------------------------------------------------------------------------------
/bionty/base/dev/_handle_sources.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from pathlib import Path
 4 | 
 5 | import pandas as pd
 6 | 
 7 | from bionty.base._settings import settings
 8 | from bionty.base.dev._io import load_yaml
 9 | 
10 | 
def LAMINDB_INSTANCE_LOADED():
    """Report whether ``~/.lamin/current_instance.env`` mentions bionty.

    Returns ``False`` when the file does not exist. Otherwise returns whether
    the text ``bionty`` occurs after the last ``schema_str=`` in the file
    (or anywhere in the file when ``schema_str=`` is absent).
    """
    env_file = Path.home() / ".lamin/current_instance.env"
    if not env_file.exists():
        return False
    content = env_file.read_text()
    # rpartition yields the whole content as the tail when the marker is missing
    return "bionty" in content.rpartition("schema_str=")[2]
18 | 
19 | 
def parse_sources_yaml(
    filepath: str | Path = settings.public_sources,
    url_pattern: bool = False,
) -> pd.DataFrame:
    """Flatten a sources yaml file into one DataFrame row per entity/source/organism.

    Args:
        filepath: Path to the versions yaml file.
        url_pattern: If ``True``, keep the url as written in the file; otherwise
            the ``{version}`` placeholder in the url is replaced by the latest version.

    Returns:
        A DataFrame with the columns

        - entity
        - name (the source key)
        - organism
        - version (the ``latest-version`` entry as a string)
        - url
        - description (the source's ``name`` entry)
        - source_website (the source's ``website`` entry)
    """
    column_names = [
        "entity",
        "name",
        "organism",
        "version",
        "url",
        "description",
        "source_website",
    ]
    # keys of a source block that carry metadata rather than an organism
    metadata_keys = ("name", "website")

    records = []
    for entity, sources in load_yaml(filepath).items():
        # the top-level "version" key is not an entity
        if entity == "version":
            continue
        for source_key, source_block in sources.items():
            description = source_block.get("name", "")
            website = source_block.get("website", "")
            for organism, release in source_block.items():
                if organism in metadata_keys:
                    continue
                version = str(release.get("latest-version"))
                url = release.get("url")
                if not url_pattern:
                    url = url.replace("{version}", version)
                records.append(
                    (entity, source_key, organism, version, url, description, website)
                )

    return pd.DataFrame(records, columns=column_names)
67 | 
68 | 
69 | def parse_currently_used_sources(yaml: str | Path | list[dict]) -> dict:
70 |     """Parse out the most recent versions from yaml."""
71 |     if isinstance(yaml, str | Path):
72 |         df = parse_sources_yaml(yaml)
73 |         df_current = (
74 |             df[["entity", "name", "organism", "version"]]  # type: ignore
75 |             .drop_duplicates(["entity", "organism", "name"], keep="first")
76 |             .groupby(["entity", "organism", "name"], sort=False)
77 |             .max()
78 |         )
79 |         records = df_current.reset_index().to_dict(orient="records")
80 |     else:
81 |         records = yaml
82 | 
83 |     current_dict: dict = {}
84 |     for kwargs in records:
85 |         entity, organism, source, version = (
86 |             kwargs["entity"],
87 |             kwargs["organism"],
88 |             kwargs["name"],
89 |             kwargs["version"],
90 |         )
91 |         if entity not in current_dict:
92 |             current_dict[entity] = {}
93 |         if organism not in current_dict[entity]:
94 |             current_dict[entity][organism] = {source: version}
95 |     return current_dict
96 | 


--------------------------------------------------------------------------------
/bionty/base/dev/_io.py:
--------------------------------------------------------------------------------
  1 | import shutil
  2 | from pathlib import Path
  3 | 
  4 | import requests  # type:ignore
  5 | import yaml  # type:ignore
  6 | from lamindb_setup.core.upath import UPath
  7 | from rich.progress import Progress
  8 | 
  9 | from bionty.base._settings import settings
 10 | 
 11 | 
def load_yaml(filename: str | Path):  # pragma: no cover
    """Open ``filename`` and return its content parsed with ``yaml.safe_load``."""
    with open(filename) as f:
        return yaml.safe_load(f)
 15 | 
 16 | 
def write_yaml(
    data: dict,
    filename: str | Path,
    sort_keys: bool = False,
    default_flow_style: bool = False,
):  # pragma: no cover
    """Write ``data`` to ``filename`` with ``yaml.dump``.

    Args:
        data: The mapping to serialize.
        filename: Target file; opened in ``"w"`` mode, so existing content is replaced.
        sort_keys: Forwarded to ``yaml.dump``.
        default_flow_style: Forwarded to ``yaml.dump``.
    """
    with open(filename, "w") as f:
        yaml.dump(
            data,
            f,
            sort_keys=sort_keys,
            default_flow_style=default_flow_style,
        )
 30 | 
 31 | 
 32 | def url_download(
 33 |     url: str, localpath: str | Path | None = None, block_size: int = 1024, **kwargs
 34 | ) -> str | Path | None:
 35 |     """Downloads a file to a specified path.
 36 | 
 37 |     Args:
 38 |         url: The URL to download.
 39 |         localpath: The path to download the file to.
 40 |         block_size: Buffer size in bytes for sending a file-like message body.
 41 |         **kwargs: Keyword arguments are passed to 'requests'
 42 | 
 43 |     Returns:
 44 |         The localpath file is downloaded to
 45 | 
 46 |     Raises:
 47 |         HttpError: If the request response is not 200 and OK.
 48 |     """
 49 |     if url.startswith("file://"):
 50 |         url = url.split("file://")[-1]
 51 |         shutil.copy(url, localpath)
 52 |         return localpath
 53 |     try:
 54 |         response = requests.get(url, stream=True, allow_redirects=True, **kwargs)
 55 |         response.raise_for_status()
 56 | 
 57 |         total_content_length = int(response.headers.get("content-length", 0))
 58 |         if localpath is None:
 59 |             localpath = url.split("/")[-1]
 60 | 
 61 |         if total_content_length > 5000000:
 62 |             with Progress(refresh_per_second=10, transient=True) as progress:
 63 |                 task = progress.add_task(
 64 |                     "[red]downloading...", total=total_content_length
 65 |                 )
 66 | 
 67 |                 with open(localpath, "wb") as file:
 68 |                     for data in response.iter_content(block_size):
 69 |                         file.write(data)
 70 |                         progress.update(task, advance=block_size)
 71 |                 # force the progress bar to 100% at the end
 72 |                 progress.update(task, completed=total_content_length, refresh=True)
 73 |         else:
 74 |             with open(localpath, "wb") as file:
 75 |                 for data in response.iter_content(block_size):
 76 |                     file.write(data)
 77 | 
 78 |         return localpath
 79 | 
 80 |     except requests.exceptions.HTTPError as err:
 81 |         if err.response.status_code == 404:
 82 |             raise requests.exceptions.HTTPError(
 83 |                 f"URL not found (404): '{url}'. Check for typos."
 84 |             ) from err
 85 |         else:
 86 |             raise requests.exceptions.HTTPError(
 87 |                 f"HTTP error ({err.response.status_code}): {url}."
 88 |             ) from err
 89 | 
 90 | 
def s3_bionty_assets(
    filename: str, localpath: Path = None, assets_base_url: str = "s3://bionty-assets"
):
    """Synchronizes a S3 file path with local file storage.

    If the file does not exist locally it gets downloaded to datasetdir/filename or the passed localpath.
    If the file does not exist on S3, the file does not get synchronized, no erroring.

    Args:
        filename: The suffix of the assets_base_url.
        localpath: Local path of the file to sync; ``None`` (the default) means
            ``settings.datasetdir / filename``.
        assets_base_url: The S3 base URL. Prefix of the filename.

    Returns:
        A Path object of the synchronized path. It is also returned, without any
        synchronization, when the remote ``stat()`` raises ``FileNotFoundError``
        or ``PermissionError``.

    Raises:
        AssertionError: If a passed ``localpath`` exists but is not a file.
    """
    if localpath is None:
        localpath = settings.datasetdir / filename
    else:  # it errors on reticulate if we pass a directory
        if localpath.exists():
            assert localpath.is_file(), (
                f"localpath {localpath} has to be a file path, not a directory"
            )
    # this requires s3fs, but it is installed by lamindb
    # skip_instance_cache=True to avoid interference with cached filesystems
    # especially with their dircache
    remote_path = (
        UPath(
            assets_base_url,
            skip_instance_cache=True,
            use_listings_cache=True,
            anon=True,
        )
        / filename
    )
    # check that the remote path exists and is available
    try:
        remote_stat = remote_path.stat()
    except (FileNotFoundError, PermissionError):
        # best effort: a missing or inaccessible remote file is not an error
        return localpath
    # this is needed unfortunately because s3://bionty-assets doesn't have ListObjectsV2 for anonymous users.
    # Moreover, ListObjectsV2 is triggered inside .synchronize if no cache is present.
    # TODO: check if this is still needed
    # seed the listing cache of the parent "directory" with the single stat result
    parent_path = remote_path.parent.path.rstrip("/")
    remote_path.fs.dircache[parent_path] = [remote_stat.as_info()]
    # synchronize the remote path
    if hasattr(remote_path, "synchronize_to"):
        remote_path.synchronize_to(
            localpath, error_no_origin=False, print_progress=True
        )
    else:
        # UPath.synchronize is deprecated
        remote_path.synchronize(localpath, error_no_origin=False, print_progress=True)
    # clean the artificial cache
    # NOTE(review): not reached if the synchronization above raises
    del remote_path.fs.dircache[parent_path]

    return localpath
148 | 


--------------------------------------------------------------------------------
/bionty/base/entities/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/laminlabs/bionty/620a707fe3266d49249af3b47bebdac043b36326/bionty/base/entities/__init__.py


--------------------------------------------------------------------------------
/bionty/base/entities/_bfxpipeline.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from typing import Literal
 4 | 
 5 | from bionty.base._public_ontology import PublicOntology
 6 | from bionty.base.dev._doc_util import _doc_params
 7 | 
 8 | from ._shared_docstrings import doc_entites
 9 | 
10 | 
@_doc_params(doc_entities=doc_entites)
class BFXPipeline(PublicOntology):
    """Bioinformatics pipelines.

    The type hints list organism ``all``, source ``lamin`` and version ``1.0.0``.

    Args:
        {doc_entities}
    """

    def __init__(
        self,
        organism: Literal["all"] | None = None,
        source: Literal["lamin"] | None = None,
        version: Literal["1.0.0"] | None = None,
        **kwargs,
    ) -> None:
        # Everything is forwarded to PublicOntology; ols_supported is fixed to
        # False for this entity (presumably: not available through OLS, confirm).
        super().__init__(
            source=source,
            version=version,
            organism=organism,
            ols_supported=False,
            **kwargs,
        )
33 | 


--------------------------------------------------------------------------------
/bionty/base/entities/_biosample.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from typing import Literal
 4 | 
 5 | from bionty.base._public_ontology import PublicOntology
 6 | from bionty.base.dev._doc_util import _doc_params
 7 | 
 8 | from ._shared_docstrings import doc_entites
 9 | 
10 | 
@_doc_params(doc_entities=doc_entites)
class BioSample(PublicOntology):
    """BioSample attributes.

    1. NCBI BioSample Attributes
    https://www.ncbi.nlm.nih.gov/biosample/docs/attributes

    The type hints list organism ``all``, source ``ncbi`` and version ``2023-09``.

    Args:
        {doc_entities}
    """

    def __init__(
        self,
        organism: Literal["all"] | None = None,
        source: Literal["ncbi"] | None = None,
        version: Literal["2023-09"] | None = None,
        **kwargs,
    ) -> None:
        # Everything is forwarded to PublicOntology; ols_supported is fixed to
        # False for this entity (presumably: not available through OLS, confirm).
        super().__init__(
            source=source,
            version=version,
            organism=organism,
            ols_supported=False,
            **kwargs,
        )
36 | 


--------------------------------------------------------------------------------
/bionty/base/entities/_cellline.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from typing import Literal
 4 | 
 5 | from bionty.base._public_ontology import PublicOntology
 6 | from bionty.base.dev._doc_util import _doc_params
 7 | 
 8 | from ._shared_docstrings import doc_entites
 9 | 
10 | 
@_doc_params(doc_entities=doc_entites)
class CellLine(PublicOntology):
    """Cell line.

    1. Cell Line Ontology
    https://github.com/CLO-ontology/CLO

    2. DepMap
    https://depmap.org

    Args:
        {doc_entities}
    """

    def __init__(
        self,
        organism: Literal["all"] | None = None,
        source: Literal["clo", "depmap"] | None = None,
        version: Literal[
            # Cell Line Ontology
            "2023-03-28",
            "2022-03-21",
            # DepMap
            "2024-Q2",
        ]
        | None = None,
        **kwargs,
    ) -> None:
        # Everything is forwarded to PublicOntology; for the "clo" source the
        # id prefix "CLO:" is passed as include_id_prefixes.
        super().__init__(
            source=source,
            version=version,
            organism=organism,
            include_id_prefixes={"clo": ["CLO:"]},
            **kwargs,
        )
46 | 


--------------------------------------------------------------------------------
/bionty/base/entities/_cellmarker.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from typing import Literal
 4 | 
 5 | from bionty.base._public_ontology import PublicOntology
 6 | from bionty.base.dev._doc_util import _doc_params
 7 | 
 8 | from ._shared_docstrings import doc_entites
 9 | 
10 | 
@_doc_params(doc_entities=doc_entites)
class CellMarker(PublicOntology):
    """Cell markers.

    1. Cell Marker Ontology
    http://bio-bigdata.hrbmu.edu.cn/CellMarker/

    The type hints list organisms ``human`` and ``mouse``, source ``cellmarker``
    and version ``2.0``.

    Args:
        {doc_entities}
    """

    def __init__(
        self,
        organism: Literal["human", "mouse"] | None = None,
        source: Literal["cellmarker"] | None = None,
        version: Literal["2.0"] | None = None,
        **kwargs,
    ) -> None:
        # Everything is forwarded to PublicOntology; ols_supported is fixed to
        # False for this entity (presumably: not available through OLS, confirm).
        super().__init__(
            source=source,
            version=version,
            organism=organism,
            ols_supported=False,
            **kwargs,
        )
36 | 


--------------------------------------------------------------------------------
/bionty/base/entities/_celltype.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from typing import Literal
 4 | 
 5 | from bionty.base._public_ontology import PublicOntology
 6 | from bionty.base.dev._doc_util import _doc_params
 7 | 
 8 | from ._shared_docstrings import doc_entites
 9 | 
10 | 
11 | @_doc_params(doc_entities=doc_entites)
12 | class CellType(PublicOntology):
13 |     """Cell type ontologies.
14 | 
15 |     1. Cell ontology
16 |     https://github.com/obophenotype/cell-ontology
17 | 
18 |     Args:
19 |         {doc_entities}
20 |     """
21 | 
22 |     def __init__(
23 |         self,
24 |         organism: Literal["all"] | None = None,
25 |         source: Literal["cl"] | None = None,
26 |         version: Literal[  # version strings offered for the single "cl" source
27 |             "2025-04-10",
28 |             "2024-08-16",
29 |             "2024-05-15",
30 |             "2024-04-05",
31 |             "2024-02-13",
32 |             "2024-01-04",
33 |             "2023-08-24",
34 |             "2023-04-20",
35 |             "2023-02-15",
36 |             "2022-08-16",
37 |         ]
38 |         | None = None,
39 |         **kwargs,  # passed through unchanged to PublicOntology.__init__
40 |     ) -> None:
41 |         super().__init__(  # organism/source/version are handed to the base class exactly as given
42 |             source=source,
43 |             version=version,
44 |             organism=organism,
45 |             include_id_prefixes={"cl": ["CL:"]},  # NOTE(review): presumably source name -> accepted term-ID prefixes - confirm in PublicOntology
46 |             **kwargs,
47 |         )
48 | 


--------------------------------------------------------------------------------
/bionty/base/entities/_developmentalstage.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from typing import Literal
 4 | 
 5 | from bionty.base._public_ontology import PublicOntology
 6 | from bionty.base.dev._doc_util import _doc_params
 7 | 
 8 | from ._shared_docstrings import doc_entites
 9 | 
10 | 
11 | @_doc_params(doc_entities=doc_entites)
12 | class DevelopmentalStage(PublicOntology):
13 |     """Developmental Stage.
14 | 
15 |     1. Developmental Stage Ontology
16 |     https://github.com/obophenotype/developmental-stage-ontologies
17 | 
18 |     Args:
19 |         {doc_entities}
20 |     """
21 | 
22 |     def __init__(
23 |         self,
24 |         organism: Literal["human", "mouse"] | None = None,
25 |         source: Literal["hsapdv", "mmusdv"] | None = None,
26 |         version: Literal["2025-01-23", "2024-05-28", "2020-03-10"] | None = None,
27 |         **kwargs,  # passed through unchanged to PublicOntology.__init__
28 |     ) -> None:
29 |         super().__init__(  # organism/source/version are handed to the base class exactly as given
30 |             source=source,
31 |             version=version,
32 |             organism=organism,
33 |             include_id_prefixes={"hsapdv": ["HsapDv:"], "mmusdv": ["MmusDv:"]},  # one prefix list per source name
34 |             include_rel="part_of",  # NOTE(review): presumably an extra relationship type used when loading the ontology - confirm
35 |             **kwargs,
36 |         )
37 | 


--------------------------------------------------------------------------------
/bionty/base/entities/_disease.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from typing import Literal
 4 | 
 5 | from bionty.base._public_ontology import PublicOntology
 6 | from bionty.base.dev._doc_util import _doc_params
 7 | 
 8 | from ._shared_docstrings import doc_entites
 9 | 
10 | 
11 | @_doc_params(doc_entities=doc_entites)
12 | class Disease(PublicOntology):
13 |     """Disease ontologies.
14 | 
15 |     1. Mondo
16 |     Edits of terms are coordinated and reviewed on:
17 |     https://github.com/monarch-initiative/mondo
18 | 
19 |     2. Human Disease Ontology
20 |     Edits of terms are coordinated and reviewed on:
21 |     https://github.com/DiseaseOntology/HumanDiseaseOntology
22 | 
23 |     3. International Classification of Diseases (ICD)
24 |     Edits of terms are coordinated and reviewed on:
25 |     https://www.who.int/standards/classifications/classification-of-diseases
26 | 
27 |     Args:
28 |         {doc_entities}
29 |     """
30 | 
31 |     def __init__(
32 |         self,
33 |         organism: Literal["all", "human"] | None = None,
34 |         source: Literal["mondo", "doid", "icd"] | None = None,
35 |         version: Literal[
36 |             # Mondo
37 |             "2025-06-03",
38 |             "2024-08-06",
39 |             "2024-06-04",
40 |             "2024-05-08",
41 |             "2024-02-06",
42 |             "2024-01-03",
43 |             "2023-08-02",
44 |             "2023-04-04",
45 |             "2023-02-06",
46 |             "2022-10-11",
47 |             "2023-04-04",
48 |             # DOID
49 |             "2025-05-30",
50 |             "2024-05-29",
51 |             "2024-01-31",
52 |             "2023-03-31",
53 |             "2023-01-30",
54 |             # ICD
55 |             "icd-9-2011",
56 |             "icd-10-2020",
57 |             "icd-10-2024",
58 |             "icd-11-2023",
59 |         ]
60 |         | None = None,
61 |         **kwargs,
62 |     ) -> None:
63 |         super().__init__(
64 |             source=source,
65 |             version=version,
66 |             organism=organism,
67 |             include_id_prefixes={"mondo": ["MONDO:"]},
68 |             **kwargs,
69 |         )
70 | 


--------------------------------------------------------------------------------
/bionty/base/entities/_drug.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from typing import Literal
 4 | 
 5 | from bionty.base._public_ontology import PublicOntology
 6 | from bionty.base.dev._doc_util import _doc_params
 7 | 
 8 | from ._shared_docstrings import doc_entites
 9 | 
10 | 
11 | @_doc_params(doc_entities=doc_entites)
12 | class Drug(PublicOntology):
13 |     """Drug ontologies.
14 | 
15 |     1. DRON
16 |     Edits of terms are coordinated and reviewed on:
17 |     https://bioportal.bioontology.org/ontologies/DRON/
18 | 
19 |     2. CHEBI
20 |     https://www.ebi.ac.uk/chebi/
21 | 
22 |     Args:
23 |         {doc_entities}
24 |     """
25 | 
26 |     def __init__(
27 |         self,
28 |         organism: Literal["all"] | None = None,
29 |         source: Literal["dron", "chebi"] | None = None,
30 |         version: Literal[  # one flat list; the comments below group the strings by source
31 |             # DRON
32 |             "2025-04-18",
33 |             "2024-08-05",
34 |             "2023-03-10",
35 |             # CHEBI
36 |             "2024-07-27",
37 |             "2024-03-02",
38 |         ]
39 |         | None = None,
40 |         **kwargs,  # passed through unchanged to PublicOntology.__init__
41 |     ) -> None:
42 |         super().__init__(  # organism/source/version are handed to the base class exactly as given
43 |             source=source,
44 |             version=version,
45 |             organism=organism,
46 |             include_id_prefixes={"dron": ["DRON:"]},  # only "dron" has a prefix entry; "chebi" has none
47 |             **kwargs,
48 |         )
49 | 


--------------------------------------------------------------------------------
/bionty/base/entities/_ethnicity.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from typing import Literal
 4 | 
 5 | from bionty.base._public_ontology import PublicOntology
 6 | from bionty.base.dev._doc_util import _doc_params
 7 | 
 8 | from ._shared_docstrings import doc_entites
 9 | 
10 | 
11 | @_doc_params(doc_entities=doc_entites)
12 | class Ethnicity(PublicOntology):
13 |     """Ethnicity.
14 | 
15 |     1. Human Ancestry Ontology
16 |     https://github.com/EBISPOT/hancestro
17 | 
18 |     Args:
19 |         {doc_entities}
20 |     """
21 | 
22 |     def __init__(
23 |         self,
24 |         organism: Literal["human"] | None = None,
25 |         source: Literal["hancestro"] | None = None,
26 |         version: Literal["3.0", "2025-04-01"] | None = None,  # both a numbered and a dated version string are offered
27 |         **kwargs,  # passed through unchanged to PublicOntology.__init__
28 |     ) -> None:
29 |         super().__init__(  # organism/source/version are handed to the base class exactly as given
30 |             source=source,
31 |             version=version,
32 |             organism=organism,
33 |             include_id_prefixes={"hancestro": ["HANCESTRO:"]},  # NOTE(review): presumably source name -> accepted term-ID prefixes - confirm
34 |             **kwargs,
35 |         )
36 | 


--------------------------------------------------------------------------------
/bionty/base/entities/_experimentalfactor.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from typing import TYPE_CHECKING, Literal
 4 | 
 5 | from bionty.base._public_ontology import PublicOntology
 6 | from bionty.base.dev._doc_util import _doc_params
 7 | from bionty.base.entities._shared_docstrings import organism_removed
 8 | 
 9 | if TYPE_CHECKING:
10 |     from bionty.base._ontology import Ontology
11 | 
12 | 
13 | @_doc_params(doc_entities=organism_removed)
14 | class ExperimentalFactor(PublicOntology):
15 |     """Experimental Factor.
16 | 
17 |     1. Experimental Factor Ontology
18 |     https://www.ebi.ac.uk/ols/ontologies/efo
19 | 
20 |     Args:
21 |         {doc_entities}
22 |     """
23 | 
24 |     def __init__(
25 |         self,
26 |         organism: Literal["all"] | None = None,
27 |         source: Literal["efo"] | None = None,
28 |         version: Literal[  # version strings offered for the single "efo" source
29 |             "3.78.0",
30 |             "3.70.0",
31 |             "3.69.0",
32 |             "3.66.0",
33 |             "3.65.0",
34 |             "3.63.0",
35 |             "3.62.0",
36 |             "3.57.0",
37 |             "3.48.0",
38 |         ]
39 |         | None = None,
40 |         **kwargs,  # passed through unchanged to PublicOntology.__init__
41 |     ) -> None:
42 |         super().__init__(  # organism/source/version are handed to the base class exactly as given
43 |             organism=organism,
44 |             source=source,
45 |             version=version,
46 |             include_id_prefixes={"efo": ["EFO:", "http://www.ebi.ac.uk/efo/"]},  # both the CURIE-style and the URL-style prefix are listed
47 |             **kwargs,
48 |         )
49 | 
50 |     def to_pronto(self, mute: bool = False) -> Ontology:  # NOTE: `mute` is not used anywhere in this body
51 |         """Fetch the ontology file via `_download_ontology_file` and wrap it in an `Ontology` using the EFO URL prefix.
52 | 
53 |         See: https://pronto.readthedocs.io/en/stable/api/pronto.Ontology.html
54 |         """
55 |         from bionty.base._ontology import Ontology  # runtime import; the module-level one is guarded by TYPE_CHECKING
56 | 
57 |         self._download_ontology_file(
58 |             localpath=self._local_ontology_path,
59 |             url=self._url,
60 |         )
61 |         return Ontology(
62 |             handle=self._local_ontology_path,  # the same local path that was passed to the download call above
63 |             prefix="http://www.ebi.ac.uk/efo/",
64 |         )
65 | 


--------------------------------------------------------------------------------
/bionty/base/entities/_organism.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | from typing import Literal
  4 | 
  5 | import pandas as pd
  6 | from lamindb_setup.core import deprecated
  7 | 
  8 | from bionty.base._public_ontology import PublicOntology
  9 | from bionty.base.dev._doc_util import _doc_params
 10 | from bionty.base.dev._io import s3_bionty_assets
 11 | from bionty.base.entities._shared_docstrings import organism_removed
 12 | 
 13 | 
 14 | @_doc_params(doc_entities=organism_removed)
 15 | class Organism(PublicOntology):
 16 |     """Organism.
 17 | 
 18 |     1. NCBItaxon Ontology
 19 |     https://github.com/obophenotype/ncbitaxon
 20 | 
 21 |     2. Organism ontology
 22 |     https://www.ensembl.org/index.html
 23 | 
 24 |     Args:
 25 |         {doc_entities}
 26 |     """
 27 | 
 28 |     def __init__(
 29 |         self,
 30 |         taxa: (
 31 |             Literal["vertebrates", "bacteria", "fungi", "metazoa", "plants", "all"]
 32 |             | None
 33 |         ) = None,
 34 |         source: Literal["ensembl", "ncbitaxon"] | None = None,
 35 |         version: (
 36 |             Literal[
 37 |                 # NCBITaxon
 38 |                 "2025-03-13",
 39 |                 "2023-06-20",
 40 |                 # Ensembl
 41 |                 "release-112",
 42 |                 "release-57",
 43 |             ]
 44 |             | None
 45 |         ) = None,
 46 |         **kwargs,
 47 |     ):
 48 |         # To support the organism kwarg being passed in getattr access in other parts of the code
 49 |         # https://github.com/laminlabs/bionty/issues/163
 50 |         if "organism" in kwargs and taxa is None:
 51 |             taxa = kwargs.pop("organism")
 52 |         super().__init__(organism=taxa, source=source, version=version, **kwargs)
 53 | 
 54 |     def _load_df(self) -> pd.DataFrame:
 55 |         if self.source == "ensembl":
 56 |             if not self._local_parquet_path.exists():
 57 |                 # try to download from s3
 58 |                 s3_bionty_assets(
 59 |                     filename=self._parquet_filename,
 60 |                     assets_base_url="s3://bionty-assets",
 61 |                     localpath=self._local_parquet_path,
 62 |                 )
 63 | 
 64 |             # try to download from original url
 65 |             if not self._local_parquet_path.exists():
 66 |                 self._url_download(self._url, self._local_ontology_path)  # type:ignore
 67 |                 df = pd.read_csv(
 68 |                     self._local_ontology_path,
 69 |                     sep="\t",
 70 |                     index_col=False,  # type:ignore
 71 |                 )
 72 |                 df.rename(
 73 |                     columns={
 74 |                         "#name": "name",
 75 |                         "species": "scientific_name",
 76 |                         "taxonomy_id": "ontology_id",
 77 |                     },
 78 |                     inplace=True,
 79 |                 )
 80 |                 df["name"] = df["name"].str.lower()
 81 |                 df["ontology_id"] = "NCBITaxon:" + df["ontology_id"].astype(str)
 82 |                 df["scientific_name"] = df["scientific_name"].apply(
 83 |                     lambda x: " ".join(
 84 |                         [x.split("_")[0].capitalize()] + x.split("_")[1:]
 85 |                     )
 86 |                 )
 87 |                 df["synonyms"] = None
 88 |                 df.to_parquet(self._local_parquet_path)
 89 |                 return df
 90 |             else:
 91 |                 df = pd.read_parquet(self._local_parquet_path)
 92 |                 if "synonyms" not in df.columns:
 93 |                     # add synonyms column if it doesn't exist
 94 |                     df["synonyms"] = None
 95 |                 return _standardize_scientific_name(df)
 96 |         else:
 97 |             return super()._load_df()
 98 | 
 99 |     def to_dataframe(self) -> pd.DataFrame:
100 |         """Pandas DataFrame of the ontology.
101 | 
102 |         Returns:
103 |             A Pandas DataFrame of the ontology.
104 | 
105 |         Example::
106 | 
107 |             import bionty.base as bionty_base
108 | 
109 |             bt.Organism().to_dataframe()
110 |         """
111 |         return self._df.set_index("name")
112 | 
113 |     @deprecated("to_dataframe")
114 |     def df(self) -> pd.DataFrame:
115 |         return self.to_dataframe()
116 | 
117 | 
118 | def _standardize_scientific_name(df: pd.DataFrame) -> pd.DataFrame:
119 |     """Standardize scientific name following NCBITaxon convention.
120 | 
121 |     homo_sapiens -> Homo sapiens
122 |     """
123 |     df["scientific_name"] = df["scientific_name"].apply(
124 |         lambda x: " ".join([x.split("_")[0].capitalize()] + x.split("_")[1:])
125 |     )
126 |     return df
127 | 


--------------------------------------------------------------------------------
/bionty/base/entities/_pathway.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from typing import Literal
 4 | 
 5 | from bionty.base._public_ontology import PublicOntology
 6 | from bionty.base.dev._doc_util import _doc_params
 7 | 
 8 | from ._shared_docstrings import doc_entites
 9 | 
10 | 
11 | @_doc_params(doc_entities=doc_entites)
12 | class Pathway(PublicOntology):
13 |     """Pathway.
14 | 
15 |     1. Gene Ontology
16 |     https://bioportal.bioontology.org/ontologies/GO/?p=summary
17 | 
18 |     2. Pathway Ontology
19 |     https://bioportal.bioontology.org/ontologies/PW/?p=summary
20 | 
21 |     Args:
22 |         {doc_entities}
23 |     """
24 | 
25 |     def __init__(
26 |         self,
27 |         organism: Literal["all"] | None = None,
28 |         source: Literal["go", "pw"] | None = None,
29 |         version: Literal[  # one flat list; the comments below group the strings by source
30 |             # Gene Ontology
31 |             "2024-11-03",
32 |             "2024-06-17",
33 |             "2023-05-10",
34 |             # Pathway Ontology
35 |             "7.84",
36 |         ]
37 |         | None = None,
38 |         **kwargs,  # passed through unchanged to PublicOntology.__init__
39 |     ) -> None:
40 |         super().__init__(source=source, version=version, organism=organism, **kwargs)  # no entity-specific options are added here
41 | 


--------------------------------------------------------------------------------
/bionty/base/entities/_phenotype.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from typing import Literal
 4 | 
 5 | from bionty.base._public_ontology import PublicOntology
 6 | from bionty.base.dev._doc_util import _doc_params
 7 | 
 8 | from ._shared_docstrings import doc_entites
 9 | 
10 | 
11 | @_doc_params(doc_entities=doc_entites)
12 | class Phenotype(PublicOntology):
13 |     """Phenotype.
14 | 
15 |     1. Human Phenotype Ontology
16 |     https://hpo.jax.org/app/
17 | 
18 |     2. PATO - the Phenotype And Trait Ontology
19 |     https://github.com/pato-ontology/pato
20 |     https://obofoundry.org/ontology/pato.html
21 | 
22 |     3. Phecodes ICD10 map
23 |     https://phewascatalog.org/phecodes_icd10
24 | 
25 |     The `source` names for these three are "hp", "pato" and "phe".
26 | 
27 |     Args:
28 |         {doc_entities}
29 |     """
30 | 
31 |     def __init__(
32 |         self,
33 |         organism: Literal["human", "all"] | None = None,
34 |         source: Literal["hp", "phe", "pato"] | None = None,
35 |         version: Literal[  # one flat list; the comments below group the strings by source
36 |             # HP
37 |             "2025-05-06",
38 |             "2024-04-26",
39 |             "2024-03-06",
40 |             "2023-06-17",
41 |             "2023-04-05",
42 |             "2023-01-27",
43 |             # Pato
44 |             "2025-05-14",
45 |             "2024-03-28",
46 |             "2023-05-18",
47 |             # Phe
48 |             "1.2",
49 |         ]
50 |         | None = None,
51 |         **kwargs,  # passed through unchanged to PublicOntology.__init__
52 |     ) -> None:
53 |         super().__init__(  # organism/source/version are handed to the base class exactly as given
54 |             source=source,
55 |             version=version,
56 |             organism=organism,
57 |             include_id_prefixes={  # NOTE(review): "mp" and "zp" are not among the `source` options above; "phe" has no entry
58 |                 "hp": ["HP:"],
59 |                 "mp": ["MP:"],  # mp might require an exclusion prefix for mpath
60 |                 "zp": ["ZP:"],
61 |                 "pato": ["PATO:"],
62 |             },
63 |             **kwargs,
64 |         )
65 | 


--------------------------------------------------------------------------------
/bionty/base/entities/_protein.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from typing import Literal
 4 | 
 5 | from bionty.base._public_ontology import PublicOntology
 6 | from bionty.base.dev._doc_util import _doc_params
 7 | 
 8 | from ._shared_docstrings import doc_entites
 9 | 
10 | 
11 | @_doc_params(doc_entities=doc_entites)
12 | class Protein(PublicOntology):
13 |     """Protein.
14 | 
15 |     1. Uniprot
16 |     https://www.uniprot.org/
17 | 
18 |     Args:
19 |         {doc_entities}
20 |     """
21 | 
22 |     def __init__(
23 |         self,
24 |         organism: Literal["human", "mouse"] | None = None,
25 |         source: Literal["uniprot"] | None = None,
26 |         version: Literal["2024-03", "2023-03", "2023-02"] | None = None,
27 |         **kwargs,  # passed through unchanged to PublicOntology.__init__
28 |     ) -> None:
29 |         super().__init__(  # organism/source/version are handed to the base class exactly as given
30 |             source=source,
31 |             version=version,
32 |             organism=organism,
33 |             ols_supported=False,  # NOTE(review): presumably flags this source as not served by OLS - confirm in PublicOntology
34 |             **kwargs,
35 |         )
36 | 


--------------------------------------------------------------------------------
/bionty/base/entities/_shared_docstrings.py:
--------------------------------------------------------------------------------
 1 | doc_entites = """\
 2 | organism: `name` of `Organism` entity.
 3 |         source: The name of the source in the sources.yaml file.
 4 |                 Get available sources with `.display_available_sources()`.
 5 |         version: The version of the ontology. Typically a date or an actual version.
 6 |                 Get latest versions with `.display_available_sources()`.
 7 | """
 8 | organism_removed_tmp = "\n".join(doc_entites.split("\n")[1:]).split("\n")  # drop the first ("organism: ...") line of doc_entites
 9 | organism_removed_tmp[0] = organism_removed_tmp[0].removeprefix("        ")  # the new first line loses its 8-space indent
10 | organism_removed = "\n".join(organism_removed_tmp)  # doc_entites without the organism entry
11 | 
12 | # Argument descriptions for df / column / field; the first line is unindented like doc_entites above.
13 | doc_curate = """\
14 | df: DataFrame with a column of identifiers
15 |         column: If `column` is `None`, checks the existing index for compliance with
16 |                 the default identifier.
17 |                 If `column` denotes an entity identifier, tries to map that identifier
18 |                 to the default identifier.
19 |         field: The type of identifier for mapping.
20 | """
21 | 


--------------------------------------------------------------------------------
/bionty/base/entities/_tissue.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from typing import Literal
 4 | 
 5 | from bionty.base._public_ontology import PublicOntology
 6 | from bionty.base.dev._doc_util import _doc_params
 7 | 
 8 | from ._shared_docstrings import doc_entites
 9 | 
10 | 
11 | @_doc_params(doc_entities=doc_entites)
12 | class Tissue(PublicOntology):
13 |     """Tissue.
14 | 
15 |     1. Uberon
16 |     https://github.com/obophenotype/uberon
17 | 
18 |     Args:
19 |         {doc_entities}
20 |     """
21 | 
22 |     def __init__(
23 |         self,
24 |         organism: Literal["all"] | None = None,
25 |         source: Literal["uberon"] | None = None,
26 |         version: Literal[  # version strings offered for the single "uberon" source
27 |             "2025-05-28",
28 |             "2024-08-07",
29 |             "2024-05-13",
30 |             "2024-03-22",
31 |             "2024-02-20",
32 |             "2024-01-18",
33 |             "2023-09-05",
34 |             "2023-04-19",
35 |             "2023-02-14",
36 |             "2022-08-19",
37 |         ]
38 |         | None = None,
39 |         **kwargs,  # passed through unchanged to PublicOntology.__init__
40 |     ) -> None:
41 |         super().__init__(  # organism/source/version are handed to the base class exactly as given
42 |             source=source,
43 |             version=version,
44 |             organism=organism,
45 |             include_id_prefixes={"uberon": ["UBERON:"]},  # NOTE(review): presumably source name -> accepted term-ID prefixes - confirm
46 |             include_rel="part_of",  # NOTE(review): presumably an extra relationship type used when loading the ontology - confirm
47 |             **kwargs,
48 |         )
49 | 


--------------------------------------------------------------------------------
/bionty/base/scripts/bfxpipelines_info/custom_pipelines.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "cell_ranger_v8_0_0": {
 3 |     "id": "X3Y4Z5A6B7C8",
 4 |     "name": "Cell Ranger v8.0.0",
 5 |     "versions": "8.0.0",
 6 |     "reference": "https://support.10xgenomics.com/single-cell-gene-expression/software/downloads/latest"
 7 |   },
 8 |   "cell_ranger_v7_2_0": {
 9 |     "id": "N4F5G6H7I8J9",
10 |     "name": "Cell Ranger v7.2.0",
11 |     "versions": "7.2.0",
12 |     "reference": "https://support.10xgenomics.com/single-cell-gene-expression/software/downloads/latest"
13 |   },
14 |   "cell_ranger_v7_1_0": {
15 |     "id": "V2RbClSNDq4H",
16 |     "name": "Cell Ranger v7.1.0",
17 |     "versions": "7.1.0",
18 |     "reference": "https://support.10xgenomics.com/single-cell-gene-expression/software/downloads/latest"
19 |   },
20 |   "cell_ranger_v7_0_1": {
21 |     "id": "T3r0zDQQJ97p",
22 |     "name": "Cell Ranger v7.0.1",
23 |     "versions": "7.0.1",
24 |     "reference": "https://support.10xgenomics.com/single-cell-gene-expression/software/downloads/latest"
25 |   },
26 |   "cell_ranger_v7_0_0": {
27 |     "id": "t28U9XvyjCEw",
28 |     "name": "Cell Ranger v7.0.0",
29 |     "versions": "7.0.0",
30 |     "reference": "https://support.10xgenomics.com/single-cell-gene-expression/software/downloads/latest"
31 |   },
32 |   "cell_ranger_v6_1_2": {
33 |     "id": "G4R5bC6a9DcX",
34 |     "name": "Cell Ranger v6.1.2",
35 |     "versions": "6.1.2",
36 |     "reference": "https://support.10xgenomics.com/single-cell-gene-expression/software/downloads/6.1"
37 |   },
38 |   "cell_ranger_v6_0_0": {
39 |     "id": "F6U8dE2tJ3LZ",
40 |     "name": "Cell Ranger v6.0.0",
41 |     "versions": "6.0.0",
42 |     "reference": "https://support.10xgenomics.com/single-cell-gene-expression/software/downloads/6.0"
43 |   },
44 |   "spaceranger_v3_0_0": {
45 |     "id": "A1B2C3D4E5F6",
46 |     "name": "Spaceranger v3.0.0",
47 |     "versions": "3.0.0",
48 |     "reference": "https://support.10xgenomics.com/spatial-gene-expression/software/downloads/latest"
49 |   },
50 |   "spaceranger_v2_1_1": {
51 |     "id": "G7H8I9J0K1L2",
52 |     "name": "Spaceranger v2.1.1",
53 |     "versions": "2.1.1",
54 |     "reference": "https://support.10xgenomics.com/spatial-gene-expression/software/downloads/2.1"
55 |   },
56 |   "spaceranger_v2_1_0": {
57 |     "id": "M3N4O5P6Q7R8",
58 |     "name": "Spaceranger v2.1.0",
59 |     "versions": "2.1.0",
60 |     "reference": "https://support.10xgenomics.com/spatial-gene-expression/software/downloads/2.1"
61 |   },
62 |   "spaceranger_v2_0_1": {
63 |     "id": "M9V2bR6aK3eD",
64 |     "name": "Spaceranger v2.0.1",
65 |     "versions": "2.0.1",
66 |     "reference": "https://support.10xgenomics.com/spatial-gene-expression/software/downloads/2.0"
67 |   },
68 |   "xenium_v2_0_0": {
69 |     "id": "S1T2U3V4W5X6",
70 |     "name": "Xenium v2.0.0",
71 |     "versions": "2.0.0",
72 |     "reference": "https://www.10xgenomics.com/products/xenium"
73 |   },
74 |   "xenium_v1_7_1": {
75 |     "id": "Y7Z8A9B0C1D2",
76 |     "name": "Xenium v1.7.1",
77 |     "versions": "1.7.1",
78 |     "reference": "https://www.10xgenomics.com/products/xenium"
79 |   },
80 |   "xenium_v1_7_0": {
81 |     "id": "E3F4G5H6I7J8",
82 |     "name": "Xenium v1.7.0",
83 |     "versions": "1.7.0",
84 |     "reference": "https://www.10xgenomics.com/products/xenium"
85 |   },
86 |   "xenium_v1_6_0": {
87 |     "id": "K9L0M1N2O3P4",
88 |     "name": "Xenium v1.6.0",
89 |     "versions": "1.6.0",
90 |     "reference": "https://www.10xgenomics.com/products/xenium"
91 |   }
92 | }
93 | 


--------------------------------------------------------------------------------
/bionty/base/scripts/check_ontologies_reachable.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | import urllib.request
 3 | from http.client import BadStatusLine
 4 | from pathlib import Path
 5 | from urllib.error import HTTPError, URLError
 6 | 
 7 | import yaml  # type:ignore
 8 | 
 9 | VERSIONS_FILE_PATH = Path.cwd() / "bionty_base" / "sources.yaml"  # resolved against the current working directory. NOTE(review): the repository tree lists bionty/base/sources.yaml - confirm this path is still valid
10 | 
11 | 
12 | def extract_urls_from_yaml(yaml_file):
13 |     with open(yaml_file) as file:
14 |         yaml_data = yaml.safe_load(file)
15 |         urls = []
16 | 
17 |         def extract_urls(data):
18 |             if isinstance(data, str):
19 |                 urls.extend(
20 |                     re.findall(
21 |                         r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+",
22 |                         data,
23 |                     )
24 |                 )
25 |             elif isinstance(data, dict):
26 |                 for value in data.values():
27 |                     extract_urls(value)
28 |             elif isinstance(data, list):
29 |                 for item in data:
30 |                     extract_urls(item)
31 | 
32 |         extract_urls(yaml_data)
33 | 
34 |         return urls
35 | 
36 | 
37 | urls = extract_urls_from_yaml(VERSIONS_FILE_PATH)
38 | 
39 | failed_urls = []
40 | for url in urls:
41 |     try:
42 |         assert urllib.request.urlopen(url, timeout=1000).getcode() == 200
43 |     except (URLError, BadStatusLine):
44 |         print(f"URL: {url} is currently not accessible.")
45 |         pass
46 |     except (AssertionError, ValueError, HTTPError) as e:
47 |         failed_urls.append([url, e])
48 | 
49 | if len(failed_urls) != 0:
50 |     for fail in failed_urls:
51 |         print(fail)
52 |     raise AssertionError(f"{len(failed_urls)} URLs failed.")
53 | 


--------------------------------------------------------------------------------
/bionty/base/scripts/generate_bfxpipelines.py:
--------------------------------------------------------------------------------
  1 | import base64
  2 | import hashlib
  3 | import json
  4 | import os
  5 | import secrets
  6 | import string
  7 | from pathlib import Path
  8 | 
  9 | import pandas as pd
 10 | from github import Github
 11 | from rich.progress import track
 12 | 
 13 | BASE_BFX_PIPELINES_PATH = "./scripts/bfxpipelines_info"  # relative to the current working directory; every JSON/parquet file below is read from or written to this folder
 14 | 
 15 | 
 16 | def base62(n_char: int) -> str:
 17 |     """Like nanoid without hyphen and underscore."""
 18 |     alphabet = string.digits + string.ascii_letters.swapcase()
 19 |     id = "".join(secrets.choice(alphabet) for i in range(n_char))
 20 |     return id
 21 | 
 22 | 
 23 | def to_b64_str(bstr: bytes) -> str:
 24 |     b64 = base64.urlsafe_b64encode(bstr).decode().strip("=")
 25 |     return b64
 26 | 
 27 | 
 28 | def hash_str(s: str) -> str:
 29 |     bstr = s.encode("utf-8")
 30 |     # as we're truncating at a short length, we choose md5 over sha512
 31 |     return to_b64_str(hashlib.md5(bstr).digest())
 32 | 
 33 | 
 34 | def hash_id(input_id: str | None = None, *, n_char: int) -> str:
 35 |     if input_id is None:
 36 |         return base62(n_char=n_char)
 37 |     else:
 38 |         return hash_str(input_id)[:n_char].replace("_", "0").replace("-", "0")
 39 | 
 40 | 
 41 | def generate_nf_core_pipelines_info() -> None:
 42 |     """Generates a json file that contains all required pipelines information by querying the nf-core Github org."""
 43 |     gh_login = Github(os.getenv("GITHUB_TOKEN"))
 44 |     nf_core_org = gh_login.get_organization("nf-core")
 45 |     blacklist = ["cookiecutter", "tools"]
 46 |     nf_core_pipelines = {}
 47 | 
 48 |     for repo in track(
 49 |         nf_core_org.get_repos(),
 50 |         description="Fetching information from nf-core repositories...",
 51 |     ):
 52 |         if "pipeline" in list(repo.get_topics()):
 53 |             if repo.name in blacklist:
 54 |                 continue
 55 | 
 56 |             for version in repo.get_releases():
 57 |                 actual_version = (
 58 |                     version.tag_name if len(version.tag_name) >= 1 else "pre-release"
 59 |                 )
 60 |                 pipeline_name = f"{repo.name} v{actual_version}"
 61 |                 underscore_pipeline_name = (
 62 |                     pipeline_name.replace(" ", "_").replace(".", "_").replace("-", "_")
 63 |                 )
 64 | 
 65 |                 nf_core_pipelines[underscore_pipeline_name] = {
 66 |                     "id": hash_id(pipeline_name, n_char=12),
 67 |                     "name": f"nf-core {pipeline_name}",
 68 |                     "versions": actual_version,
 69 |                     "reference": repo.html_url,
 70 |                 }
 71 | 
 72 |     with open(f"{BASE_BFX_PIPELINES_PATH}/nf_core_pipelines.json", "w") as f:
 73 |         json_data = json.dumps(nf_core_pipelines, indent=4)
 74 |         f.write(json_data)
 75 | 
 76 | 
 77 | def merge_json_files(pipelines_folder_path: str | Path, output_path: str) -> None:
 78 |     """Merge all JSON files in a folder and write the merged data to a new JSON file.
 79 | 
 80 |     Args:
 81 |         pipelines_folder_path: Path to the folder containing the JSON files.
 82 |         output_path: Path to the output JSON file.
 83 |     """
 84 |     pipelines_folder_path = Path(pipelines_folder_path)
 85 |     file_paths = list(pipelines_folder_path.glob("*.json"))
 86 | 
 87 |     pipeline_json: dict = {}
 88 | 
 89 |     for file_path in file_paths:
 90 |         with open(file_path) as f:
 91 |             if not str(file_path).endswith("bfxpipelines.json"):
 92 |                 pipelines_info = json.load(f)
 93 |                 pipeline_json = {**pipeline_json, **pipelines_info}
 94 | 
 95 |     with open(output_path, "w") as f:
 96 |         json.dump(pipeline_json, f, indent=4)
 97 | 
 98 | 
 99 | def write_parquet_file(bfxpipelines_json: str, output_path: str) -> None:
100 |     """Takes a bfxpipelines.json file as generated from merge_json_files and writes a corresponding parquet file."""
101 |     with open(bfxpipelines_json) as f:
102 |         data = json.load(f)
103 | 
104 |     df = pd.DataFrame(data).transpose()
105 |     df.drop("versions", inplace=True, axis=1)
106 |     df.rename(columns={"id": "ontology_id"}, inplace=True)
107 |     df.set_index("ontology_id", inplace=True, drop=True)
108 |     df.to_parquet(output_path)
109 | 
110 | 
# Script entry: refresh the nf-core listing, merge every per-source JSON file
# into bfxpipelines.json, then export that merged file as parquet.
generate_nf_core_pipelines_info()
merge_json_files(
    BASE_BFX_PIPELINES_PATH,
    f"{BASE_BFX_PIPELINES_PATH}/bfxpipelines.json",
)
write_parquet_file(
    f"{BASE_BFX_PIPELINES_PATH}/bfxpipelines.json",
    f"{BASE_BFX_PIPELINES_PATH}/bfxpipelines.parquet",
)
120 | 


--------------------------------------------------------------------------------
/bionty/base/scripts/update_new_ontologies.py:
--------------------------------------------------------------------------------
 1 | import bioregistry
 2 | from bionty.base.dev._handle_sources import parse_sources_yaml
 3 | from rich import print
 4 | 
 5 | sources = parse_sources_yaml()
 6 | latest_versions = (
 7 |     sources.groupby("source")["version"]
 8 |     .apply(
 9 |         lambda version: version.iloc[
10 |             version.astype(str).str.replace(".", "").str.isdigit().argmax()
11 |         ]
12 |     )
13 |     .reset_index()
14 | )
15 | latest_versions_dict = latest_versions.set_index("source")["version"].to_dict()
16 | 
17 | new_latest_versions: dict[str, str] = {}
18 | for source, current_latest_version in latest_versions_dict.items():
19 |     bioregistry_version = bioregistry.get_version(source)
20 |     if bioregistry_version:
21 |         if bioregistry_version > current_latest_version:
22 |             new_latest_versions[source] = bioregistry_version
23 | 
24 | if len(new_latest_versions) != 0:
25 |     for source, version in new_latest_versions.items():
26 |         print(
27 |             f"[bold blue]Source: [green]{source}[blue] has a more recent version:"
28 |             f" [green]{version}"
29 |         )
30 |     raise AssertionError(
31 |         f"{len(new_latest_versions.keys())} databases have more recent versions."
32 |     )
33 | 


--------------------------------------------------------------------------------
/bionty/core/__init__.py:
--------------------------------------------------------------------------------
 1 | """Developer API.
 2 | 
 3 | .. autosummary::
 4 |    :toctree: .
 5 | 
 6 |    BioRecord
 7 |    StaticReference
 8 |    Settings
 9 |    sync_public_sources
10 | """
11 | 
12 | from lamindb_setup._check_setup import _check_instance_setup
13 | 
14 | _check_instance_setup(from_module="bionty")
15 | 
16 | from bionty.models import BioRecord, StaticReference
17 | 
18 | from ._add_ontology import add_ontology
19 | from ._settings import Settings
20 | from ._source import sync_public_sources
21 | 
# backward-compat: `sync_all_sources_to_latest` stays importable as an alias
# bound to the same function object as `sync_public_sources`
sync_all_sources_to_latest = sync_public_sources
24 | 


--------------------------------------------------------------------------------
/bionty/core/_settings.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from lamin_utils import logger
 4 | 
 5 | from bionty.models import Organism
 6 | 
 7 | 
 8 | class Settings:
 9 |     """Settings.
10 | 
11 |     Directly use `bt.settings` rather than instantiating this class yourself.
12 |     """
13 | 
14 |     def __init__(self):
15 |         self._organism = "human"
16 | 
17 |     @property
18 |     def organism(self) -> Organism | None:
19 |         """Default organism argument (default `"human"`).
20 | 
21 |         Default organism to use in cases of ambiguity. For instance, gene symbols are duplicated across organisms and need to be disambiguated.
22 | 
23 |         Examples:
24 | 
25 |             ::
26 | 
27 |                 bionty.settings.organism = "mouse"
28 |         """
29 |         if isinstance(self._organism, str):
30 |             self.organism = self._organism  # type: ignore
31 |         return self._organism
32 | 
33 |     @organism.setter
34 |     def organism(self, name: str | Organism):
35 |         if isinstance(name, Organism):
36 |             self._organism = name
37 |         else:
38 |             import lamindb as ln
39 | 
40 |             organisms = Organism.from_values([name], mute=True)
41 |             if len(organisms) == 0:
42 |                 raise ValueError(
43 |                     f"No organism with name='{name}' is found, please create a organism record!"
44 |                 )
45 |             else:
46 |                 organism = organisms[0]
47 |             if organism._state.adding:  # type:ignore
48 |                 organism.save()  # type:ignore
49 |             self._organism = organism
50 | 
51 | 
# Module-level Settings instance; its instance-level __doc__ is overridden with
# a description of the global settings object.
settings = Settings()
settings.__doc__ = """Global :class:`~bionty.core.Settings`."""
54 | 


--------------------------------------------------------------------------------
/bionty/ids.py:
--------------------------------------------------------------------------------
1 | from .uids import *  # noqa: F403
2 | 


--------------------------------------------------------------------------------
/bionty/migrations/0029_alter_cellline_previous_runs_and_more.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 5.1 on 2024-06-13 10:31
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 |     dependencies = [
 8 |         ("bionty", "0028_artifactcellline_created_at_and_more"),
 9 |     ]
10 | 
11 |     operations = [
12 |         migrations.AlterField(
13 |             model_name="cellline",
14 |             name="previous_runs",
15 |             field=models.ManyToManyField(related_name="+", to="lamindb.run"),
16 |         ),
17 |         migrations.AlterField(
18 |             model_name="cellmarker",
19 |             name="previous_runs",
20 |             field=models.ManyToManyField(related_name="+", to="lamindb.run"),
21 |         ),
22 |         migrations.AlterField(
23 |             model_name="celltype",
24 |             name="previous_runs",
25 |             field=models.ManyToManyField(related_name="+", to="lamindb.run"),
26 |         ),
27 |         migrations.AlterField(
28 |             model_name="developmentalstage",
29 |             name="previous_runs",
30 |             field=models.ManyToManyField(related_name="+", to="lamindb.run"),
31 |         ),
32 |         migrations.AlterField(
33 |             model_name="disease",
34 |             name="previous_runs",
35 |             field=models.ManyToManyField(related_name="+", to="lamindb.run"),
36 |         ),
37 |         migrations.AlterField(
38 |             model_name="ethnicity",
39 |             name="previous_runs",
40 |             field=models.ManyToManyField(related_name="+", to="lamindb.run"),
41 |         ),
42 |         migrations.AlterField(
43 |             model_name="experimentalfactor",
44 |             name="previous_runs",
45 |             field=models.ManyToManyField(related_name="+", to="lamindb.run"),
46 |         ),
47 |         migrations.AlterField(
48 |             model_name="gene",
49 |             name="previous_runs",
50 |             field=models.ManyToManyField(related_name="+", to="lamindb.run"),
51 |         ),
52 |         migrations.AlterField(
53 |             model_name="organism",
54 |             name="previous_runs",
55 |             field=models.ManyToManyField(related_name="+", to="lamindb.run"),
56 |         ),
57 |         migrations.AlterField(
58 |             model_name="pathway",
59 |             name="previous_runs",
60 |             field=models.ManyToManyField(related_name="+", to="lamindb.run"),
61 |         ),
62 |         migrations.AlterField(
63 |             model_name="phenotype",
64 |             name="previous_runs",
65 |             field=models.ManyToManyField(related_name="+", to="lamindb.run"),
66 |         ),
67 |         migrations.AlterField(
68 |             model_name="protein",
69 |             name="previous_runs",
70 |             field=models.ManyToManyField(related_name="+", to="lamindb.run"),
71 |         ),
72 |         migrations.AlterField(
73 |             model_name="publicsource",
74 |             name="previous_runs",
75 |             field=models.ManyToManyField(related_name="+", to="lamindb.run"),
76 |         ),
77 |         migrations.AlterField(
78 |             model_name="tissue",
79 |             name="previous_runs",
80 |             field=models.ManyToManyField(related_name="+", to="lamindb.run"),
81 |         ),
82 |     ]
83 | 


--------------------------------------------------------------------------------
/bionty/migrations/0031_alter_cellmarker_name_and_more.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 5.1 on 2024-07-31 11:46
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
class Migration(migrations.Migration):
    """Alter ``cellmarker.name`` and make ``(name, organism)`` unique together."""

    dependencies = [
        ("bionty", "0030_rename_publicsource_source_and_more"),
    ]

    operations = [
        # name becomes an indexed CharField limited to 64 characters
        migrations.AlterField(
            model_name="cellmarker",
            name="name",
            field=models.CharField(db_index=True, max_length=64),
        ),
        # uniqueness is enforced on the (name, organism) pair
        migrations.AlterUniqueTogether(
            name="cellmarker",
            unique_together={("name", "organism")},
        ),
    ]
22 | 


--------------------------------------------------------------------------------
/bionty/migrations/0032_rename_source_name_source_description_and_more.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 5.1 on 2024-08-01 12:34
 2 | 
 3 | import django.db.models.deletion
 4 | from django.db import migrations, models
 5 | 
 6 | import bionty
 7 | import bionty.ids
 8 | from bionty._biorecord import encode_uid, list_biorecord_models
 9 | 
10 | 
def prepend_bionty_to_entity(apps, schema_editor):
    """Prefix ``Source.entity`` values with ``bionty.`` and re-encode their uid.

    Only rows whose ``entity`` is contained in ``list_biorecord_models(bionty)``
    and does not already start with ``bionty.`` are modified. For those rows
    the uid is recomputed with ``encode_uid`` from the new entity together
    with name, organism and version, and the row is saved.

    Args:
        apps: Migration app registry; used to fetch the ``Source`` model.
        schema_editor: Unused here.
    """
    known_models = list_biorecord_models(bionty)
    Source = apps.get_model("bionty", "Source")
    for record in Source.objects.all():
        # leave rows alone that are not bionty entities or are already prefixed
        if record.entity not in known_models or record.entity.startswith("bionty."):
            continue
        # prepend the schema prefix to the entity
        record.entity = f"bionty.{record.entity}"
        # the uid is derived from these fields, so it has to be re-encoded
        uid_fields = {
            "entity": record.entity,
            "name": record.name,
            "organism": record.organism,
            "version": record.version,
        }
        record.uid = encode_uid(Source, uid_fields)["uid"]
        record.save()
29 | 
30 | 
class Migration(migrations.Migration):
    """Rename and alter fields of ``source``, then run the entity-prefix data migration."""

    dependencies = [
        ("bionty", "0031_alter_cellmarker_name_and_more"),
    ]

    operations = [
        # source_name -> description
        migrations.RenameField(
            model_name="source",
            old_name="source_name",
            new_name="description",
        ),
        # source -> name (listed after the rename above)
        migrations.RenameField(
            model_name="source",
            old_name="source",
            new_name="name",
        ),
        # df -> dataframe_artifact
        migrations.RenameField(
            model_name="source",
            old_name="df",
            new_name="dataframe_artifact",
        ),
        # nullable, PROTECT-ed foreign key to lamindb.artifact
        migrations.AlterField(
            model_name="source",
            name="dataframe_artifact",
            field=models.ForeignKey(
                default=None,
                null=True,
                on_delete=django.db.models.deletion.PROTECT,
                related_name="source_dataframe_of",
                to="lamindb.artifact",
            ),
        ),
        migrations.AlterField(
            model_name="source",
            name="artifacts",
            field=models.ManyToManyField(
                related_name="source_artifact_of", to="lamindb.artifact"
            ),
        ),
        migrations.AlterField(
            model_name="source",
            name="entity",
            field=models.CharField(db_index=True, max_length=256),
        ),
        migrations.AlterField(
            model_name="source",
            name="uid",
            field=models.CharField(
                default=bionty.ids.source, max_length=4, unique=True
            ),
        ),
        # data migration; no reverse_code is passed
        migrations.RunPython(prepend_bionty_to_entity),
    ]
84 | 


--------------------------------------------------------------------------------
/bionty/migrations/0034_alter_source_unique_together.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 5.1 on 2024-08-02 07:53
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | import bionty.ids
 6 | 
 7 | 
class Migration(migrations.Migration):
    """Make ``(entity, name, organism, version)`` unique together on ``source``."""

    dependencies = [
        ("bionty", "0033_alter_artifactcellline_artifact_and_more"),
    ]

    operations = [
        # a single uniqueness constraint spanning all four fields
        migrations.AlterUniqueTogether(
            name="source", unique_together=(("entity", "name", "organism", "version"),)
        ),
    ]
18 | 


--------------------------------------------------------------------------------
/bionty/migrations/0035_alter_protein_gene_symbol.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 5.2 on 2024-08-09 08:49
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
class Migration(migrations.Migration):
    """Alter ``protein.gene_symbol`` to a nullable, indexed ``CharField``."""

    dependencies = [
        ("bionty", "0034_alter_source_unique_together"),
    ]

    operations = [
        # up to 256 characters, NULL allowed, default None
        migrations.AlterField(
            model_name="protein",
            name="gene_symbol",
            field=models.CharField(
                db_index=True, default=None, max_length=256, null=True
            ),
        ),
    ]
20 | 


--------------------------------------------------------------------------------
/bionty/migrations/0036_alter_source_artifacts_and_more.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 5.2 on 2024-08-09 10:13
 2 | 
 3 | import django.db.models.deletion
 4 | from django.db import migrations, models
 5 | 
 6 | 
class Migration(migrations.Migration):
    """Alter ``source.artifacts`` and ``source.dataframe_artifact`` to use underscore-prefixed related names."""

    dependencies = [
        ("bionty", "0035_alter_protein_gene_symbol"),
    ]

    operations = [
        # related_name is set to "_source_artifact_of"
        migrations.AlterField(
            model_name="source",
            name="artifacts",
            field=models.ManyToManyField(
                related_name="_source_artifact_of", to="lamindb.artifact"
            ),
        ),
        # related_name is set to "_source_dataframe_of"; nullable, PROTECT on delete
        migrations.AlterField(
            model_name="source",
            name="dataframe_artifact",
            field=models.ForeignKey(
                default=None,
                null=True,
                on_delete=django.db.models.deletion.PROTECT,
                related_name="_source_dataframe_of",
                to="lamindb.artifact",
            ),
        ),
    ]
32 | 


--------------------------------------------------------------------------------
/bionty/migrations/0037_alter_cellline_source_alter_cellmarker_source_and_more.py:
--------------------------------------------------------------------------------
  1 | # Generated by Django 5.2 on 2024-08-27 09:42
  2 | 
  3 | import django.db.models.deletion
  4 | from django.db import migrations, models
  5 | 
  6 | 
  7 | class Migration(migrations.Migration):
  8 |     dependencies = [
  9 |         ("bionty", "0036_alter_source_artifacts_and_more"),
 10 |     ]
 11 | 
 12 |     operations = [
 13 |         migrations.AlterField(
 14 |             model_name="cellline",
 15 |             name="source",
 16 |             field=models.ForeignKey(
 17 |                 null=True,
 18 |                 on_delete=django.db.models.deletion.PROTECT,
 19 |                 to="bionty.source",
 20 |             ),
 21 |         ),
 22 |         migrations.AlterField(
 23 |             model_name="cellmarker",
 24 |             name="source",
 25 |             field=models.ForeignKey(
 26 |                 null=True,
 27 |                 on_delete=django.db.models.deletion.PROTECT,
 28 |                 to="bionty.source",
 29 |             ),
 30 |         ),
 31 |         migrations.AlterField(
 32 |             model_name="celltype",
 33 |             name="source",
 34 |             field=models.ForeignKey(
 35 |                 null=True,
 36 |                 on_delete=django.db.models.deletion.PROTECT,
 37 |                 to="bionty.source",
 38 |             ),
 39 |         ),
 40 |         migrations.AlterField(
 41 |             model_name="developmentalstage",
 42 |             name="source",
 43 |             field=models.ForeignKey(
 44 |                 null=True,
 45 |                 on_delete=django.db.models.deletion.PROTECT,
 46 |                 to="bionty.source",
 47 |             ),
 48 |         ),
 49 |         migrations.AlterField(
 50 |             model_name="disease",
 51 |             name="source",
 52 |             field=models.ForeignKey(
 53 |                 null=True,
 54 |                 on_delete=django.db.models.deletion.PROTECT,
 55 |                 to="bionty.source",
 56 |             ),
 57 |         ),
 58 |         migrations.AlterField(
 59 |             model_name="ethnicity",
 60 |             name="source",
 61 |             field=models.ForeignKey(
 62 |                 null=True,
 63 |                 on_delete=django.db.models.deletion.PROTECT,
 64 |                 to="bionty.source",
 65 |             ),
 66 |         ),
 67 |         migrations.AlterField(
 68 |             model_name="experimentalfactor",
 69 |             name="source",
 70 |             field=models.ForeignKey(
 71 |                 null=True,
 72 |                 on_delete=django.db.models.deletion.PROTECT,
 73 |                 to="bionty.source",
 74 |             ),
 75 |         ),
 76 |         migrations.AlterField(
 77 |             model_name="gene",
 78 |             name="source",
 79 |             field=models.ForeignKey(
 80 |                 null=True,
 81 |                 on_delete=django.db.models.deletion.PROTECT,
 82 |                 to="bionty.source",
 83 |             ),
 84 |         ),
 85 |         migrations.AlterField(
 86 |             model_name="organism",
 87 |             name="source",
 88 |             field=models.ForeignKey(
 89 |                 null=True,
 90 |                 on_delete=django.db.models.deletion.PROTECT,
 91 |                 to="bionty.source",
 92 |             ),
 93 |         ),
 94 |         migrations.AlterField(
 95 |             model_name="pathway",
 96 |             name="source",
 97 |             field=models.ForeignKey(
 98 |                 null=True,
 99 |                 on_delete=django.db.models.deletion.PROTECT,
100 |                 to="bionty.source",
101 |             ),
102 |         ),
103 |         migrations.AlterField(
104 |             model_name="phenotype",
105 |             name="source",
106 |             field=models.ForeignKey(
107 |                 null=True,
108 |                 on_delete=django.db.models.deletion.PROTECT,
109 |                 to="bionty.source",
110 |             ),
111 |         ),
112 |         migrations.AlterField(
113 |             model_name="protein",
114 |             name="source",
115 |             field=models.ForeignKey(
116 |                 null=True,
117 |                 on_delete=django.db.models.deletion.PROTECT,
118 |                 to="bionty.source",
119 |             ),
120 |         ),
121 |         migrations.AlterField(
122 |             model_name="tissue",
123 |             name="source",
124 |             field=models.ForeignKey(
125 |                 null=True,
126 |                 on_delete=django.db.models.deletion.PROTECT,
127 |                 to="bionty.source",
128 |             ),
129 |         ),
130 |     ]
131 | 


--------------------------------------------------------------------------------
/bionty/migrations/0039_alter_cellline_source_alter_cellmarker_source_and_more.py:
--------------------------------------------------------------------------------
  1 | # Generated by Django 5.2 on 2024-09-09 11:43
  2 | 
  3 | import django.db.models.deletion
  4 | from django.db import migrations, models
  5 | 
  6 | 
  7 | class Migration(migrations.Migration):
  8 |     dependencies = [
  9 |         ("bionty", "0038_alter_artifactcellline_created_by_and_more"),
 10 |     ]
 11 | 
 12 |     operations = [
 13 |         migrations.AlterField(
 14 |             model_name="cellline",
 15 |             name="source",
 16 |             field=models.ForeignKey(
 17 |                 null=True,
 18 |                 on_delete=django.db.models.deletion.PROTECT,
 19 |                 related_name="+",
 20 |                 to="bionty.source",
 21 |             ),
 22 |         ),
 23 |         migrations.AlterField(
 24 |             model_name="cellmarker",
 25 |             name="source",
 26 |             field=models.ForeignKey(
 27 |                 null=True,
 28 |                 on_delete=django.db.models.deletion.PROTECT,
 29 |                 related_name="+",
 30 |                 to="bionty.source",
 31 |             ),
 32 |         ),
 33 |         migrations.AlterField(
 34 |             model_name="celltype",
 35 |             name="source",
 36 |             field=models.ForeignKey(
 37 |                 null=True,
 38 |                 on_delete=django.db.models.deletion.PROTECT,
 39 |                 related_name="+",
 40 |                 to="bionty.source",
 41 |             ),
 42 |         ),
 43 |         migrations.AlterField(
 44 |             model_name="developmentalstage",
 45 |             name="source",
 46 |             field=models.ForeignKey(
 47 |                 null=True,
 48 |                 on_delete=django.db.models.deletion.PROTECT,
 49 |                 related_name="+",
 50 |                 to="bionty.source",
 51 |             ),
 52 |         ),
 53 |         migrations.AlterField(
 54 |             model_name="disease",
 55 |             name="source",
 56 |             field=models.ForeignKey(
 57 |                 null=True,
 58 |                 on_delete=django.db.models.deletion.PROTECT,
 59 |                 related_name="+",
 60 |                 to="bionty.source",
 61 |             ),
 62 |         ),
 63 |         migrations.AlterField(
 64 |             model_name="ethnicity",
 65 |             name="source",
 66 |             field=models.ForeignKey(
 67 |                 null=True,
 68 |                 on_delete=django.db.models.deletion.PROTECT,
 69 |                 related_name="+",
 70 |                 to="bionty.source",
 71 |             ),
 72 |         ),
 73 |         migrations.AlterField(
 74 |             model_name="experimentalfactor",
 75 |             name="source",
 76 |             field=models.ForeignKey(
 77 |                 null=True,
 78 |                 on_delete=django.db.models.deletion.PROTECT,
 79 |                 related_name="+",
 80 |                 to="bionty.source",
 81 |             ),
 82 |         ),
 83 |         migrations.AlterField(
 84 |             model_name="gene",
 85 |             name="source",
 86 |             field=models.ForeignKey(
 87 |                 null=True,
 88 |                 on_delete=django.db.models.deletion.PROTECT,
 89 |                 related_name="+",
 90 |                 to="bionty.source",
 91 |             ),
 92 |         ),
 93 |         migrations.AlterField(
 94 |             model_name="organism",
 95 |             name="source",
 96 |             field=models.ForeignKey(
 97 |                 null=True,
 98 |                 on_delete=django.db.models.deletion.PROTECT,
 99 |                 related_name="+",
100 |                 to="bionty.source",
101 |             ),
102 |         ),
103 |         migrations.AlterField(
104 |             model_name="pathway",
105 |             name="source",
106 |             field=models.ForeignKey(
107 |                 null=True,
108 |                 on_delete=django.db.models.deletion.PROTECT,
109 |                 related_name="+",
110 |                 to="bionty.source",
111 |             ),
112 |         ),
113 |         migrations.AlterField(
114 |             model_name="phenotype",
115 |             name="source",
116 |             field=models.ForeignKey(
117 |                 null=True,
118 |                 on_delete=django.db.models.deletion.PROTECT,
119 |                 related_name="+",
120 |                 to="bionty.source",
121 |             ),
122 |         ),
123 |         migrations.AlterField(
124 |             model_name="protein",
125 |             name="source",
126 |             field=models.ForeignKey(
127 |                 null=True,
128 |                 on_delete=django.db.models.deletion.PROTECT,
129 |                 related_name="+",
130 |                 to="bionty.source",
131 |             ),
132 |         ),
133 |         migrations.AlterField(
134 |             model_name="tissue",
135 |             name="source",
136 |             field=models.ForeignKey(
137 |                 null=True,
138 |                 on_delete=django.db.models.deletion.PROTECT,
139 |                 related_name="+",
140 |                 to="bionty.source",
141 |             ),
142 |         ),
143 |     ]
144 | 


--------------------------------------------------------------------------------
/bionty/migrations/0040_rename_feature_ref_is_symbol_artifactgene_feature_ref_is_name_and_more.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 5.1.1 on 2024-10-18 14:17
 2 | 
 3 | from django.db import migrations
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 |     dependencies = [
 8 |         ("bionty", "0039_alter_cellline_source_alter_cellmarker_source_and_more"),
 9 |     ]
10 | 
11 |     operations = [
12 |         migrations.RenameField(
13 |             model_name="artifactgene",
14 |             old_name="feature_ref_is_symbol",
15 |             new_name="feature_ref_is_name",
16 |         ),
17 |         migrations.RenameField(
18 |             model_name="artifactgene",
19 |             old_name="gene_ref_is_symbol",
20 |             new_name="label_ref_is_name",
21 |         ),
22 |         migrations.AlterUniqueTogether(
23 |             name="artifactcellline",
24 |             unique_together={("artifact", "cellline", "feature")},
25 |         ),
26 |         migrations.AlterUniqueTogether(
27 |             name="artifactcellmarker",
28 |             unique_together={("artifact", "cellmarker", "feature")},
29 |         ),
30 |         migrations.AlterUniqueTogether(
31 |             name="artifactcelltype",
32 |             unique_together={("artifact", "celltype", "feature")},
33 |         ),
34 |         migrations.AlterUniqueTogether(
35 |             name="artifactdevelopmentalstage",
36 |             unique_together={("artifact", "developmentalstage", "feature")},
37 |         ),
38 |         migrations.AlterUniqueTogether(
39 |             name="artifactdisease",
40 |             unique_together={("artifact", "disease", "feature")},
41 |         ),
42 |         migrations.AlterUniqueTogether(
43 |             name="artifactethnicity",
44 |             unique_together={("artifact", "ethnicity", "feature")},
45 |         ),
46 |         migrations.AlterUniqueTogether(
47 |             name="artifactexperimentalfactor",
48 |             unique_together={("artifact", "experimentalfactor", "feature")},
49 |         ),
50 |         migrations.AlterUniqueTogether(
51 |             name="artifactgene",
52 |             unique_together={("artifact", "gene", "feature")},
53 |         ),
54 |         migrations.AlterUniqueTogether(
55 |             name="artifactorganism",
56 |             unique_together={("artifact", "organism", "feature")},
57 |         ),
58 |         migrations.AlterUniqueTogether(
59 |             name="artifactpathway",
60 |             unique_together={("artifact", "pathway", "feature")},
61 |         ),
62 |         migrations.AlterUniqueTogether(
63 |             name="artifactphenotype",
64 |             unique_together={("artifact", "phenotype", "feature")},
65 |         ),
66 |         migrations.AlterUniqueTogether(
67 |             name="artifactprotein",
68 |             unique_together={("artifact", "protein", "feature")},
69 |         ),
70 |         migrations.AlterUniqueTogether(
71 |             name="artifacttissue",
72 |             unique_together={("artifact", "tissue", "feature")},
73 |         ),
74 |         migrations.AlterUniqueTogether(
75 |             name="featuresetcellmarker",
76 |             unique_together={("featureset", "cellmarker")},
77 |         ),
78 |         migrations.AlterUniqueTogether(
79 |             name="featuresetgene",
80 |             unique_together={("featureset", "gene")},
81 |         ),
82 |         migrations.AlterUniqueTogether(
83 |             name="featuresetpathway",
84 |             unique_together={("featureset", "pathway")},
85 |         ),
86 |         migrations.AlterUniqueTogether(
87 |             name="featuresetprotein",
88 |             unique_together={("featureset", "protein")},
89 |         ),
90 |     ]
91 | 


--------------------------------------------------------------------------------
/bionty/migrations/0043_lamindbv2_part2.py:
--------------------------------------------------------------------------------
  1 | # Generated by Django 5.2 on 2025-01-10 23:59
  2 | 
  3 | import django.db.models.deletion
  4 | import lamindb.base.fields
  5 | from django.db import migrations
  6 | 
  7 | 
  8 | class Migration(migrations.Migration):
  9 |     dependencies = [
 10 |         ("bionty", "0042_lamindbv1"),
 11 |     ]
 12 | 
 13 |     operations = [
 14 |         migrations.AddField(
 15 |             model_name="cellline",
 16 |             name="space",
 17 |             field=lamindb.base.fields.ForeignKey(
 18 |                 blank=True,
 19 |                 default=1,
 20 |                 on_delete=django.db.models.deletion.PROTECT,
 21 |                 to="lamindb.space",
 22 |             ),
 23 |         ),
 24 |         migrations.AddField(
 25 |             model_name="cellmarker",
 26 |             name="space",
 27 |             field=lamindb.base.fields.ForeignKey(
 28 |                 blank=True,
 29 |                 default=1,
 30 |                 on_delete=django.db.models.deletion.PROTECT,
 31 |                 to="lamindb.space",
 32 |             ),
 33 |         ),
 34 |         migrations.AddField(
 35 |             model_name="celltype",
 36 |             name="space",
 37 |             field=lamindb.base.fields.ForeignKey(
 38 |                 blank=True,
 39 |                 default=1,
 40 |                 on_delete=django.db.models.deletion.PROTECT,
 41 |                 to="lamindb.space",
 42 |             ),
 43 |         ),
 44 |         migrations.AddField(
 45 |             model_name="developmentalstage",
 46 |             name="space",
 47 |             field=lamindb.base.fields.ForeignKey(
 48 |                 blank=True,
 49 |                 default=1,
 50 |                 on_delete=django.db.models.deletion.PROTECT,
 51 |                 to="lamindb.space",
 52 |             ),
 53 |         ),
 54 |         migrations.AddField(
 55 |             model_name="disease",
 56 |             name="space",
 57 |             field=lamindb.base.fields.ForeignKey(
 58 |                 blank=True,
 59 |                 default=1,
 60 |                 on_delete=django.db.models.deletion.PROTECT,
 61 |                 to="lamindb.space",
 62 |             ),
 63 |         ),
 64 |         migrations.AddField(
 65 |             model_name="ethnicity",
 66 |             name="space",
 67 |             field=lamindb.base.fields.ForeignKey(
 68 |                 blank=True,
 69 |                 default=1,
 70 |                 on_delete=django.db.models.deletion.PROTECT,
 71 |                 to="lamindb.space",
 72 |             ),
 73 |         ),
 74 |         migrations.AddField(
 75 |             model_name="experimentalfactor",
 76 |             name="space",
 77 |             field=lamindb.base.fields.ForeignKey(
 78 |                 blank=True,
 79 |                 default=1,
 80 |                 on_delete=django.db.models.deletion.PROTECT,
 81 |                 to="lamindb.space",
 82 |             ),
 83 |         ),
 84 |         migrations.AddField(
 85 |             model_name="gene",
 86 |             name="space",
 87 |             field=lamindb.base.fields.ForeignKey(
 88 |                 blank=True,
 89 |                 default=1,
 90 |                 on_delete=django.db.models.deletion.PROTECT,
 91 |                 to="lamindb.space",
 92 |             ),
 93 |         ),
 94 |         migrations.AddField(
 95 |             model_name="organism",
 96 |             name="space",
 97 |             field=lamindb.base.fields.ForeignKey(
 98 |                 blank=True,
 99 |                 default=1,
100 |                 on_delete=django.db.models.deletion.PROTECT,
101 |                 to="lamindb.space",
102 |             ),
103 |         ),
104 |         migrations.AddField(
105 |             model_name="pathway",
106 |             name="space",
107 |             field=lamindb.base.fields.ForeignKey(
108 |                 blank=True,
109 |                 default=1,
110 |                 on_delete=django.db.models.deletion.PROTECT,
111 |                 to="lamindb.space",
112 |             ),
113 |         ),
114 |         migrations.AddField(
115 |             model_name="phenotype",
116 |             name="space",
117 |             field=lamindb.base.fields.ForeignKey(
118 |                 blank=True,
119 |                 default=1,
120 |                 on_delete=django.db.models.deletion.PROTECT,
121 |                 to="lamindb.space",
122 |             ),
123 |         ),
124 |         migrations.AddField(
125 |             model_name="protein",
126 |             name="space",
127 |             field=lamindb.base.fields.ForeignKey(
128 |                 blank=True,
129 |                 default=1,
130 |                 on_delete=django.db.models.deletion.PROTECT,
131 |                 to="lamindb.space",
132 |             ),
133 |         ),
134 |         migrations.AddField(
135 |             model_name="source",
136 |             name="space",
137 |             field=lamindb.base.fields.ForeignKey(
138 |                 blank=True,
139 |                 default=1,
140 |                 on_delete=django.db.models.deletion.PROTECT,
141 |                 to="lamindb.space",
142 |             ),
143 |         ),
144 |         migrations.AddField(
145 |             model_name="tissue",
146 |             name="space",
147 |             field=lamindb.base.fields.ForeignKey(
148 |                 blank=True,
149 |                 default=1,
150 |                 on_delete=django.db.models.deletion.PROTECT,
151 |                 to="lamindb.space",
152 |             ),
153 |         ),
154 |     ]
155 | 


--------------------------------------------------------------------------------
/bionty/migrations/0044_alter_cellline_space_alter_cellmarker_space_and_more.py:
--------------------------------------------------------------------------------
  1 | # Generated by Django 5.2 on 2025-01-11 16:44
  2 | 
  3 | import django.db.models.deletion
  4 | import lamindb.base.fields
  5 | from django.db import migrations
  6 | 
  7 | 
  8 | class Migration(migrations.Migration):
  9 |     dependencies = [
 10 |         ("bionty", "0043_lamindbv2_part2"),
 11 |         ("lamindb", "0072_remove_user__branch_code_remove_user_aux_and_more"),
 12 |     ]
 13 | 
 14 |     operations = [
 15 |         migrations.AlterField(
 16 |             model_name="cellline",
 17 |             name="space",
 18 |             field=lamindb.base.fields.ForeignKey(
 19 |                 blank=True,
 20 |                 db_default=1,
 21 |                 default=1,
 22 |                 on_delete=django.db.models.deletion.PROTECT,
 23 |                 to="lamindb.space",
 24 |             ),
 25 |         ),
 26 |         migrations.AlterField(
 27 |             model_name="cellmarker",
 28 |             name="space",
 29 |             field=lamindb.base.fields.ForeignKey(
 30 |                 blank=True,
 31 |                 db_default=1,
 32 |                 default=1,
 33 |                 on_delete=django.db.models.deletion.PROTECT,
 34 |                 to="lamindb.space",
 35 |             ),
 36 |         ),
 37 |         migrations.AlterField(
 38 |             model_name="celltype",
 39 |             name="space",
 40 |             field=lamindb.base.fields.ForeignKey(
 41 |                 blank=True,
 42 |                 db_default=1,
 43 |                 default=1,
 44 |                 on_delete=django.db.models.deletion.PROTECT,
 45 |                 to="lamindb.space",
 46 |             ),
 47 |         ),
 48 |         migrations.AlterField(
 49 |             model_name="developmentalstage",
 50 |             name="space",
 51 |             field=lamindb.base.fields.ForeignKey(
 52 |                 blank=True,
 53 |                 db_default=1,
 54 |                 default=1,
 55 |                 on_delete=django.db.models.deletion.PROTECT,
 56 |                 to="lamindb.space",
 57 |             ),
 58 |         ),
 59 |         migrations.AlterField(
 60 |             model_name="disease",
 61 |             name="space",
 62 |             field=lamindb.base.fields.ForeignKey(
 63 |                 blank=True,
 64 |                 db_default=1,
 65 |                 default=1,
 66 |                 on_delete=django.db.models.deletion.PROTECT,
 67 |                 to="lamindb.space",
 68 |             ),
 69 |         ),
 70 |         migrations.AlterField(
 71 |             model_name="ethnicity",
 72 |             name="space",
 73 |             field=lamindb.base.fields.ForeignKey(
 74 |                 blank=True,
 75 |                 db_default=1,
 76 |                 default=1,
 77 |                 on_delete=django.db.models.deletion.PROTECT,
 78 |                 to="lamindb.space",
 79 |             ),
 80 |         ),
 81 |         migrations.AlterField(
 82 |             model_name="experimentalfactor",
 83 |             name="space",
 84 |             field=lamindb.base.fields.ForeignKey(
 85 |                 blank=True,
 86 |                 db_default=1,
 87 |                 default=1,
 88 |                 on_delete=django.db.models.deletion.PROTECT,
 89 |                 to="lamindb.space",
 90 |             ),
 91 |         ),
 92 |         migrations.AlterField(
 93 |             model_name="gene",
 94 |             name="space",
 95 |             field=lamindb.base.fields.ForeignKey(
 96 |                 blank=True,
 97 |                 db_default=1,
 98 |                 default=1,
 99 |                 on_delete=django.db.models.deletion.PROTECT,
100 |                 to="lamindb.space",
101 |             ),
102 |         ),
103 |         migrations.AlterField(
104 |             model_name="organism",
105 |             name="space",
106 |             field=lamindb.base.fields.ForeignKey(
107 |                 blank=True,
108 |                 db_default=1,
109 |                 default=1,
110 |                 on_delete=django.db.models.deletion.PROTECT,
111 |                 to="lamindb.space",
112 |             ),
113 |         ),
114 |         migrations.AlterField(
115 |             model_name="pathway",
116 |             name="space",
117 |             field=lamindb.base.fields.ForeignKey(
118 |                 blank=True,
119 |                 db_default=1,
120 |                 default=1,
121 |                 on_delete=django.db.models.deletion.PROTECT,
122 |                 to="lamindb.space",
123 |             ),
124 |         ),
125 |         migrations.AlterField(
126 |             model_name="phenotype",
127 |             name="space",
128 |             field=lamindb.base.fields.ForeignKey(
129 |                 blank=True,
130 |                 db_default=1,
131 |                 default=1,
132 |                 on_delete=django.db.models.deletion.PROTECT,
133 |                 to="lamindb.space",
134 |             ),
135 |         ),
136 |         migrations.AlterField(
137 |             model_name="protein",
138 |             name="space",
139 |             field=lamindb.base.fields.ForeignKey(
140 |                 blank=True,
141 |                 db_default=1,
142 |                 default=1,
143 |                 on_delete=django.db.models.deletion.PROTECT,
144 |                 to="lamindb.space",
145 |             ),
146 |         ),
147 |         migrations.AlterField(
148 |             model_name="source",
149 |             name="space",
150 |             field=lamindb.base.fields.ForeignKey(
151 |                 blank=True,
152 |                 db_default=1,
153 |                 default=1,
154 |                 on_delete=django.db.models.deletion.PROTECT,
155 |                 to="lamindb.space",
156 |             ),
157 |         ),
158 |         migrations.AlterField(
159 |             model_name="tissue",
160 |             name="space",
161 |             field=lamindb.base.fields.ForeignKey(
162 |                 blank=True,
163 |                 db_default=1,
164 |                 default=1,
165 |                 on_delete=django.db.models.deletion.PROTECT,
166 |                 to="lamindb.space",
167 |             ),
168 |         ),
169 |     ]
170 | 


--------------------------------------------------------------------------------
/bionty/migrations/0045_rename_aux_cellline__aux_rename_aux_cellmarker__aux_and_more.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 5.2 on 2025-01-13 11:15
 2 | 
 3 | from django.db import migrations
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 |     dependencies = [
 8 |         ("bionty", "0044_alter_cellline_space_alter_cellmarker_space_and_more"),
 9 |     ]
10 | 
11 |     operations = [
12 |         migrations.RenameField(
13 |             model_name="cellline",
14 |             old_name="aux",
15 |             new_name="_aux",
16 |         ),
17 |         migrations.RenameField(
18 |             model_name="cellmarker",
19 |             old_name="aux",
20 |             new_name="_aux",
21 |         ),
22 |         migrations.RenameField(
23 |             model_name="celltype",
24 |             old_name="aux",
25 |             new_name="_aux",
26 |         ),
27 |         migrations.RenameField(
28 |             model_name="developmentalstage",
29 |             old_name="aux",
30 |             new_name="_aux",
31 |         ),
32 |         migrations.RenameField(
33 |             model_name="disease",
34 |             old_name="aux",
35 |             new_name="_aux",
36 |         ),
37 |         migrations.RenameField(
38 |             model_name="ethnicity",
39 |             old_name="aux",
40 |             new_name="_aux",
41 |         ),
42 |         migrations.RenameField(
43 |             model_name="experimentalfactor",
44 |             old_name="aux",
45 |             new_name="_aux",
46 |         ),
47 |         migrations.RenameField(
48 |             model_name="gene",
49 |             old_name="aux",
50 |             new_name="_aux",
51 |         ),
52 |         migrations.RenameField(
53 |             model_name="organism",
54 |             old_name="aux",
55 |             new_name="_aux",
56 |         ),
57 |         migrations.RenameField(
58 |             model_name="pathway",
59 |             old_name="aux",
60 |             new_name="_aux",
61 |         ),
62 |         migrations.RenameField(
63 |             model_name="phenotype",
64 |             old_name="aux",
65 |             new_name="_aux",
66 |         ),
67 |         migrations.RenameField(
68 |             model_name="protein",
69 |             old_name="aux",
70 |             new_name="_aux",
71 |         ),
72 |         migrations.RenameField(
73 |             model_name="source",
74 |             old_name="aux",
75 |             new_name="_aux",
76 |         ),
77 |         migrations.RenameField(
78 |             model_name="tissue",
79 |             old_name="aux",
80 |             new_name="_aux",
81 |         ),
82 |     ]
83 | 


--------------------------------------------------------------------------------
/bionty/migrations/0046_alter_cellline__aux_alter_cellmarker__aux_and_more.py:
--------------------------------------------------------------------------------
  1 | # Generated by Django 5.2 on 2025-01-13 11:55
  2 | 
  3 | import lamindb.base.fields
  4 | from django.db import migrations
  5 | 
  6 | 
  7 | class Migration(migrations.Migration):
  8 |     dependencies = [
  9 |         ("bionty", "0045_rename_aux_cellline__aux_rename_aux_cellmarker__aux_and_more"),
 10 |     ]
 11 | 
 12 |     operations = [
 13 |         migrations.AlterField(
 14 |             model_name="cellline",
 15 |             name="_aux",
 16 |             field=lamindb.base.fields.JSONField(
 17 |                 blank=True, db_default=None, default=None, null=True
 18 |             ),
 19 |         ),
 20 |         migrations.AlterField(
 21 |             model_name="cellmarker",
 22 |             name="_aux",
 23 |             field=lamindb.base.fields.JSONField(
 24 |                 blank=True, db_default=None, default=None, null=True
 25 |             ),
 26 |         ),
 27 |         migrations.AlterField(
 28 |             model_name="celltype",
 29 |             name="_aux",
 30 |             field=lamindb.base.fields.JSONField(
 31 |                 blank=True, db_default=None, default=None, null=True
 32 |             ),
 33 |         ),
 34 |         migrations.AlterField(
 35 |             model_name="developmentalstage",
 36 |             name="_aux",
 37 |             field=lamindb.base.fields.JSONField(
 38 |                 blank=True, db_default=None, default=None, null=True
 39 |             ),
 40 |         ),
 41 |         migrations.AlterField(
 42 |             model_name="disease",
 43 |             name="_aux",
 44 |             field=lamindb.base.fields.JSONField(
 45 |                 blank=True, db_default=None, default=None, null=True
 46 |             ),
 47 |         ),
 48 |         migrations.AlterField(
 49 |             model_name="ethnicity",
 50 |             name="_aux",
 51 |             field=lamindb.base.fields.JSONField(
 52 |                 blank=True, db_default=None, default=None, null=True
 53 |             ),
 54 |         ),
 55 |         migrations.AlterField(
 56 |             model_name="experimentalfactor",
 57 |             name="_aux",
 58 |             field=lamindb.base.fields.JSONField(
 59 |                 blank=True, db_default=None, default=None, null=True
 60 |             ),
 61 |         ),
 62 |         migrations.AlterField(
 63 |             model_name="gene",
 64 |             name="_aux",
 65 |             field=lamindb.base.fields.JSONField(
 66 |                 blank=True, db_default=None, default=None, null=True
 67 |             ),
 68 |         ),
 69 |         migrations.AlterField(
 70 |             model_name="organism",
 71 |             name="_aux",
 72 |             field=lamindb.base.fields.JSONField(
 73 |                 blank=True, db_default=None, default=None, null=True
 74 |             ),
 75 |         ),
 76 |         migrations.AlterField(
 77 |             model_name="pathway",
 78 |             name="_aux",
 79 |             field=lamindb.base.fields.JSONField(
 80 |                 blank=True, db_default=None, default=None, null=True
 81 |             ),
 82 |         ),
 83 |         migrations.AlterField(
 84 |             model_name="phenotype",
 85 |             name="_aux",
 86 |             field=lamindb.base.fields.JSONField(
 87 |                 blank=True, db_default=None, default=None, null=True
 88 |             ),
 89 |         ),
 90 |         migrations.AlterField(
 91 |             model_name="protein",
 92 |             name="_aux",
 93 |             field=lamindb.base.fields.JSONField(
 94 |                 blank=True, db_default=None, default=None, null=True
 95 |             ),
 96 |         ),
 97 |         migrations.AlterField(
 98 |             model_name="source",
 99 |             name="_aux",
100 |             field=lamindb.base.fields.JSONField(
101 |                 blank=True, db_default=None, default=None, null=True
102 |             ),
103 |         ),
104 |         migrations.AlterField(
105 |             model_name="tissue",
106 |             name="_aux",
107 |             field=lamindb.base.fields.JSONField(
108 |                 blank=True, db_default=None, default=None, null=True
109 |             ),
110 |         ),
111 |     ]
112 | 


--------------------------------------------------------------------------------
/bionty/migrations/0047_lamindbv1_part5.py:
--------------------------------------------------------------------------------
  1 | # Generated by Django 5.2 on 2025-01-13 16:08
  2 | 
  3 | import django.db.models.deletion
  4 | import lamindb.base.fields
  5 | import lamindb.models
  6 | from django.db import migrations, models
  7 | 
  8 | 
  9 | class Migration(migrations.Migration):
 10 |     dependencies = [
 11 |         ("bionty", "0046_alter_cellline__aux_alter_cellmarker__aux_and_more"),
 12 |         ("lamindb", "0075_lamindbv1_part5"),
 13 |     ]
 14 | 
 15 |     operations = [
 16 |         migrations.RenameModel(
 17 |             old_name="FeatureSetCellMarker",
 18 |             new_name="SchemaCellMarker",
 19 |         ),
 20 |         migrations.RenameModel(
 21 |             old_name="FeatureSetGene",
 22 |             new_name="SchemaGene",
 23 |         ),
 24 |         migrations.RenameModel(
 25 |             old_name="FeatureSetPathway",
 26 |             new_name="SchemaPathway",
 27 |         ),
 28 |         migrations.RenameModel(
 29 |             old_name="FeatureSetProtein",
 30 |             new_name="SchemaProtein",
 31 |         ),
 32 |         migrations.RenameField(
 33 |             model_name="gene",
 34 |             old_name="feature_sets",
 35 |             new_name="schemas",
 36 |         ),
 37 |         migrations.RenameField(
 38 |             model_name="protein",
 39 |             old_name="feature_sets",
 40 |             new_name="schemas",
 41 |         ),
 42 |         migrations.RenameField(
 43 |             model_name="cellmarker",
 44 |             old_name="feature_sets",
 45 |             new_name="schemas",
 46 |         ),
 47 |         migrations.RenameField(
 48 |             model_name="pathway",
 49 |             old_name="feature_sets",
 50 |             new_name="schemas",
 51 |         ),
 52 |         migrations.RenameField(
 53 |             model_name="schemacellmarker",
 54 |             old_name="featureset",
 55 |             new_name="schema",
 56 |         ),
 57 |         migrations.RenameField(
 58 |             model_name="schemagene",
 59 |             old_name="featureset",
 60 |             new_name="schema",
 61 |         ),
 62 |         migrations.RenameField(
 63 |             model_name="schemapathway",
 64 |             old_name="featureset",
 65 |             new_name="schema",
 66 |         ),
 67 |         migrations.RenameField(
 68 |             model_name="schemaprotein",
 69 |             old_name="featureset",
 70 |             new_name="schema",
 71 |         ),
 72 |         migrations.AlterField(
 73 |             model_name="cellmarker",
 74 |             name="schemas",
 75 |             field=models.ManyToManyField(
 76 |                 related_name="cell_markers",
 77 |                 through="bionty.SchemaCellMarker",
 78 |                 to="lamindb.schema",
 79 |             ),
 80 |         ),
 81 |         migrations.AlterField(
 82 |             model_name="gene",
 83 |             name="schemas",
 84 |             field=models.ManyToManyField(
 85 |                 related_name="genes", through="bionty.SchemaGene", to="lamindb.schema"
 86 |             ),
 87 |         ),
 88 |         migrations.AlterField(
 89 |             model_name="pathway",
 90 |             name="schemas",
 91 |             field=models.ManyToManyField(
 92 |                 related_name="pathways",
 93 |                 through="bionty.SchemaPathway",
 94 |                 to="lamindb.schema",
 95 |             ),
 96 |         ),
 97 |         migrations.AlterField(
 98 |             model_name="protein",
 99 |             name="schemas",
100 |             field=models.ManyToManyField(
101 |                 related_name="proteins",
102 |                 through="bionty.SchemaProtein",
103 |                 to="lamindb.schema",
104 |             ),
105 |         ),
106 |         migrations.AlterField(
107 |             model_name="schemacellmarker",
108 |             name="schema",
109 |             field=lamindb.base.fields.ForeignKey(
110 |                 blank=True,
111 |                 on_delete=django.db.models.deletion.CASCADE,
112 |                 related_name="+",
113 |                 to="lamindb.schema",
114 |             ),
115 |         ),
116 |         migrations.AlterField(
117 |             model_name="schemagene",
118 |             name="schema",
119 |             field=lamindb.base.fields.ForeignKey(
120 |                 blank=True,
121 |                 on_delete=django.db.models.deletion.CASCADE,
122 |                 related_name="+",
123 |                 to="lamindb.schema",
124 |             ),
125 |         ),
126 |         migrations.AlterField(
127 |             model_name="schemapathway",
128 |             name="schema",
129 |             field=lamindb.base.fields.ForeignKey(
130 |                 blank=True,
131 |                 on_delete=django.db.models.deletion.CASCADE,
132 |                 related_name="+",
133 |                 to="lamindb.schema",
134 |             ),
135 |         ),
136 |         migrations.AlterField(
137 |             model_name="schemaprotein",
138 |             name="schema",
139 |             field=lamindb.base.fields.ForeignKey(
140 |                 blank=True,
141 |                 on_delete=django.db.models.deletion.CASCADE,
142 |                 related_name="+",
143 |                 to="lamindb.schema",
144 |             ),
145 |         ),
146 |     ]
147 | 


--------------------------------------------------------------------------------
/bionty/migrations/0049_alter_schemacellmarker_cellmarker_and_more.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 5.2 on 2025-01-27 07:22
 2 | 
 3 | import django.db.models.deletion
 4 | import lamindb.base.fields
 5 | from django.db import migrations
 6 | 
 7 | 
 8 | class Migration(migrations.Migration):
 9 |     dependencies = [
10 |         ("bionty", "0048_lamindbv1_part6"),
11 |     ]
12 | 
13 |     operations = [
14 |         migrations.AlterField(
15 |             model_name="schemacellmarker",
16 |             name="cellmarker",
17 |             field=lamindb.base.fields.ForeignKey(
18 |                 blank=True,
19 |                 on_delete=django.db.models.deletion.PROTECT,
20 |                 related_name="links_schema",
21 |                 to="bionty.cellmarker",
22 |             ),
23 |         ),
24 |         migrations.AlterField(
25 |             model_name="schemacellmarker",
26 |             name="schema",
27 |             field=lamindb.base.fields.ForeignKey(
28 |                 blank=True,
29 |                 on_delete=django.db.models.deletion.CASCADE,
30 |                 related_name="links_cellmarker",
31 |                 to="lamindb.schema",
32 |             ),
33 |         ),
34 |         migrations.AlterField(
35 |             model_name="schemagene",
36 |             name="gene",
37 |             field=lamindb.base.fields.ForeignKey(
38 |                 blank=True,
39 |                 on_delete=django.db.models.deletion.PROTECT,
40 |                 related_name="links_schema",
41 |                 to="bionty.gene",
42 |             ),
43 |         ),
44 |         migrations.AlterField(
45 |             model_name="schemagene",
46 |             name="schema",
47 |             field=lamindb.base.fields.ForeignKey(
48 |                 blank=True,
49 |                 on_delete=django.db.models.deletion.CASCADE,
50 |                 related_name="links_gene",
51 |                 to="lamindb.schema",
52 |             ),
53 |         ),
54 |         migrations.AlterField(
55 |             model_name="schemapathway",
56 |             name="pathway",
57 |             field=lamindb.base.fields.ForeignKey(
58 |                 blank=True,
59 |                 on_delete=django.db.models.deletion.PROTECT,
60 |                 related_name="links_schema",
61 |                 to="bionty.pathway",
62 |             ),
63 |         ),
64 |         migrations.AlterField(
65 |             model_name="schemapathway",
66 |             name="schema",
67 |             field=lamindb.base.fields.ForeignKey(
68 |                 blank=True,
69 |                 on_delete=django.db.models.deletion.CASCADE,
70 |                 related_name="links_pathway",
71 |                 to="lamindb.schema",
72 |             ),
73 |         ),
74 |         migrations.AlterField(
75 |             model_name="schemaprotein",
76 |             name="protein",
77 |             field=lamindb.base.fields.ForeignKey(
78 |                 blank=True,
79 |                 on_delete=django.db.models.deletion.PROTECT,
80 |                 related_name="links_schema",
81 |                 to="bionty.protein",
82 |             ),
83 |         ),
84 |         migrations.AlterField(
85 |             model_name="schemaprotein",
86 |             name="schema",
87 |             field=lamindb.base.fields.ForeignKey(
88 |                 blank=True,
89 |                 on_delete=django.db.models.deletion.CASCADE,
90 |                 related_name="links_protein",
91 |                 to="lamindb.schema",
92 |             ),
93 |         ),
94 |     ]
95 | 


--------------------------------------------------------------------------------
/bionty/migrations/0050_alter_source_uid.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 5.2 on 2025-02-16 21:50
 2 | 
 3 | import lamindb.base.fields
 4 | from django.db import migrations
 5 | 
 6 | import bionty.ids
 7 | from bionty._biorecord import encode_uid
 8 | 
 9 | 
10 | def populate_uids(apps, schema_editor):
11 |     Source = apps.get_model("bionty", "Source")
12 |     for source in Source.objects.all():
13 |         # Convert model instance to dictionary of fields
14 |         kwargs = {
15 |             field.name: getattr(source, field.name)
16 |             for field in source._meta.fields
17 |             if field.name != "uid"  # Exclude uid field itself
18 |         }
19 |         # Generate and save new uid
20 |         source.uid = encode_uid(registry=Source, kwargs=kwargs)["uid"]
21 |         source.save()
22 | 
23 | 
24 | class Migration(migrations.Migration):
25 |     dependencies = [
26 |         ("bionty", "0049_alter_schemacellmarker_cellmarker_and_more"),
27 |     ]
28 | 
29 |     operations = [
30 |         migrations.AlterField(
31 |             model_name="source",
32 |             name="uid",
33 |             field=lamindb.base.fields.CharField(
34 |                 blank=True, default=bionty.ids.source, max_length=8, unique=True
35 |             ),
36 |         ),
37 |         migrations.RunPython(populate_uids, reverse_code=migrations.RunPython.noop),
38 |     ]
39 | 


--------------------------------------------------------------------------------
/bionty/migrations/0051_alter_cellline__branch_code_and_more.py:
--------------------------------------------------------------------------------
  1 | # Generated by Django 5.2 on 2025-05-25 11:59
  2 | 
  3 | import django.db.models.deletion
  4 | import lamindb.base.fields
  5 | from django.db import migrations
  6 | 
  7 | 
  8 | class Migration(migrations.Migration):
  9 |     dependencies = [
 10 |         ("bionty", "0050_alter_source_uid"),
 11 |         ("lamindb", "0100_branch_alter_artifact__branch_code_and_more"),
 12 |     ]
 13 | 
 14 |     operations = [
 15 |         migrations.AlterField(
 16 |             model_name="cellline",
 17 |             name="_branch_code",
 18 |             field=lamindb.base.fields.ForeignKey(
 19 |                 blank=True,
 20 |                 db_column="_branch_code",
 21 |                 db_default=1,
 22 |                 default=1,
 23 |                 on_delete=django.db.models.deletion.PROTECT,
 24 |                 to="lamindb.branch",
 25 |             ),
 26 |         ),
 27 |         migrations.AlterField(
 28 |             model_name="cellmarker",
 29 |             name="_branch_code",
 30 |             field=lamindb.base.fields.ForeignKey(
 31 |                 blank=True,
 32 |                 db_column="_branch_code",
 33 |                 db_default=1,
 34 |                 default=1,
 35 |                 on_delete=django.db.models.deletion.PROTECT,
 36 |                 to="lamindb.branch",
 37 |             ),
 38 |         ),
 39 |         migrations.AlterField(
 40 |             model_name="celltype",
 41 |             name="_branch_code",
 42 |             field=lamindb.base.fields.ForeignKey(
 43 |                 blank=True,
 44 |                 db_column="_branch_code",
 45 |                 db_default=1,
 46 |                 default=1,
 47 |                 on_delete=django.db.models.deletion.PROTECT,
 48 |                 to="lamindb.branch",
 49 |             ),
 50 |         ),
 51 |         migrations.AlterField(
 52 |             model_name="developmentalstage",
 53 |             name="_branch_code",
 54 |             field=lamindb.base.fields.ForeignKey(
 55 |                 blank=True,
 56 |                 db_column="_branch_code",
 57 |                 db_default=1,
 58 |                 default=1,
 59 |                 on_delete=django.db.models.deletion.PROTECT,
 60 |                 to="lamindb.branch",
 61 |             ),
 62 |         ),
 63 |         migrations.AlterField(
 64 |             model_name="disease",
 65 |             name="_branch_code",
 66 |             field=lamindb.base.fields.ForeignKey(
 67 |                 blank=True,
 68 |                 db_column="_branch_code",
 69 |                 db_default=1,
 70 |                 default=1,
 71 |                 on_delete=django.db.models.deletion.PROTECT,
 72 |                 to="lamindb.branch",
 73 |             ),
 74 |         ),
 75 |         migrations.AlterField(
 76 |             model_name="ethnicity",
 77 |             name="_branch_code",
 78 |             field=lamindb.base.fields.ForeignKey(
 79 |                 blank=True,
 80 |                 db_column="_branch_code",
 81 |                 db_default=1,
 82 |                 default=1,
 83 |                 on_delete=django.db.models.deletion.PROTECT,
 84 |                 to="lamindb.branch",
 85 |             ),
 86 |         ),
 87 |         migrations.AlterField(
 88 |             model_name="experimentalfactor",
 89 |             name="_branch_code",
 90 |             field=lamindb.base.fields.ForeignKey(
 91 |                 blank=True,
 92 |                 db_column="_branch_code",
 93 |                 db_default=1,
 94 |                 default=1,
 95 |                 on_delete=django.db.models.deletion.PROTECT,
 96 |                 to="lamindb.branch",
 97 |             ),
 98 |         ),
 99 |         migrations.AlterField(
100 |             model_name="gene",
101 |             name="_branch_code",
102 |             field=lamindb.base.fields.ForeignKey(
103 |                 blank=True,
104 |                 db_column="_branch_code",
105 |                 db_default=1,
106 |                 default=1,
107 |                 on_delete=django.db.models.deletion.PROTECT,
108 |                 to="lamindb.branch",
109 |             ),
110 |         ),
111 |         migrations.AlterField(
112 |             model_name="organism",
113 |             name="_branch_code",
114 |             field=lamindb.base.fields.ForeignKey(
115 |                 blank=True,
116 |                 db_column="_branch_code",
117 |                 db_default=1,
118 |                 default=1,
119 |                 on_delete=django.db.models.deletion.PROTECT,
120 |                 to="lamindb.branch",
121 |             ),
122 |         ),
123 |         migrations.AlterField(
124 |             model_name="pathway",
125 |             name="_branch_code",
126 |             field=lamindb.base.fields.ForeignKey(
127 |                 blank=True,
128 |                 db_column="_branch_code",
129 |                 db_default=1,
130 |                 default=1,
131 |                 on_delete=django.db.models.deletion.PROTECT,
132 |                 to="lamindb.branch",
133 |             ),
134 |         ),
135 |         migrations.AlterField(
136 |             model_name="phenotype",
137 |             name="_branch_code",
138 |             field=lamindb.base.fields.ForeignKey(
139 |                 blank=True,
140 |                 db_column="_branch_code",
141 |                 db_default=1,
142 |                 default=1,
143 |                 on_delete=django.db.models.deletion.PROTECT,
144 |                 to="lamindb.branch",
145 |             ),
146 |         ),
147 |         migrations.AlterField(
148 |             model_name="protein",
149 |             name="_branch_code",
150 |             field=lamindb.base.fields.ForeignKey(
151 |                 blank=True,
152 |                 db_column="_branch_code",
153 |                 db_default=1,
154 |                 default=1,
155 |                 on_delete=django.db.models.deletion.PROTECT,
156 |                 to="lamindb.branch",
157 |             ),
158 |         ),
159 |         migrations.AlterField(
160 |             model_name="source",
161 |             name="_branch_code",
162 |             field=lamindb.base.fields.ForeignKey(
163 |                 blank=True,
164 |                 db_column="_branch_code",
165 |                 db_default=1,
166 |                 default=1,
167 |                 on_delete=django.db.models.deletion.PROTECT,
168 |                 to="lamindb.branch",
169 |             ),
170 |         ),
171 |         migrations.AlterField(
172 |             model_name="tissue",
173 |             name="_branch_code",
174 |             field=lamindb.base.fields.ForeignKey(
175 |                 blank=True,
176 |                 db_column="_branch_code",
177 |                 db_default=1,
178 |                 default=1,
179 |                 on_delete=django.db.models.deletion.PROTECT,
180 |                 to="lamindb.branch",
181 |             ),
182 |         ),
183 |     ]
184 | 


--------------------------------------------------------------------------------
/bionty/migrations/0052_rename__branch_code_cellline_branch_and_more.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 5.2 on 2025-05-25 12:04
 2 | 
 3 | from django.db import migrations
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 |     dependencies = [
 8 |         ("bionty", "0051_alter_cellline__branch_code_and_more"),
 9 |     ]
10 | 
11 |     operations = [
12 |         migrations.RenameField(
13 |             model_name="cellline",
14 |             old_name="_branch_code",
15 |             new_name="branch",
16 |         ),
17 |         migrations.RenameField(
18 |             model_name="cellmarker",
19 |             old_name="_branch_code",
20 |             new_name="branch",
21 |         ),
22 |         migrations.RenameField(
23 |             model_name="celltype",
24 |             old_name="_branch_code",
25 |             new_name="branch",
26 |         ),
27 |         migrations.RenameField(
28 |             model_name="developmentalstage",
29 |             old_name="_branch_code",
30 |             new_name="branch",
31 |         ),
32 |         migrations.RenameField(
33 |             model_name="disease",
34 |             old_name="_branch_code",
35 |             new_name="branch",
36 |         ),
37 |         migrations.RenameField(
38 |             model_name="ethnicity",
39 |             old_name="_branch_code",
40 |             new_name="branch",
41 |         ),
42 |         migrations.RenameField(
43 |             model_name="experimentalfactor",
44 |             old_name="_branch_code",
45 |             new_name="branch",
46 |         ),
47 |         migrations.RenameField(
48 |             model_name="gene",
49 |             old_name="_branch_code",
50 |             new_name="branch",
51 |         ),
52 |         migrations.RenameField(
53 |             model_name="organism",
54 |             old_name="_branch_code",
55 |             new_name="branch",
56 |         ),
57 |         migrations.RenameField(
58 |             model_name="pathway",
59 |             old_name="_branch_code",
60 |             new_name="branch",
61 |         ),
62 |         migrations.RenameField(
63 |             model_name="phenotype",
64 |             old_name="_branch_code",
65 |             new_name="branch",
66 |         ),
67 |         migrations.RenameField(
68 |             model_name="protein",
69 |             old_name="_branch_code",
70 |             new_name="branch",
71 |         ),
72 |         migrations.RenameField(
73 |             model_name="source",
74 |             old_name="_branch_code",
75 |             new_name="branch",
76 |         ),
77 |         migrations.RenameField(
78 |             model_name="tissue",
79 |             old_name="_branch_code",
80 |             new_name="branch",
81 |         ),
82 |     ]
83 | 


--------------------------------------------------------------------------------
/bionty/migrations/0056_alter_recordtissue_record.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 5.2 on 2025-07-05 15:36
 2 | 
 3 | import django.db.models.deletion
 4 | import lamindb.base.fields
 5 | from django.db import migrations
 6 | 
 7 | 
 8 | class Migration(migrations.Migration):
 9 |     dependencies = [
10 |         ("bionty", "0055_rename_cellline_recordcellline_value_and_more"),
11 |     ]
12 | 
13 |     operations = [
14 |         migrations.AlterField(
15 |             model_name="recordtissue",
16 |             name="record",
17 |             field=lamindb.base.fields.ForeignKey(
18 |                 blank=True,
19 |                 on_delete=django.db.models.deletion.CASCADE,
20 |                 related_name="values_tissue",
21 |                 to="lamindb.record",
22 |             ),
23 |         ),
24 |     ]
25 | 


--------------------------------------------------------------------------------
/bionty/migrations/0058_cellline_page_cellmarker_page_celltype_page_and_more.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 5.1.12 on 2025-09-28 23:37
 2 | 
 3 | import django.db.models.deletion
 4 | import lamindb.base.fields
 5 | from django.db import migrations
 6 | 
 7 | 
 8 | class Migration(migrations.Migration):
 9 |     dependencies = [
10 |         ("bionty", "0057_alter_cellline_description_alter_cellline_synonyms_and_more"),
11 |         ("lamindb", "0124_page_artifact_page_collection_page_feature_page_and_more"),
12 |     ]
13 | 
14 |     operations = []  # type: ignore
15 | 


--------------------------------------------------------------------------------
/bionty/migrations/0059_cellline_is_locked_cellmarker_is_locked_and_more.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 5.1.12 on 2025-09-29 00:46
 2 | 
 3 | import lamindb.base.fields
 4 | from django.db import migrations
 5 | 
 6 | 
 7 | class Migration(migrations.Migration):
 8 |     dependencies = [
 9 |         ("bionty", "0058_cellline_page_cellmarker_page_celltype_page_and_more"),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.AddField(
14 |             model_name="cellline",
15 |             name="is_locked",
16 |             field=lamindb.base.fields.BooleanField(blank=True, default=False),
17 |         ),
18 |         migrations.AddField(
19 |             model_name="cellmarker",
20 |             name="is_locked",
21 |             field=lamindb.base.fields.BooleanField(blank=True, default=False),
22 |         ),
23 |         migrations.AddField(
24 |             model_name="celltype",
25 |             name="is_locked",
26 |             field=lamindb.base.fields.BooleanField(blank=True, default=False),
27 |         ),
28 |         migrations.AddField(
29 |             model_name="developmentalstage",
30 |             name="is_locked",
31 |             field=lamindb.base.fields.BooleanField(blank=True, default=False),
32 |         ),
33 |         migrations.AddField(
34 |             model_name="disease",
35 |             name="is_locked",
36 |             field=lamindb.base.fields.BooleanField(blank=True, default=False),
37 |         ),
38 |         migrations.AddField(
39 |             model_name="ethnicity",
40 |             name="is_locked",
41 |             field=lamindb.base.fields.BooleanField(blank=True, default=False),
42 |         ),
43 |         migrations.AddField(
44 |             model_name="experimentalfactor",
45 |             name="is_locked",
46 |             field=lamindb.base.fields.BooleanField(blank=True, default=False),
47 |         ),
48 |         migrations.AddField(
49 |             model_name="gene",
50 |             name="is_locked",
51 |             field=lamindb.base.fields.BooleanField(blank=True, default=False),
52 |         ),
53 |         migrations.AddField(
54 |             model_name="organism",
55 |             name="is_locked",
56 |             field=lamindb.base.fields.BooleanField(blank=True, default=False),
57 |         ),
58 |         migrations.AddField(
59 |             model_name="pathway",
60 |             name="is_locked",
61 |             field=lamindb.base.fields.BooleanField(blank=True, default=False),
62 |         ),
63 |         migrations.AddField(
64 |             model_name="phenotype",
65 |             name="is_locked",
66 |             field=lamindb.base.fields.BooleanField(blank=True, default=False),
67 |         ),
68 |         migrations.AddField(
69 |             model_name="protein",
70 |             name="is_locked",
71 |             field=lamindb.base.fields.BooleanField(blank=True, default=False),
72 |         ),
73 |         migrations.AddField(
74 |             model_name="source",
75 |             name="is_locked",
76 |             field=lamindb.base.fields.BooleanField(blank=True, default=False),
77 |         ),
78 |         migrations.AddField(
79 |             model_name="tissue",
80 |             name="is_locked",
81 |             field=lamindb.base.fields.BooleanField(blank=True, default=False),
82 |         ),
83 |     ]
84 | 


--------------------------------------------------------------------------------
/bionty/migrations/0060_alter_cellline_is_locked_alter_cellmarker_is_locked_and_more.py:
--------------------------------------------------------------------------------
  1 | # Generated by Django 5.1.12 on 2025-09-29 07:51
  2 | 
  3 | import lamindb.base.fields
  4 | from django.db import migrations
  5 | 
  6 | 
  7 | class Migration(migrations.Migration):
  8 |     dependencies = [
  9 |         ("bionty", "0059_cellline_is_locked_cellmarker_is_locked_and_more"),
 10 |     ]
 11 | 
 12 |     operations = [
 13 |         migrations.AlterField(
 14 |             model_name="cellline",
 15 |             name="is_locked",
 16 |             field=lamindb.base.fields.BooleanField(
 17 |                 blank=True, db_default=False, default=False
 18 |             ),
 19 |         ),
 20 |         migrations.AlterField(
 21 |             model_name="cellmarker",
 22 |             name="is_locked",
 23 |             field=lamindb.base.fields.BooleanField(
 24 |                 blank=True, db_default=False, default=False
 25 |             ),
 26 |         ),
 27 |         migrations.AlterField(
 28 |             model_name="celltype",
 29 |             name="is_locked",
 30 |             field=lamindb.base.fields.BooleanField(
 31 |                 blank=True, db_default=False, default=False
 32 |             ),
 33 |         ),
 34 |         migrations.AlterField(
 35 |             model_name="developmentalstage",
 36 |             name="is_locked",
 37 |             field=lamindb.base.fields.BooleanField(
 38 |                 blank=True, db_default=False, default=False
 39 |             ),
 40 |         ),
 41 |         migrations.AlterField(
 42 |             model_name="disease",
 43 |             name="is_locked",
 44 |             field=lamindb.base.fields.BooleanField(
 45 |                 blank=True, db_default=False, default=False
 46 |             ),
 47 |         ),
 48 |         migrations.AlterField(
 49 |             model_name="ethnicity",
 50 |             name="is_locked",
 51 |             field=lamindb.base.fields.BooleanField(
 52 |                 blank=True, db_default=False, default=False
 53 |             ),
 54 |         ),
 55 |         migrations.AlterField(
 56 |             model_name="experimentalfactor",
 57 |             name="is_locked",
 58 |             field=lamindb.base.fields.BooleanField(
 59 |                 blank=True, db_default=False, default=False
 60 |             ),
 61 |         ),
 62 |         migrations.AlterField(
 63 |             model_name="gene",
 64 |             name="is_locked",
 65 |             field=lamindb.base.fields.BooleanField(
 66 |                 blank=True, db_default=False, default=False
 67 |             ),
 68 |         ),
 69 |         migrations.AlterField(
 70 |             model_name="organism",
 71 |             name="is_locked",
 72 |             field=lamindb.base.fields.BooleanField(
 73 |                 blank=True, db_default=False, default=False
 74 |             ),
 75 |         ),
 76 |         migrations.AlterField(
 77 |             model_name="pathway",
 78 |             name="is_locked",
 79 |             field=lamindb.base.fields.BooleanField(
 80 |                 blank=True, db_default=False, default=False
 81 |             ),
 82 |         ),
 83 |         migrations.AlterField(
 84 |             model_name="phenotype",
 85 |             name="is_locked",
 86 |             field=lamindb.base.fields.BooleanField(
 87 |                 blank=True, db_default=False, default=False
 88 |             ),
 89 |         ),
 90 |         migrations.AlterField(
 91 |             model_name="protein",
 92 |             name="is_locked",
 93 |             field=lamindb.base.fields.BooleanField(
 94 |                 blank=True, db_default=False, default=False
 95 |             ),
 96 |         ),
 97 |         migrations.AlterField(
 98 |             model_name="source",
 99 |             name="is_locked",
100 |             field=lamindb.base.fields.BooleanField(
101 |                 blank=True, db_default=False, default=False
102 |             ),
103 |         ),
104 |         migrations.AlterField(
105 |             model_name="tissue",
106 |             name="is_locked",
107 |             field=lamindb.base.fields.BooleanField(
108 |                 blank=True, db_default=False, default=False
109 |             ),
110 |         ),
111 |     ]
112 | 


--------------------------------------------------------------------------------
/bionty/migrations/0061_remove_cellline_page_remove_cellmarker_page_and_more.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 5.1.12 on 2025-10-01 14:39
 2 | 
 3 | from django.db import migrations
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 |     dependencies = [
 8 |         ("bionty", "0060_alter_cellline_is_locked_alter_cellmarker_is_locked_and_more"),
 9 |     ]
10 | 
11 |     operations = []  # type: ignore
12 | 


--------------------------------------------------------------------------------
/bionty/migrations/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/laminlabs/bionty/620a707fe3266d49249af3b47bebdac043b36326/bionty/migrations/__init__.py


--------------------------------------------------------------------------------
/bionty/uids.py:
--------------------------------------------------------------------------------
  1 | """UIDs.
  2 | 
  3 | Entity-related generators:
  4 | 
  5 | .. autosummary::
  6 |    :toctree: .
  7 | 
  8 |    gene
  9 |    protein
 10 |    cellmarker
 11 |    ontology
 12 |    source
 13 | 
 14 | """
 15 | 
 16 | import hashlib
 17 | import secrets
 18 | import string
 19 | 
 20 | 
 21 | def base62(n_char: int) -> str:
 22 |     """Random Base62 string."""
 23 |     alphabet = string.digits + string.ascii_letters.swapcase()
 24 |     id = "".join(secrets.choice(alphabet) for i in range(n_char))
 25 |     return id
 26 | 
 27 | 
 28 | def encode_base62(s: str) -> str:
 29 |     from lamin_utils._base62 import encodebytes
 30 | 
 31 |     return encodebytes(hashlib.md5(s.encode()).digest())
 32 | 
 33 | 
 34 | def hash_id(input_id: str | None = None, *, n_char: int) -> str:
 35 |     if input_id is None:
 36 |         return base62(n_char=n_char)
 37 |     else:
 38 |         return encode_base62(input_id)[:n_char]
 39 | 
 40 | 
 41 | def gene(input_id: str | None = None) -> str:
 42 |     """12 base62."""
 43 |     return hash_id(input_id, n_char=12)
 44 | 
 45 | 
 46 | def protein(input_id: str | None = None) -> str:
 47 |     """12 base62."""
 48 |     return hash_id(input_id, n_char=12)
 49 | 
 50 | 
 51 | def cellmarker(input_id: str | None = None) -> str:
 52 |     """12 base62."""
 53 |     return hash_id(input_id, n_char=12)
 54 | 
 55 | 
 56 | def ontology(input_id: str | None = None):
 57 |     """8 base62."""
 58 |     return hash_id(input_id, n_char=8)
 59 | 
 60 | 
 61 | def source(input_id: str | None = None):
 62 |     """8 base62."""
 63 |     return hash_id(input_id, n_char=8)
 64 | 
 65 | 
 66 | def encode_uid(registry: type, kwargs: dict):
 67 |     """The type passed needs to be a subclass of BioRecord."""
 68 |     from lamindb.models import SQLRecord
 69 | 
 70 |     from . import ids
 71 | 
 72 |     if kwargs.get("uid") is not None:
 73 |         # if uid is passed, no encoding is needed
 74 |         return kwargs
 75 |     name = registry.__name__.lower()
 76 |     if hasattr(registry, "organism_id"):
 77 |         organism = kwargs.get("organism")
 78 |         if organism is None:
 79 |             if kwargs.get("organism_id") is not None:
 80 |                 from .models import Organism
 81 | 
 82 |                 organism = Organism.get(kwargs.get("organism_id")).name
 83 |         elif isinstance(organism, SQLRecord):
 84 |             organism = organism.name
 85 |     else:
 86 |         organism = ""
 87 | 
 88 |     if hasattr(registry, "_ontology_id_field"):
 89 |         ontology_id_field = registry._ontology_id_field
 90 |     else:
 91 |         ontology_id_field = "ontology_id"
 92 |     if hasattr(registry, "_name_field"):
 93 |         name_field = registry._name_field
 94 |     else:
 95 |         name_field = "name"
 96 | 
 97 |     str_to_encode = None
 98 |     if name == "source":
 99 |         str_to_encode = f"{kwargs.get('entity', '')}{kwargs.get('name', '')}{kwargs.get('organism', '')}{kwargs.get('version', '')}"
100 |     elif name == "gene":  # gene has multiple id fields
101 |         str_to_encode = kwargs.get(ontology_id_field)
102 |         if str_to_encode is None or str_to_encode == "":
103 |             str_to_encode = kwargs.get("stable_id")
104 |         if str_to_encode is None or str_to_encode == "":
105 |             str_to_encode = f"{kwargs.get(name_field)}{organism}"  # name + organism
106 |         if str_to_encode is None or str_to_encode == "":
107 |             raise AssertionError(
108 |                 f"must provide {ontology_id_field}, stable_id or {name_field}"
109 |             )
110 |     else:
111 |         str_to_encode = kwargs.get(ontology_id_field)
112 |         if str_to_encode is None or str_to_encode == "":
113 |             str_to_encode = f"{kwargs.get(name_field)}{organism}"  # name + organism
114 |         if str_to_encode is None or str_to_encode == "":
115 |             raise AssertionError(f"must provide {ontology_id_field} or {name_field}")
116 | 
117 |     if str_to_encode is not None and len(str_to_encode) > 0:
118 |         try:
119 |             id_encoder = getattr(ids, name)
120 |         except Exception:
121 |             if ontology_id_field == "ontology_id":
122 |                 id_encoder = ids.ontology
123 |             else:
124 |                 return kwargs
125 |         kwargs["uid"] = id_encoder(str_to_encode)
126 |     return kwargs
127 | 
128 | 
129 | def encode_uid_for_hub(registry_name: str, registry_schema_json: dict, kwargs: dict):
130 |     """Encode the uid for the hub.
131 | 
132 |     Note that `organism` record must be passed in kwargs instead of `organism_id`.
133 |     """
134 |     from . import ids
135 | 
136 |     if kwargs.get("uid") is not None:
137 |         # if uid is passed, no encoding is needed
138 |         return kwargs
139 |     name = registry_name.lower()
140 |     # here we need to pass the organism record, not organism_id
141 |     organism = kwargs.get("organism", "")
142 |     if organism:
143 |         organism = organism.get("name", "")
144 | 
145 |     # default to ontology_id
146 |     ontology_id_field = registry_schema_json.get("_ontology_id_field", "ontology_id")
147 |     name_field = registry_schema_json.get("_name_field", "name")
148 | 
149 |     str_to_encode = None
150 |     if name == "source":
151 |         str_to_encode = f"{kwargs.get('entity', '')}{kwargs.get('name', '')}{kwargs.get('organism', '')}{kwargs.get('version', '')}"
152 |     elif name == "gene":  # gene has multiple id fields
153 |         str_to_encode = kwargs.get(ontology_id_field)
154 |         if str_to_encode is None or str_to_encode == "":
155 |             str_to_encode = kwargs.get("stable_id")
156 |         if str_to_encode is None or str_to_encode == "":
157 |             str_to_encode = f"{kwargs.get(name_field)}{organism}"  # name + organism
158 |         if str_to_encode is None or str_to_encode == "":
159 |             raise AssertionError(
160 |                 f"must provide {ontology_id_field}, stable_id or {name_field}"
161 |             )
162 |     else:
163 |         str_to_encode = kwargs.get(ontology_id_field)
164 |         if str_to_encode is None or str_to_encode == "":
165 |             str_to_encode = f"{kwargs.get(name_field)}{organism}"  # name + organism
166 |         if str_to_encode is None or str_to_encode == "":
167 |             raise AssertionError(f"must provide {ontology_id_field} or {name_field}")
168 | 
169 |     if str_to_encode is not None and len(str_to_encode) > 0:
170 |         try:
171 |             id_encoder = getattr(ids, name)
172 |         except Exception:
173 |             if ontology_id_field == "ontology_id":
174 |                 id_encoder = ids.ontology
175 |             else:
176 |                 return kwargs
177 |         kwargs["uid"] = id_encoder(str_to_encode)
178 |     return kwargs
179 | 


--------------------------------------------------------------------------------
/docs/guide.md:
--------------------------------------------------------------------------------
 1 | # Guide
 2 | 
 3 | ```{toctree}
 4 | :maxdepth: 2
 5 | 
 6 | 
 7 | ./guide/concepts
 8 | ./guide/config
 9 | ```
10 | 
11 | ```{toctree}
12 | :hidden:
13 | 
14 | ```
15 | 


--------------------------------------------------------------------------------
/docs/guide/concepts.md:
--------------------------------------------------------------------------------
 1 | # Concepts
 2 | 
 3 | ## Entity
 4 | 
 5 | Let's define a biological entity (e.g., `Organism`) to be a variable that takes values from a vocabulary of terms with biological meaning.
 6 | 
 7 | 1. There are different, roughly equivalent vocabularies for the same entity. For example, one can describe an organism with the vocabulary of scientific names, the vocabulary of common names, or the vocabulary of ontology IDs.
 8 | 2. There are different versions & sources of these vocabularies.
 9 | 3. Terms in the vocabularies have different granularity, and are often hierarchical.
10 | 4. Typically, vocabularies are based on a given version of a public reference ontology, but contain additional “custom” terms corresponding to "new knowledge" absent from reference ontologies. For example, new cell types or states, new synthetic genes, etc.
11 | 
12 | ## PublicOntology object
13 | 
14 | The central class {class}`~bionty.base.PublicOntology` models 3 of the 4 above-mentioned properties of biological entities:
15 | 
16 | 1. Every `PublicOntology` object comes with a table of terms in which each column corresponds to an alternative vocabulary for the entity.
17 | 2. Every table is versioned & has a tracked reference source (typically, a public ontology).
18 | 3. Most tables have a children column that allows mapping hierarchies.
19 | 4. Adding user-defined records amounts to managing ontologies through Bionty's SQL models.
20 | 


--------------------------------------------------------------------------------
/docs/guide/config.md:
--------------------------------------------------------------------------------
 1 | # Configuration
 2 | 
 3 | ## Public bionty sources
 4 | 
 5 | Bionty maintains a [sources.yaml](https://raw.githubusercontent.com/laminlabs/bionty/main/bionty/base/sources.yaml) listing public sources of each entity.
 6 | These sources are curated ([bionty-assets](https://github.com/laminlabs/bionty-assets)) and stored in a [bionty-assets instance](https://lamin.ai/laminlabs/bionty-assets/) to provide fast and reliable access.
 7 | Cached source files are stored in your local `bionty/base/_dynamic/` directory.
 8 | 
 9 | ## Display public sources
10 | 
11 | The available and currently used ontologies can also be printed with
12 | `bionty.base.display_available_sources` or `bionty.base.display_currently_used_sources`.
13 | 
14 | ## Structure of the sources.yaml
15 | 
16 | ```yaml
17 | entity: # Bionty entity class name, e.g. CellType
18 |   source: # short name of the source, (CURIE prefix for ontologies) e.g. cl
19 |     organism: # organism common name, (if none applied, use 'all') e.g. human
20 |       version: # version of the source
21 |         url: # "link to the source file"
22 | ```
23 | 
24 | ## Default ontologies and versions in sources.yaml
25 | 
26 | For each entity, the **first source** and its **maximum version** defined in [sources.yaml](https://raw.githubusercontent.com/laminlabs/bionty/main/bionty/base/sources.yaml) is used as default.
27 | To set your own default ontology and version, shift the order of entries.
28 | For example, in the following, "doid" is used when "organism" is specified as "human":
29 | 
30 | (highlighted sources are considered the default)
31 | 
32 | ```{code-block} yaml
33 | ---
34 | emphasize-lines: 2-6,12-16
35 | ---
36 | Disease:
37 |   mondo:
38 |     all:
39 |       2023-02-06:
40 |         source: http://purl.obolibrary.org/obo/mondo/releases/2023-02-06/mondo.owl
41 |       2022-10-11:
42 |         source: http://purl.obolibrary.org/obo/mondo/releases/2022-10-11/mondo.owl
43 |     name: Mondo Disease Ontology
44 |     website: https://mondo.monarchinitiative.org/
45 |   doid:
46 |     human:
47 |       2023-01-30:
48 |         source: http://purl.obolibrary.org/obo/doid/releases/2023-01-30/doid.obo
49 |     name: Human Disease Ontology
50 |     website: https://disease-ontology.org/
51 |   inhouse_diseases:
52 |     human:
53 |       2000-01-01:
54 |         source: http://download-my-diseases.com/releases/2000-01-01/mydiseases.owl
55 |     name: My in-house Disease Ontology
56 |     website: http://my-website.com
57 | ```
58 | 
59 | <br>
60 | 
61 | We may change the default to "inhouse_diseases" when "organism" is specified as "human", by the following:
62 | 
63 | ```{code-block} yaml
64 | ---
65 | emphasize-lines: 2,3,7-9,12-16
66 | ---
67 | Disease:
68 |   mondo:
69 |     all:
70 |       2022-10-11:
71 |         source: http://purl.obolibrary.org/obo/mondo/releases/2022-10-11/mondo.owl
72 |       2023-02-06:
73 |         source: http://purl.obolibrary.org/obo/mondo/releases/2023-02-06/mondo.owl
74 |     name: Mondo Disease Ontology
75 |     website: https://mondo.monarchinitiative.org/
76 |   inhouse_diseases:
77 |     human:
78 |       2000-01-01:
79 |         source: http://download-my-diseases.com/releases/2000-01-01/mydiseases.owl
80 |     name: My in-house Disease Ontology
81 |     website: http://my-website.com
82 |   doid:
83 |     human:
84 |       2023-01-30:
85 |         source: http://purl.obolibrary.org/obo/doid/releases/2023-01-30/doid.obo
86 |     name: Human Disease Ontology
87 |     website: https://disease-ontology.org/
88 | ```
89 | 


--------------------------------------------------------------------------------
/docs/guide/test_notebooks.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | 
3 | import nbproject_test as test
4 | 
5 | 
def test_notebooks():
    """Run `nbproject_test.execute_notebooks` on this file's directory with `write=True`."""
    test.execute_notebooks(Path(__file__).parent, write=True)
9 | 


--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
 1 | ```{include} ../README.md
 2 | :start-line: 0
 3 | :end-line: 1
 4 | ```
 5 | 
 6 | ```{toctree}
 7 | :maxdepth: 1
 8 | :hidden:
 9 | 
10 | guide
11 | reference
12 | ```
13 | 


--------------------------------------------------------------------------------
/docs/reference.md:
--------------------------------------------------------------------------------
1 | # Reference
2 | 
3 | ```{eval-rst}
4 | .. automodule:: bionty
5 | ```
6 | 


--------------------------------------------------------------------------------
/noxfile.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import nox
 4 | from laminci.nox import build_docs, install_lamindb, run, run_pre_commit
 5 | 
 6 | nox.options.default_venv_backend = "none"  # sessions do not get a nox-managed venv
 7 | 
 8 | IS_PR = os.getenv("GITHUB_EVENT_NAME") != "push"  # also True when the variable is unset
 9 | 
10 | 
11 | @nox.session
12 | def lint(session: nox.Session) -> None:
13 |     run_pre_commit(session)  # linting is delegated to the laminci.nox helper
14 | 
15 | 
16 | @nox.session
17 | @nox.parametrize("group", ["bionty-base", "bionty-core", "bionty-docs"])
18 | def build(session: nox.Session, group: str):
19 |     branch = (
20 |         "main" if IS_PR else "main"
21 |     )  # point to "main" for PRs, to "release" for main
22 |     install_lamindb(session, branch=branch)
23 |     run(session, "uv pip install --system wetlab")
24 |     session.run(*"uv pip install --system -e .[dev]".split())
25 | 
26 |     coverage_args = "--cov=bionty --cov-append --cov-report=term-missing"
27 |     if group == "bionty-base":
28 |         session.run(*f"pytest {coverage_args} ./tests/base".split())
29 |     elif group == "bionty-core":
30 |         session.run(*f"pytest {coverage_args} ./tests/core".split())
31 |     elif group == "bionty-docs":
32 |         session.run(*f"pytest -s {coverage_args} ./docs/guide".split())
33 |         run(session, "lamin init --storage ./docsbuild --modules bionty")
34 |         build_docs(session, strict=True)
35 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
  1 | [build-system]
  2 | requires = ["flit_core >=3.2,<4"]
  3 | build-backend = "flit_core.buildapi"
  4 | 
  5 | [project]
  6 | name = "bionty"
  7 | requires-python = ">=3.10,<3.14"
  8 | authors = [{name = "Lamin Labs", email = "open-source@lamin.ai"}]
  9 | readme = "README.md"
 10 | dynamic = ["version", "description"]
 11 | classifiers = [
 12 |     "License :: OSI Approved :: Apache Software License",
 13 |     "Programming Language :: Python :: 3.10",
 14 |     "Programming Language :: Python :: 3.11",
 15 |     "Programming Language :: Python :: 3.12",
 16 |     "Programming Language :: Python :: 3.13",
 17 | ]
 18 | dependencies = [
 19 |     "lamindb>=1.12a1",
 20 |     "lamindb_setup>=0.81.2",
 21 |     "lamin_utils>=0.13.9",
 22 |     "requests",
 23 |     "pyyaml",
 24 | ]
 25 | 
 26 | [project.urls]
 27 | Home = "https://github.com/laminlabs/bionty"
 28 | 
 29 | [project.optional-dependencies]
 30 | dev = [
 31 |     "laminci",
 32 |     "pre-commit",
 33 |     "pytest>=6.0",
 34 |     "pytest-cov",
 35 |     "nbproject-test",
 36 |     "pronto",
 37 |     "pymysql",
 38 |     "bioregistry",
 39 | ]
 40 | 
 41 | [tool.pytest.ini_options]
 42 | testpaths = [
 43 |     "tests",
 44 | ]
 45 | filterwarnings = [
 46 |     "ignore::DeprecationWarning:botocore.*",
 47 |     "ignore::pronto.utils.warnings.SyntaxWarning",
 48 |     "ignore::pronto.utils.warnings.NotImplementedWarning",
 49 | ]
 50 | 
 51 | [tool.coverage.run]
 52 | omit = [
 53 |     "bionty/core/*",
 54 |     "bionty/migrations/*",
 55 |     "bionty/models.py",
 56 |     "bionty/ids.py",
 57 | ]
 58 | 
 59 | [tool.ruff]
 60 | src = ["src"]
 61 | line-length = 88
 62 | lint.select = [
 63 |     "F",  # Errors detected by Pyflakes
 64 |     "E",  # Error detected by Pycodestyle
 65 |     "W",  # Warning detected by Pycodestyle
 66 |     "I",  # isort
 67 |     "D",  # pydocstyle
 68 |     "B",  # flake8-bugbear
 69 |     "TID",  # flake8-tidy-imports
 70 |     "C4",  # flake8-comprehensions
 71 |     "BLE",  # flake8-blind-except
 72 |     "UP",  # pyupgrade
 73 |     "RUF100",  # Report unused noqa directives
 74 |     "TCH",  # Typing imports
 75 |     "NPY",  # Numpy specific rules
 76 |     "PTH"  # Use pathlib
 77 | ]
 78 | lint.ignore = [
 79 |     # Do not catch blind exception: `Exception`
 80 |     "BLE001",
 81 |     # Errors from function calls in argument defaults. These are fine when the result is immutable.
 82 |     "B008",
 83 |     # line too long -> we accept long comment lines; black gets rid of long code lines
 84 |     "E501",
 85 |     # Do not assign a lambda expression, use a def -> lambda expression assignments are convenient
 86 |     "E731",
 87 |     # allow I, O, l as variable names -> I is the identity matrix
 88 |     "E741",
 89 |     # Missing docstring in public module
 90 |     "D100",
 91 |     # undocumented-public-class
 92 |     "D101",
 93 |     # Missing docstring in public method
 94 |     "D102",
 95 |     # Missing docstring in public function
 96 |     "D103",
 97 |     # Missing docstring in public package
 98 |     "D104",
 99 |     # __magic__ methods are often self-explanatory, allow missing docstrings
100 |     "D105",
101 |     # Missing docstring in public nested class
102 |     "D106",
103 |     # Missing docstring in __init__
104 |     "D107",
105 |     ## Disable one in each pair of mutually incompatible rules
106 |     # We don’t want a blank line before a class docstring
107 |     "D203",
108 |     # 1 blank line required after class docstring
109 |     "D204",
110 |     # first line should end with a period [Bug: doesn't work with single-line docstrings]
111 |     # We want docstrings to start immediately after the opening triple quote
112 |     "D213",
113 |     # blank line required between summary line and description
114 |     "D205",
115 |     # Section underline is over-indented ("{name}")
116 |     "D215",
117 |     # First line should end with a period
118 |     "D400",
119 |     # First line should be in imperative mood; try rephrasing
120 |     "D401",
121 |     # First word of the first line should be capitalized: {} -> {}
122 |     "D403",
123 |     # First word of the docstring should not be "This"
124 |     "D404",
125 |     # Section name should end with a newline ("{name}")
126 |     "D406",
127 |     # Missing dashed underline after section ("{name}")
128 |     "D407",
129 |     # Section underline should be in the line following the section's name ("{name}")
130 |     "D408",
131 |     # Section underline should match the length of its name ("{name}")
132 |     "D409",
133 |     # No blank lines allowed between a section header and its content ("{name}")
134 |     "D412",
135 |     # Missing blank line after last section ("{name}")
136 |     "D413",
137 |     # Missing argument description
138 |     "D417",
139 |     # Imports unused
140 |     "F401",
141 |     # camelcase imported as lowercase
142 |     "N813",
143 |     # module import not at top level of file
144 |     "E402",
145 |     # `open()` should be replaced by `Path.open()`
146 |     "PTH123",
147 | ]
148 | 
149 | [tool.ruff.lint.pydocstyle]
150 | convention = "google"
151 | 
152 | [tool.ruff.lint.per-file-ignores]
153 | "docs/*" = ["I"]
154 | "tests/*" = ["D"]
155 | "*/__init__.py" = ["F401"]
156 | 


--------------------------------------------------------------------------------
/scripts/update_ontology_sources.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "32bd32d7",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "# Update ontology sources"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "code",
 13 |    "execution_count": null,
 14 |    "id": "66ec8c1e",
 15 |    "metadata": {},
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "!lamin connect laminlabs/bionty-assets"
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "code",
 23 |    "execution_count": null,
 24 |    "id": "905819d0",
 25 |    "metadata": {},
 26 |    "outputs": [],
 27 |    "source": [
 28 |     "import bionty as bt\n",
 29 |     "import lamindb as ln\n",
 30 |     "from bionty.base._ontology_url import get_ontology_url\n",
 31 |     "from bionty.core._source import register_source_in_bionty_assets\n",
 32 |     "from lamin_utils import logger\n",
 33 |     "\n",
 34 |     "ln.settings.verbosity = \"hint\"\n",
 35 |     "\n",
 36 |     "ln.track(\"7extigZj6QNG\")"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "markdown",
 41 |    "id": "cff430f5",
 42 |    "metadata": {},
 43 |    "source": [
 44 |     "All entities that are not listed in the following matrix must be curated manually as they require intervention.\n",
 45 |     "Consult https://bionty-assets-gczz.netlify.app/ingest/ for guidance."
 46 |    ]
 47 |   },
 48 |   {
 49 |    "cell_type": "code",
 50 |    "execution_count": null,
 51 |    "id": "321c31b9",
 52 |    "metadata": {},
 53 |    "outputs": [],
 54 |    "source": [
 55 |     "configs = [\n",
 56 |     "    (\"Disease\", \"mondo\", \"all\"),\n",
 57 |     "    (\"CellType\", \"cl\", \"all\"),\n",
 58 |     "    (\"Organism\", \"ncbitaxon\", \"all\"),\n",
 59 |     "    (\n",
 60 |     "        \"Tissue\",\n",
 61 |     "        \"uberon\",\n",
 62 |     "        \"all\",\n",
 63 |     "    ),  # This may take a long time due to parsing, set verbosity to hint to see progress updates\n",
 64 |     "    (\"Disease\", \"doid\", \"human\"),\n",
 65 |     "    (\"ExperimentalFactor\", \"efo\", \"all\"),\n",
 66 |     "    (\"Phenotype\", \"pato\", \"all\"),\n",
 67 |     "    (\"Phenotype\", \"hp\", \"human\"),\n",
 68 |     "    (\"Pathway\", \"go\", \"all\"),\n",
 69 |     "    # (\"Pathway\", \"pw\", \"all\"), Currently leads to a URL error - upstream issue\n",
 70 |     "    (\"DevelopmentalStage\", \"hsapdv\", \"human\"),\n",
 71 |     "    (\"DevelopmentalStage\", \"mmusdv\", \"mouse\"),\n",
 72 |     "    (\"Ethnicity\", \"hancestro\", \"human\"),\n",
 73 |     "    # (\"Drug\", \"dron\", \"all\"), Not a Bionty entity (yet)\n",
 74 |     "]"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "code",
 79 |    "execution_count": null,
 80 |    "id": "c51bf23b",
 81 |    "metadata": {},
 82 |    "outputs": [],
 83 |    "source": [
 84 |     "for i, config in enumerate(configs, 1):\n",
 85 |     "    entity, source_name, organism, *version = config\n",
 86 |     "    config_id = f\"{entity}_{source_name}_{organism}\"\n",
 87 |     "    logger.info(f\"[{i}/{len(configs)}] Processing {config_id}\")\n",
 88 |     "\n",
 89 |     "    try:\n",
 90 |     "        *_, version_to_use = get_ontology_url(\n",
 91 |     "            prefix=source_name, version=version[0] if version else None\n",
 92 |     "        )\n",
 93 |     "\n",
 94 |     "        new_df = getattr(bt.base, entity)(\n",
 95 |     "            source=source_name, version=version_to_use\n",
 96 |     "        ).to_dataframe()\n",
 97 |     "\n",
 98 |     "        if new_df.empty or not {\"name\", \"synonyms\"}.issubset(new_df.columns):\n",
 99 |     "            logger.warning(f\"{config_id} failed validation. Skipping...\")\n",
100 |     "            continue\n",
101 |     "\n",
102 |     "        try:\n",
103 |     "            current_source = bt.Source.filter(\n",
104 |     "                entity=f\"bionty.{entity}\",\n",
105 |     "                name=source_name,\n",
106 |     "                organism=organism,\n",
107 |     "                currently_used=True,\n",
108 |     "            ).one_or_none()\n",
109 |     "            if current_source:\n",
110 |     "                current_df = getattr(bt.base, entity)(\n",
111 |     "                    source=current_source\n",
112 |     "                ).to_dataframe()\n",
113 |     "                if new_df.shape[0] < current_df.shape[0]:\n",
114 |     "                    logger.warning(\n",
115 |     "                        f\"{config_id} has fewer rows than current. Skipping...\"\n",
116 |     "                    )\n",
117 |     "                    continue\n",
118 |     "        except ValueError as e:\n",
119 |     "            if \"No source url is available\" in str(e):\n",
120 |     "                pass  # This occurs during testing in local instances\n",
121 |     "            else:\n",
122 |     "                raise\n",
123 |     "        except Exception:\n",
124 |     "            pass\n",
125 |     "\n",
126 |     "        source_rec = getattr(bt, entity).add_source(\n",
127 |     "            source=source_name, version=version_to_use\n",
128 |     "        )\n",
129 |     "        register_source_in_bionty_assets(\n",
130 |     "            f\"{bt.base.settings.dynamicdir}/df_{organism}__{source_name}__{version_to_use}__{entity}.parquet\",\n",
131 |     "            source=source_rec,\n",
132 |     "            is_dataframe=True,\n",
133 |     "        )\n",
134 |     "        register_source_in_bionty_assets(\n",
135 |     "            f\"{bt.base.settings.dynamicdir}/ontology_{organism}__{source_name}__{version_to_use}__{entity}\",\n",
136 |     "            source=source_rec,\n",
137 |     "            is_dataframe=False,\n",
138 |     "        )\n",
139 |     "\n",
140 |     "        logger.info(f\"registered {config_id} version {version_to_use}\")\n",
141 |     "\n",
142 |     "    except ValueError as e:\n",
143 |     "        if \"artifact already exists\" in str(e):\n",
144 |     "            logger.warning(f\"{config_id} already registered. Skipping...\")\n",
145 |     "        else:\n",
146 |     "            logger.error(f\"{config_id} failed: {e}\")\n",
147 |     "    except FileNotFoundError:\n",
148 |     "        logger.warning(f\"{config_id} files not found. Skipping...\")\n",
149 |     "    except Exception as e:\n",
150 |     "        logger.error(\n",
151 |     "            f\"[{i}/{len(configs)}] {config_id} failed: {type(e).__name__}: {str(e)}\"\n",
152 |     "        )\n",
153 |     "        continue"
154 |    ]
155 |   },
156 |   {
157 |    "cell_type": "code",
158 |    "execution_count": null,
159 |    "id": "f93f3b4a",
160 |    "metadata": {},
161 |    "outputs": [],
162 |    "source": [
163 |     "ln.finish()"
164 |    ]
165 |   }
166 |  ],
167 |  "metadata": {
168 |   "kernelspec": {
169 |    "display_name": "lamindb",
170 |    "language": "python",
171 |    "name": "python3"
172 |   },
173 |   "language_info": {
174 |    "codemirror_mode": {
175 |     "name": "ipython",
176 |     "version": 3
177 |    },
178 |    "file_extension": ".py",
179 |    "mimetype": "text/x-python",
180 |    "name": "python",
181 |    "nbconvert_exporter": "python",
182 |    "pygments_lexer": "ipython3",
183 |    "version": "3.12.8"
184 |   }
185 |  },
186 |  "nbformat": 4,
187 |  "nbformat_minor": 5
188 | }
189 | 


--------------------------------------------------------------------------------
/tests/base/dev/test_handle_sources.py:
--------------------------------------------------------------------------------
 1 | import tempfile
 2 | from pathlib import Path
 3 | 
 4 | import pytest
 5 | from bionty.base.dev._handle_sources import (
 6 |     parse_currently_used_sources,
 7 |     parse_sources_yaml,
 8 | )
 9 | 
10 | 
11 | @pytest.fixture(scope="function")
12 | def versions_yaml_replica():
13 |     input_file_content = """
14 |     version: "0.3.0"
15 |     Organism:
16 |       ensembl:
17 |         vertebrates:
18 |           latest-version: release-112
19 |           url: https://ftp.ensembl.org/pub/{version}/species_EnsemblVertebrates.txt
20 |         name: Ensembl
21 |         website: https://www.ensembl.org/index.html
22 |     Gene:
23 |       ensembl:
24 |         human:
25 |           latest-version: release-112
26 |           url: s3://bionty-assets/df_human__ensembl__{version}__Gene.parquet
27 |         mouse:
28 |           latest-version: release-112
29 |           url: s3://bionty-assets/df_mouse__ensembl__{version}__Gene.parquet
30 |         name: Ensembl
31 |         website: https://www.ensembl.org/index.html
32 |     CellType:
33 |       cl:
34 |         all:
35 |           latest-version: 2024-08-16
36 |           url: http://purl.obolibrary.org/obo/cl/releases/{version}/cl.owl
37 |         name: Cell Ontology
38 |         website: https://obophenotype.github.io/cell-ontology
39 |     """
40 |     with tempfile.NamedTemporaryFile(mode="w+", delete=False) as f:
41 |         f.write(input_file_content)
42 |         f.flush()
43 |         yield f.name
44 | 
45 |     Path(f.name).unlink()
46 | 
47 | 
48 | def test_parse_versions_yaml(versions_yaml_replica):
49 |     parsed_df = parse_sources_yaml(versions_yaml_replica)
50 |     assert parsed_df.shape == (4, 7)
51 |     assert all(parsed_df["entity"].values == ["Organism", "Gene", "Gene", "CellType"])
52 |     assert all(parsed_df["organism"].values == ["vertebrates", "human", "mouse", "all"])
53 |     assert all(parsed_df["name"].values == ["ensembl", "ensembl", "ensembl", "cl"])
54 | 
55 | 
56 | def test_parse_current_versions(versions_yaml_replica):
57 |     expected = {
58 |         "Organism": {"vertebrates": {"ensembl": "release-112"}},
59 |         "Gene": {
60 |             "human": {"ensembl": "release-112"},
61 |             "mouse": {"ensembl": "release-112"},
62 |         },
63 |         "CellType": {"all": {"cl": "2024-08-16"}},
64 |     }
65 | 
66 |     assert parse_currently_used_sources(versions_yaml_replica) == expected
67 | 


--------------------------------------------------------------------------------
/tests/base/dev/test_io.py:
--------------------------------------------------------------------------------
 1 | import tempfile
 2 | from pathlib import Path
 3 | 
 4 | import pytest
 5 | from bionty.base.dev._io import url_download
 6 | 
 7 | 
 8 | @pytest.fixture
 9 | def local(tmp_path):
10 |     url = "https://bionty-assets.s3.amazonaws.com/bfxpipelines.json"
11 |     localpath = tmp_path / Path(url).name
12 |     yield localpath, url
13 |     if localpath.exists():
14 |         localpath.unlink()
15 | 
16 | 
17 | def test_url_download(local):
18 |     localpath = local[0]
19 |     url = local[1]
20 |     assert not localpath.exists()
21 | 
22 |     downloaded_path = Path(url_download(url=url, localpath=localpath))
23 |     assert downloaded_path.exists()
24 | 
25 | 
26 | def test_local_file():
27 |     with tempfile.TemporaryDirectory() as temp_dir:
28 |         local_file = Path(temp_dir) / "test.txt"
29 |         target_file = Path(temp_dir) / "downloaded.txt"
30 |         test_content = "temporary file"
31 | 
32 |         local_file.write_text(test_content)
33 |         assert local_file.exists(), "Test file was not created"
34 | 
35 |         downloaded_path = Path(
36 |             url_download(url=f"file://{local_file}", localpath=target_file)
37 |         )
38 | 
39 |         assert downloaded_path.exists(), "Downloaded file not found"
40 |         assert downloaded_path.read_text() == test_content, "Content mismatch"
41 | 
42 |         if downloaded_path.exists():
43 |             downloaded_path.unlink()
44 | 


--------------------------------------------------------------------------------
/tests/base/entities/test_bfxpipeline.py:
--------------------------------------------------------------------------------
 1 | import bionty.base as bt_base
 2 | import pandas as pd
 3 | 
 4 | 
 5 | def test_lamin_bfxpipeline_inspect_name():
 6 |     df = pd.DataFrame(
 7 |         index=[
 8 |             "nf-core methylseq v2.6.0",
 9 |             "Cell Ranger v8.0.0",
10 |             "This bfx pipeline does not exist",
11 |         ]
12 |     )
13 | 
14 |     bfxp = bt_base.BFXPipeline(source="lamin")
15 |     inspected_df = bfxp.inspect(df.index, field=bfxp.name, return_df=True)
16 | 
17 |     inspect = inspected_df["__validated__"].reset_index(drop=True)
18 |     expected_series = pd.Series([True, True, False])
19 | 
20 |     assert inspect.equals(expected_series)
21 | 


--------------------------------------------------------------------------------
/tests/base/entities/test_biosample.py:
--------------------------------------------------------------------------------
1 | import bionty.base as bt_base
2 | 
3 | 
4 | def test_ncbi_biosample():
5 |     bs = bt_base.BioSample(source="ncbi")
6 |     df = bs.to_dataframe()
7 |     assert "edta_inhibitor_tested" in df.abbr.tolist()
8 | 


--------------------------------------------------------------------------------
/tests/base/entities/test_cellline.py:
--------------------------------------------------------------------------------
 1 | import bionty.base as bt_base
 2 | import pandas as pd
 3 | 
 4 | 
 5 | def test_clo_cellline_inspect_name():
 6 |     df = pd.DataFrame(
 7 |         index=[
 8 |             "253D cell",
 9 |             "HEK293",
10 |             "2C1H7 cell",
11 |             "283TAg cell",
12 |             "This cell line does not exist",
13 |         ]
14 |     )
15 | 
16 |     cl = bt_base.CellLine(source="clo")
17 |     inspected_df = cl.inspect(df.index, field=cl.name, return_df=True)
18 | 
19 |     inspect = inspected_df["__validated__"].reset_index(drop=True)
20 |     expected_series = pd.Series([True, True, True, True, False])
21 | 
22 |     assert inspect.equals(expected_series)
23 | 
24 | 
25 | def test_depmap_cellline_inspect_name():
26 |     df = pd.DataFrame(
27 |         index=[
28 |             "NIH:OVCAR-3",
29 |             "NRH-LMS1",
30 |             "HEL",
31 |             "HEL 92.1.7",
32 |             "This cell line does not exist",
33 |         ]
34 |     )
35 | 
36 |     cl = bt_base.CellLine(source="depmap")
37 |     inspected_df = cl.inspect(df.index, field=cl.name, return_df=True)
38 | 
39 |     inspect = inspected_df["__validated__"].reset_index(drop=True)
40 |     expected_series = pd.Series([True, True, True, True, False])
41 | 
42 |     assert inspect.equals(expected_series)
43 | 


--------------------------------------------------------------------------------
/tests/base/entities/test_cellmarker.py:
--------------------------------------------------------------------------------
 1 | import bionty.base as bt_base
 2 | import pandas as pd
 3 | 
 4 | 
 5 | def test_cellmarker_cellmarker_inspect_name_human():
 6 |     df = pd.DataFrame(
 7 |         index=["CCR7", "CD69", "CD8", "CD45RA", "This protein does not exist"]
 8 |     )
 9 | 
10 |     cm = bt_base.CellMarker(source="cellmarker")
11 |     curated = cm.inspect(df.index, field=cm.name)
12 | 
13 |     assert curated["validated"] == ["CD69", "CD8", "CD45RA"]
14 |     assert curated["non_validated"] == ["CCR7", "This protein does not exist"]
15 | 
16 | 
17 | def test_cellmarker_cellmarker_inspect_name_mouse():
18 |     df = pd.DataFrame(
19 |         index=["Tcf4", "Cd36", "Cd34", "Lgr6", "This protein does not exist"]
20 |     )
21 | 
22 |     cm = bt_base.CellMarker(source="cellmarker", organism="mouse")
23 |     inspected_df = cm.inspect(df.index, field=cm.name, return_df=True)
24 | 
25 |     inspect = inspected_df["__validated__"].reset_index(drop=True)
26 |     expected_series = pd.Series([True, False, True, True, False])
27 | 
28 |     assert inspect.equals(expected_series)
29 | 


--------------------------------------------------------------------------------
/tests/base/entities/test_celltype.py:
--------------------------------------------------------------------------------
 1 | import bionty.base as bt_base
 2 | import pandas as pd
 3 | 
 4 | 
 5 | def test_cl_celltype_inspect_name():
 6 |     df = pd.DataFrame(
 7 |         index=[
 8 |             "Boettcher cell",
 9 |             "bone marrow cell",
10 |             "interstitial cell of ovary",
11 |             "pancreatic ductal cell",
12 |             "This cell type does not exist",
13 |         ]
14 |     )
15 | 
16 |     ct = bt_base.CellType(source="cl")
17 |     inspected_df = ct.inspect(df.index, field=ct.name, return_df=True)
18 | 
19 |     inspect = inspected_df["__validated__"].reset_index(drop=True)
20 |     expected_series = pd.Series([True, True, True, True, False])
21 | 
22 |     assert inspect.equals(expected_series)
23 | 
24 | 
25 | def test_cl_celltype_version():
26 |     # old version, not in s3://bionty-assets
27 |     ct = bt_base.CellType(version="2020-05-20")
28 |     assert ct.to_dataframe().shape[0] == 2355
29 | 


--------------------------------------------------------------------------------
/tests/base/entities/test_developmentalstage.py:
--------------------------------------------------------------------------------
 1 | import bionty.base as bt_base
 2 | import pandas as pd
 3 | 
 4 | 
 5 | def test_hsapdv_developmentalstage_inspect_name():
 6 |     df = pd.DataFrame(
 7 |         index=[
 8 |             "blastula stage",
 9 |             "Carnegie stage 03",
10 |             "neurula stage",
11 |             "organogenesis stage",
12 |             "This developmental stage does not exist",
13 |         ]
14 |     )
15 | 
16 |     ds = bt_base.DevelopmentalStage(source="hsapdv")
17 |     inspected_df = ds.inspect(df.index, field=ds.name, return_df=True)
18 | 
19 |     inspect = inspected_df["__validated__"].reset_index(drop=True)
20 |     expected_series = pd.Series([True, True, True, True, False])
21 | 
22 |     assert inspect.equals(expected_series)
23 | 


--------------------------------------------------------------------------------
/tests/base/entities/test_disease.py:
--------------------------------------------------------------------------------
  1 | import bionty.base as bt_base
  2 | import pandas as pd
  3 | 
  4 | 
  5 | def test_mondo_disease_inspect_name():
  6 |     df = pd.DataFrame(
  7 |         index=[
  8 |             "supraglottis cancer",
  9 |             "alexia",
 10 |             "trigonitis",
 11 |             "paranasal sinus disorder",
 12 |             "This disease does not exist",
 13 |         ]
 14 |     )
 15 | 
 16 |     ds = bt_base.Disease(source="mondo", version="2025-06-03")
 17 |     inspected_df = ds.inspect(df.index, field=ds.name, return_df=True)
 18 | 
 19 |     inspect = inspected_df["__validated__"].reset_index(drop=True)
 20 |     expected_series = pd.Series([True, True, True, True, False])
 21 | 
 22 |     assert inspect.equals(expected_series)
 23 | 
 24 | 
 25 | def test_doid_disease_inspect_ontology_id():
 26 |     df = pd.DataFrame(
 27 |         index=[
 28 |             "DOID:0001816",
 29 |             "DOID:0002116",
 30 |             "DOID:5547",
 31 |             "DOID:5551",
 32 |             "This disease does not exist",
 33 |         ]
 34 |     )
 35 | 
 36 |     ds = bt_base.Disease(source="doid")
 37 |     inspected_df = ds.inspect(df.index, field=ds.ontology_id, return_df=True)
 38 | 
 39 |     inspect = inspected_df["__validated__"].reset_index(drop=True)
 40 |     expected_series = pd.Series([True, True, True, True, False])
 41 | 
 42 |     assert inspect.equals(expected_series)
 43 | 
 44 | 
 45 | def test_icd_9_disease_inspect_name():
 46 |     df = pd.DataFrame(
 47 |         index=[
 48 |             "Cholera d/t vib cholerae",
 49 |             "Typhoid fever",
 50 |             "Mult gest-plac/sac NOS",
 51 |             "Paratyphoid fever a",
 52 |             "This disease does not exist",
 53 |         ]
 54 |     )
 55 | 
 56 |     ds = bt_base.Disease(source="icd", version="icd-9-2011")
 57 |     inspected_df = ds.inspect(df.index, field=ds.name, return_df=True)
 58 | 
 59 |     inspect = inspected_df["__validated__"].reset_index(drop=True)
 60 |     expected_series = pd.Series([True, True, True, True, False])
 61 | 
 62 |     assert inspect.equals(expected_series)
 63 | 
 64 | 
 65 | def test_icd_10_disease_inspect_name():
 66 |     df = pd.DataFrame(
 67 |         index=[
 68 |             "Vaping-related disorder",
 69 |             "COVID-19",
 70 |             "Typhoid fever with heart involvement",
 71 |             "Typhoid fever, unspecified",
 72 |             "This disease does not exist",
 73 |         ]
 74 |     )
 75 | 
 76 |     ds = bt_base.Disease(source="icd", version="icd-10-2020")
 77 |     inspected_df = ds.inspect(df.index, field=ds.name, return_df=True)
 78 | 
 79 |     inspect = inspected_df["__validated__"].reset_index(drop=True)
 80 |     expected_series = pd.Series([True, True, True, True, False])
 81 | 
 82 |     assert inspect.equals(expected_series)
 83 | 
 84 | 
 85 | def test_icd_11_disease_inspect_name():
 86 |     df = pd.DataFrame(
 87 |         index=[
 88 |             "Certain infectious or parasitic diseases",
 89 |             "Cholera",
 90 |             "Intestinal infection due to other Vibrio",
 91 |             "Gastroenteritis or colitis of infectious origin",
 92 |             "This disease does not exist",
 93 |         ]
 94 |     )
 95 | 
 96 |     ds = bt_base.Disease(source="icd", version="icd-11-2023")
 97 |     inspected_df = ds.inspect(df.index, field=ds.name, return_df=True)
 98 | 
 99 |     inspect = inspected_df["__validated__"].reset_index(drop=True)
100 |     expected_series = pd.Series([True, True, True, True, False])
101 | 
102 |     assert inspect.equals(expected_series)
103 | 


--------------------------------------------------------------------------------
/tests/base/entities/test_drug.py:
--------------------------------------------------------------------------------
 1 | import bionty.base as bt_base
 2 | import pandas as pd
 3 | 
 4 | 
 5 | def test_dron_drug_inspect_name():
 6 |     df = pd.DataFrame(
 7 |         index=[
 8 |             "triflusal",
 9 |             "citrus bioflavonoids",
10 |             "Candida albicans",
11 |             "Hyoscyamus extract",
12 |             "This drug does not exist",
13 |         ]
14 |     )
15 | 
16 |     dt = bt_base.Drug(source="dron")
17 |     inspected_df = dt.inspect(df.index, field=dt.name, return_df=True)
18 | 
19 |     inspect = inspected_df["__validated__"].reset_index(drop=True)
20 |     expected_series = pd.Series([True, True, True, True, False])
21 | 
22 |     assert inspect.equals(expected_series)
23 | 
24 | 
25 | def test_chebi_drug_inspect_name():
26 |     df = pd.DataFrame(
27 |         index=[
28 |             "navitoclax",
29 |             "Vismione D",
30 |             "(+)-Atherospermoline",
31 |             "N-[(2R,3S,6R)-2-(hydroxymethyl)-6-[2-[[oxo-[4-(trifluoromethyl)anilino]methyl]amino]ethyl]-3-oxanyl]-3-pyridinecarboxamide",
32 |             "This drug does not exist",
33 |         ]
34 |     )
35 | 
36 |     dt = bt_base.Drug(source="chebi")
37 |     inspected_df = dt.inspect(df.index, field=dt.name, return_df=True)
38 | 
39 |     inspect = inspected_df["__validated__"].reset_index(drop=True)
40 |     expected_series = pd.Series([True, True, True, True, False])
41 | 
42 |     assert inspect.equals(expected_series)
43 | 
44 | 
45 | def test_chebi_chembl_id():
46 |     dt = bt_base.Drug(source="chebi")
47 |     assert "CHEMBL500609" in dt.to_dataframe()["chembl_id"].values
48 | 


--------------------------------------------------------------------------------
/tests/base/entities/test_ethnicity.py:
--------------------------------------------------------------------------------
 1 | import bionty.base as bt_base
 2 | import pandas as pd
 3 | 
 4 | 
 5 | def test_hancestro_ethnicity_inspect_name():
 6 |     df = pd.DataFrame(
 7 |         index=[
 8 |             "Mende",
 9 |             "European",
10 |             "South Asian",
11 |             "Arab",
12 |             "This ethnicity does not exist",
13 |         ]
14 |     )
15 | 
16 |     et = bt_base.Ethnicity(source="hancestro")
17 |     df.index = et.standardize(df.index)
18 |     inspected_df = et.inspect(df.index, field=et.name, return_df=True)
19 | 
20 |     inspect = inspected_df["__validated__"].reset_index(drop=True)
21 |     expected_series = pd.Series([True, True, True, True, False])
22 | 
23 |     assert inspect.equals(expected_series)
24 | 


--------------------------------------------------------------------------------
/tests/base/entities/test_experimentalfactor.py:
--------------------------------------------------------------------------------
 1 | import bionty.base as bt_base
 2 | import pandas as pd
 3 | 
 4 | 
 5 | def test_efo_experimental_factor_inspect_ontology_id():
 6 |     df = pd.DataFrame(
 7 |         index=[
 8 |             "EFO:1002048",
 9 |             "EFO:1002050",
10 |             "EFO:1002047",
11 |             "EFO:1002049",
12 |             "This readout does not exist",
13 |         ]
14 |     )
15 | 
16 |     ro = bt_base.ExperimentalFactor(source="efo")
17 |     inspected_df = ro.inspect(df.index, ro.ontology_id, return_df=True)
18 | 
19 |     inspect = inspected_df["__validated__"].reset_index(drop=True)
20 |     expected_series = pd.Series([True, True, True, True, False])
21 | 
22 |     assert inspect.equals(expected_series)
23 | 
24 | 
25 | def test_efo_shape():
26 |     """We observed issues with new EFO versions not including all records."""
27 |     # 3.78.0 is the latest version where had initially observed this issue
28 |     # If this works well, we may unpin the fixed version
29 |     assert bt_base.ExperimentalFactor(version="3.78.0").to_dataframe().shape[0] > 18000
30 | 


--------------------------------------------------------------------------------
/tests/base/entities/test_gene.py:
--------------------------------------------------------------------------------
  1 | import bionty.base as bt_base
  2 | import pandas as pd
  3 | import pytest
  4 | from bionty.base.entities._gene import MappingResult
  5 | 
  6 | 
  7 | @pytest.fixture(scope="module")
  8 | def genes():
  9 |     data = {
 10 |         "gene symbol": ["A1CF", "A1BG", "FANCD1", "corrupted"],
 11 |         "ncbi id": ["29974", "1", "5133", "corrupted"],
 12 |         "ensembl_gene_id": [
 13 |             "ENSG00000148584",
 14 |             "ENSG00000121410",
 15 |             "ENSG00000188389",
 16 |             "ENSG0000corrupted",
 17 |         ],
 18 |     }
 19 |     df = pd.DataFrame(data).set_index("ensembl_gene_id")
 20 | 
 21 |     gn = bt_base.Gene(source="ensembl")
 22 | 
 23 |     return df, gn
 24 | 
 25 | 
 26 | def test_gene_ensembl_inspect_hgnc_id(genes):
 27 |     df, gn = genes
 28 | 
 29 |     inspected_df = gn.inspect(df["ncbi id"], field=gn.ncbi_gene_id, return_df=True)
 30 | 
 31 |     inspect = inspected_df["__validated__"].reset_index(drop=True)
 32 |     expected_series = pd.Series([True, True, True, False])
 33 | 
 34 |     assert inspect.equals(expected_series)
 35 | 
 36 | 
 37 | def test_ensemblgene_download():
 38 |     from bionty.base.entities._gene import EnsemblGene
 39 | 
 40 |     ensembl_gene = EnsemblGene(organism="human", version="release-110")
 41 |     assert ensembl_gene._organism.name == "human"
 42 | 
 43 |     external_df = ensembl_gene.external_dbs()
 44 |     assert external_df.shape[0] > 1
 45 | 
 46 |     df = ensembl_gene.download_df(external_db_names={"HGNC": "hgnc_id"})
 47 |     assert df.shape[0] > 6000
 48 |     assert "hgnc_id" in df.columns
 49 | 
 50 | 
 51 | def test_ensemblgene_map_legacy_ids():
 52 |     gn = bt_base.Gene(organism="human", version="release-110")
 53 |     legacy_genes = [
 54 |         "ENSG00000280710",
 55 |         "ENSG00000261490",
 56 |         "ENSG00000203812",
 57 |         "ENSG00000204092",
 58 |         "ENSG00000215271",
 59 |     ]
 60 |     result = gn.map_legacy_ids(legacy_genes)
 61 |     assert result == MappingResult(
 62 |         mapped={
 63 |             "ENSG00000204092": "ENSG00000226070",
 64 |             "ENSG00000215271": "ENSG00000290292",
 65 |             "ENSG00000261490": "ENSG00000071127",
 66 |             "ENSG00000280710": "ENSG00000125304",
 67 |         },
 68 |         ambiguous={"ENSG00000203812": ["ENSG00000288859", "ENSG00000288825"]},
 69 |         unmapped=[],
 70 |     )
 71 | 
 72 |     result = gn.map_legacy_ids("ENSG00000280710")
 73 |     assert result == MappingResult(
 74 |         mapped={"ENSG00000280710": "ENSG00000125304"},
 75 |         ambiguous={},
 76 |         unmapped=[],
 77 |     )
 78 | 
 79 |     result = gn.map_legacy_ids(["ENSG00000280710"])
 80 |     assert result == MappingResult(
 81 |         mapped={"ENSG00000280710": "ENSG00000125304"},
 82 |         ambiguous={},
 83 |         unmapped=[],
 84 |     )
 85 | 
 86 | 
 87 | def test_old_ensembl_version():
 88 |     gene_ontology_102 = bt_base.Gene(
 89 |         source="ensembl", organism="mouse", version="release-102"
 90 |     )
 91 |     assert (
 92 |         "ENSMUSG00000021745"
 93 |         in gene_ontology_102.to_dataframe()["ensembl_gene_id"].values
 94 |     )
 95 | 
 96 |     gene_ontology_112 = bt_base.Gene(
 97 |         source="ensembl", organism="mouse", version="release-112"
 98 |     )
 99 |     assert (
100 |         "ENSMUSG00000021745"
101 |         not in gene_ontology_112.to_dataframe()["ensembl_gene_id"].values
102 |     )
103 | 


--------------------------------------------------------------------------------
/tests/base/entities/test_organism.py:
--------------------------------------------------------------------------------
 1 | import bionty.base as bt_base
 2 | import pandas as pd
 3 | 
 4 | 
 5 | def test_ensembl_organism_inspect_name():
 6 |     df = pd.DataFrame(
 7 |         index=[
 8 |             "spiny chromis",
 9 |             "silver-eye",
10 |             "platyfish",
11 |             "california sea lion",
12 |             "This organism does not exist",
13 |         ]
14 |     )
15 | 
16 |     sp = bt_base.Organism(source="ensembl")
17 |     inspected_df = sp.inspect(df.index, field=sp.name, return_df=True)
18 | 
19 |     inspect = inspected_df["__validated__"].reset_index(drop=True)
20 |     expected_series = pd.Series([True, True, True, True, False])
21 | 
22 |     assert inspect.equals(expected_series)
23 | 
24 | 
25 | def test_ncbitaxon_organism_inspect_name():
26 |     df = pd.DataFrame(
27 |         index=[
28 |             "human",
29 |             "ancylobacter aquaticus",
30 |             "microbacterium sp. 6.11-vpa",
31 |             "calamagrostis varia",
32 |             "This organism does not exist",
33 |         ]
34 |     )
35 |     sp = bt_base.Organism(source="ncbitaxon")
36 |     inspected_df = sp.inspect(df.index, field=sp.name, return_df=True)
37 | 
38 |     inspect = inspected_df["__validated__"].reset_index(drop=True)
39 |     expected_series = pd.Series([True, True, True, True, False])
40 | 
41 |     assert inspect.equals(expected_series)
42 | 
43 | 
44 | def test_ensembl_organism_version():
45 |     df = bt_base.Organism(version="release-108").to_dataframe()
46 |     assert df.shape[0] == 315
47 | 
48 | 
49 | def test_ensembl_organism_taxa():
50 |     for sp in ["bacteria", "plants", "fungi", "metazoa"]:
51 |         df = bt_base.Organism(taxa=sp).to_dataframe()
52 |         assert df.shape[0] > 10
53 | 
54 | 
55 | def test_ncbitaxon_organism():
56 |     df = bt_base.Organism(source="ncbitaxon").to_dataframe()
57 |     assert df.shape[0] > 10
58 | 


--------------------------------------------------------------------------------
/tests/base/entities/test_pathway.py:
--------------------------------------------------------------------------------
 1 | import bionty.base as bt_base
 2 | import pandas as pd
 3 | 
 4 | 
 5 | def test_pw_go_inspect_ontology_id():
 6 |     df = pd.DataFrame(
 7 |         index=[
 8 |             "GO:1905210",
 9 |             "GO:1905211",
10 |             "GO:1905212",
11 |             "GO:1905208",
12 |             "This pathway does not exist",
13 |         ]
14 |     )
15 | 
16 |     pw = bt_base.Pathway(source="go")
17 |     inspected_df = pw.inspect(df.index, pw.ontology_id, return_df=True)
18 | 
19 |     inspect = inspected_df["__validated__"].reset_index(drop=True)
20 |     expected_series = pd.Series([True, True, True, True, False])
21 | 
22 |     assert inspect.equals(expected_series)
23 | 
24 | 
25 | def test_pw_pathway_inspect_name():
26 |     df = pd.DataFrame(
27 |         index=[
28 |             "Toll-like receptor 9 signaling pathway",
29 |             "Toll-like receptor TLR1:TLR2 signaling pathway",
30 |             "classic metabolic pathway",
31 |             "regulatory pathway",
32 |             "This pathway does not exist",
33 |         ]
34 |     )
35 | 
36 |     pw = bt_base.Pathway(source="pw")
37 |     inspected_df = pw.inspect(df.index, field=pw.name, return_df=True)
38 | 
39 |     inspect = inspected_df["__validated__"].reset_index(drop=True)
40 |     expected_series = pd.Series([True, True, True, True, False])
41 | 
42 |     assert inspect.equals(expected_series)
43 | 


--------------------------------------------------------------------------------
/tests/base/entities/test_phenotype.py:
--------------------------------------------------------------------------------
  1 | import bionty.base as bt_base
  2 | import pandas as pd
  3 | 
  4 | 
  5 | def test_hp_phenotype_inspect_name():
  6 |     df = pd.DataFrame(
  7 |         index=[
  8 |             "Specific learning disability",
  9 |             "Dystonia",
 10 |             "Cerebral hemorrhage",
 11 |             "Slurred speech",
 12 |             "This phenotype does not exist",
 13 |         ]
 14 |     )
 15 | 
 16 |     pt = bt_base.Phenotype(source="hp")
 17 |     inspected_df = pt.inspect(df.index, field=pt.name, return_df=True)
 18 | 
 19 |     inspect = inspected_df["__validated__"].reset_index(drop=True)
 20 |     expected_series = pd.Series([True, True, True, True, False])
 21 | 
 22 |     assert inspect.equals(expected_series)
 23 | 
 24 | 
 25 | def test_mp_phenotype_inspect_name():
 26 |     df = pd.DataFrame(
 27 |         index=[
 28 |             "decreased ovary apoptosis",
 29 |             "abnormal Ebner's gland morphology",
 30 |             "abnormal lacrimal sac morphology",
 31 |             "abnormal nictitating membrane morphology",
 32 |             "This phenotype does not exist",
 33 |         ]
 34 |     )
 35 | 
 36 |     pt = bt_base.Phenotype(source="mp")
 37 |     inspected_df = pt.inspect(df.index, field=pt.name, return_df=True)
 38 | 
 39 |     inspect = inspected_df["__validated__"].reset_index(drop=True)
 40 |     expected_series = pd.Series([True, True, True, True, False])
 41 | 
 42 |     assert inspect.equals(expected_series)
 43 | 
 44 | 
 45 | def test_zp_phenotype_inspect_name():
 46 |     df = pd.DataFrame(
 47 |         index=[
 48 |             "somitogenesis disrupted, abnormal",
 49 |             "somite specification disrupted, abnormal",
 50 |             "liver has extra parts of type collagen trimer liver, abnormal",
 51 |             "neuromast hair cell normal process quality apoptotic process, abnormal",
 52 |             "This phenotype does not exist",
 53 |         ]
 54 |     )
 55 | 
 56 |     pt = bt_base.Phenotype(source="zp")
 57 |     inspected_df = pt.inspect(df.index, field=pt.name, return_df=True)
 58 | 
 59 |     inspect = inspected_df["__validated__"].reset_index(drop=True)
 60 |     expected_series = pd.Series([True, True, True, True, False])
 61 | 
 62 |     assert inspect.equals(expected_series)
 63 | 
 64 | 
 65 | def test_phe_phenotype_inspect_name():
 66 |     df = pd.DataFrame(
 67 |         index=[
 68 |             "Intestinal infection due to C. difficile",
 69 |             "Sepsis and SIRS",
 70 |             "Systemic inflammatory response syndrome (SIRS)",
 71 |             "Septic shock",
 72 |             "This phenotype does not exist",
 73 |         ]
 74 |     )
 75 | 
 76 |     pt = bt_base.Phenotype(source="phe")
 77 |     inspected_df = pt.inspect(df.index, field=pt.name, return_df=True)
 78 | 
 79 |     inspect = inspected_df["__validated__"].reset_index(drop=True)
 80 |     expected_series = pd.Series([True, True, True, True, False])
 81 | 
 82 |     assert inspect.equals(expected_series)
 83 | 
 84 | 
 85 | def test_path_phenotype():
 86 |     df = pd.DataFrame(
 87 |         index=[
 88 |             "nocturnal",
 89 |             "male",
 90 |             "female",
 91 |             "mobility",
 92 |             "This phenotype does not exist",
 93 |         ]
 94 |     )
 95 | 
 96 |     pt = bt_base.Phenotype(source="pato")
 97 |     inspected_df = pt.inspect(df.index, field=pt.name, return_df=True)
 98 |     inspect = inspected_df["__validated__"].reset_index(drop=True)
 99 |     expected_series = pd.Series([True, True, True, True, False])
100 | 
101 |     assert inspect.equals(expected_series)
102 | 


--------------------------------------------------------------------------------
/tests/base/entities/test_protein.py:
--------------------------------------------------------------------------------
 1 | import bionty.base as bt_base
 2 | import pandas as pd
 3 | 
 4 | 
 5 | def test_uniprot_protein_inspect_uniprotkb_id():
 6 |     df = pd.DataFrame(
 7 |         index=[
 8 |             "A0A024QZ08",  # no longer exist in 2024-03 version
 9 |             "X6RLV5",
10 |             "X6RM24",
11 |             "A0A024QZQ1",  # no longer exist in 2024-03 version
12 |             "This protein does not exist",
13 |         ]
14 |     )
15 | 
16 |     pr = bt_base.Protein(source="uniprot")
17 |     inspected_df = pr.inspect(df.index, pr.uniprotkb_id, return_df=True)
18 | 
19 |     inspect = inspected_df["__validated__"].reset_index(drop=True)
20 |     expected_series = pd.Series([False, True, True, False, False])
21 | 
22 |     assert inspect.equals(expected_series)
23 | 


--------------------------------------------------------------------------------
/tests/base/entities/test_tissue.py:
--------------------------------------------------------------------------------
 1 | import bionty.base as bt_base
 2 | import pandas as pd
 3 | 
 4 | 
 5 | def test_uberon_tissue_inspect_ontology_id():
 6 |     df = pd.DataFrame(
 7 |         index=[
 8 |             "UBERON:0000000",
 9 |             "UBERON:0000005",
10 |             "UBERON:8600001",
11 |             "UBERON:8600002",
12 |             "This tissue does not exist",
13 |         ]
14 |     )
15 | 
16 |     ts = bt_base.Tissue(source="uberon", version="2025-05-28")
17 |     inspected_df = ts.inspect(df.index, ts.ontology_id, return_df=True)
18 | 
19 |     inspect = inspected_df["__validated__"].reset_index(drop=True)
20 |     expected_series = pd.Series([True, True, True, True, False])
21 | 
22 |     assert inspect.equals(expected_series)
23 | 


--------------------------------------------------------------------------------
/tests/base/test_bionty.py:
--------------------------------------------------------------------------------
 1 | import bionty.base as bt_base
 2 | import pytest
 3 | 
 4 | 
 5 | def test_unavailable_sources():
 6 |     with pytest.raises(ValueError):  # an unrecognized source name must be rejected
 7 |         bt_base.CellType(source="random")
 8 | 
 9 | 
10 | def test_diff_successful():
11 |     disease_bt_1 = bt_base.Disease(source="mondo", version="2023-04-04")
12 |     disease_bt_2 = bt_base.Disease(source="mondo", version="2023-02-06")
13 | 
14 |     new_entries, modified_entries = disease_bt_1.diff(disease_bt_2)
15 |     assert len(new_entries) == 819
16 |     assert len(modified_entries) == 249
17 | 
18 | 
19 | def test_diff_value_errors():
20 |     # Two different PublicOntology object types
21 |     disease_bt = bt_base.Disease()
22 |     phenotype_bt = bt_base.Phenotype()
23 |     with pytest.raises(ValueError):
24 |         disease_bt.diff(phenotype_bt)
25 | 
26 |     # Different sources
27 |     disease_bt_1 = bt_base.Disease(source="mondo")
28 |     disease_bt_2 = bt_base.Disease(source="doid")
29 |     with pytest.raises(ValueError):
30 |         disease_bt_1.diff(disease_bt_2)
31 | 
32 |     # Same version
33 |     disease_bt_3 = bt_base.Disease(source="mondo", version="2023-04-04")
34 |     disease_bt_4 = bt_base.Disease(source="mondo", version="2023-04-04")
35 |     with pytest.raises(ValueError):
36 |         disease_bt_3.diff(disease_bt_4)
37 | 


--------------------------------------------------------------------------------
/tests/base/test_ontology.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | from bionty.base._ontology import Ontology
 4 | from bionty.base.dev._io import s3_bionty_assets
 5 | 
 6 | 
 7 | def test_ontology():
 8 |     localpath = s3_bionty_assets("ontology_all__pw__7.79__Pathway")
 9 | 
10 |     try:
11 |         onto = Ontology(localpath)
12 |         assert onto.get_term("PW:0000014").name == "neurodegenerative pathway"
13 | 
14 |         df = onto.to_df(source="pw", include_id_prefixes={"pw": ["PW"]})
15 |         assert df.shape == (2647, 4)
16 |         assert df.index.name == "ontology_id"
17 | 
18 |     finally:
19 |         if Path(localpath).exists:
20 |             Path(localpath).unlink()
21 | 


--------------------------------------------------------------------------------
/tests/base/test_ontology_url.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from bionty.base._ontology_url import (
 3 |     OntologyNotFoundError,
 4 |     OntologyVersionNotFoundError,
 5 |     get_ontology_url,
 6 | )
 7 | 
 8 | 
 9 | def test_get_ontology_url():
10 |     # Test with a known prefix and version
11 |     url, ver = get_ontology_url("OBA", "2022-05-11")
12 |     assert url == "http://purl.obolibrary.org/obo/oba/releases/2022-05-11/oba.owl"
13 |     assert ver == "2022-05-11"
14 | 
15 |     # Test with a known prefix and no version
16 |     prefix = "OBA"
17 |     url, ver = get_ontology_url(prefix)
18 |     assert url is not None
19 |     assert ver is not None
20 | 
21 |     # A wrong version
22 |     with pytest.raises(OntologyVersionNotFoundError):
23 |         get_ontology_url("OBA", "wrong_version")
24 | 
25 |     # Test with an unknown prefix
26 |     with pytest.raises(OntologyNotFoundError):
27 |         get_ontology_url("UNKNOWN_PREFIX")
28 | 
29 |     prefixes = [
30 |         "ncbitaxon",
31 |         "clo",
32 |         "cl",
33 |         "uberon",
34 |         "mondo",
35 |         "doid",
36 |         "efo",
37 |         "pato",
38 |         "hp",
39 |         "mp",
40 |         "zp",
41 |         "go",
42 |         "pw",
43 |         "dron",
44 |         "hsapdv",
45 |         "mmusdv",
46 |         "hancestro",
47 |     ]
48 | 
49 |     for prefix in prefixes:
50 |         url, ver = get_ontology_url(prefix)
51 |         assert url is not None
52 |         assert ver is not None
53 | 


--------------------------------------------------------------------------------
/tests/core/conftest.py:
--------------------------------------------------------------------------------
 1 | import shutil
 2 | 
 3 | import lamindb_setup as ln_setup
 4 | import pytest
 5 | 
 6 | 
 7 | def pytest_sessionstart():  # pytest hook, called before collection starts
 8 |     ln_setup.init(storage="./testdb", modules="bionty,wetlab")
 9 | 
10 | 
11 | def pytest_sessionfinish(session: pytest.Session):
12 |     shutil.rmtree("./testdb")  # drop the storage directory given to init above
13 |     ln_setup.delete("testdb", force=True)  # then remove "testdb" via lamindb_setup
14 | 


--------------------------------------------------------------------------------
/tests/core/test_models.py:
--------------------------------------------------------------------------------
 1 | import bionty as bt
 2 | 
 3 | 
 4 | def test_public_synonym_mapping():
 5 |     bt_result = bt.Gene.public(organism="human").inspect(
 6 |         ["ABC1", "TNFRSF4"], field="symbol"
 7 |     )
 8 |     assert bt_result.synonyms_mapper == {"ABC1": "HEATR6"}
 9 | 
10 |     bt_result = bt.Gene.public(organism="human").inspect(
11 |         ["ABC1", "TNFRSF4"], field="symbol", inspect_synonyms=False
12 |     )
13 |     assert bt_result.synonyms_mapper == {}
14 | 
15 | 
16 | def test_encode_uids():
17 |     cell_type = bt.CellType(
18 |         ontology_id="CL:0000084",
19 |         _skip_validation=True,
20 |     )
21 |     assert cell_type.uid == "22LvKd01"
22 | 
23 |     organism = bt.Organism(
24 |         ontology_id="NCBITaxon:9606",
25 |         name="human",
26 |         _skip_validation=True,
27 |     )
28 |     assert organism.uid == "1dpCL6Td"
29 | 
30 |     bt.settings.organism = "human"
31 |     cell_marker = bt.CellMarker(
32 |         name="test",
33 |         organism=bt.settings.organism,
34 |         _skip_validation=True,
35 |     )
36 |     assert cell_marker.uid == "2dZ52W9noUDK"
37 | 
38 |     gene = bt.Gene(
39 |         ensembl_gene_id="ENSG00000081059",
40 |         symbol="TCF7",
41 |         organism=bt.settings.organism,  # required
42 |         _skip_validation=True,
43 |     )
44 |     assert gene.uid == "7IkHKPl0ScQR"
45 | 
46 |     disease = bt.Source(
47 |         entity="bionty.Disease",
48 |         name="mondo",
49 |         version="2023-04-04",
50 |         organism="all",
51 |         _skip_validation=True,
52 |     )
53 |     assert disease.uid == "Hgw08Vk3"
54 | 
55 |     phenotype = bt.Source(
56 |         entity="bionty.Phenotype",
57 |         name="hp",
58 |         version="2023-06-17",
59 |         organism="human",
60 |         _skip_validation=True,
61 |     )
62 |     assert phenotype.uid == "451W7iJS"
63 | 


--------------------------------------------------------------------------------
/tests/core/test_organism_requirement.py:
--------------------------------------------------------------------------------
 1 | import bionty as bt
 2 | import pytest
 3 | from bionty._organism import OrganismNotSet
 4 | 
 5 | 
 6 | def test_from_values_organism():
 7 |     bt.settings._organism = None
 8 |     with pytest.raises(OrganismNotSet):
 9 |         bt.Gene.from_values(["ABC1"], bt.Gene.symbol)
10 |     # no organism is needed if the values are ensembl gene ids
11 |     result = bt.Gene.from_values(["ENSG00000068097"], bt.Gene.ensembl_gene_id)
12 |     assert len(result) == 1
13 |     result = bt.Gene.from_values(
14 |         ["ENSMUSG00000102862", "ENSMUSG00000084826"], field=bt.Gene.ensembl_gene_id
15 |     )
16 |     assert len(result) == 2
17 |     result = bt.Gene.from_values(
18 |         ["HRA1", "ETS1-1"], field=bt.Gene.stable_id, organism="saccharomyces cerevisiae"
19 |     )
20 |     assert len(result) == 2
21 | 
22 |     bt.settings.organism = "human"
23 |     values = ["ABC1"]
24 |     standardized_values = bt.Gene.public().standardize(values)
25 |     records = bt.Gene.from_values(standardized_values, bt.Gene.symbol)
26 |     assert records[0].ensembl_gene_id == "ENSG00000068097"
27 | 
28 |     # TODO: Gene.public() should raise error if organism is not provided
29 |     standardized_values = bt.Gene.public(organism="mouse").standardize(values)
30 |     records = bt.Gene.from_values(standardized_values, bt.Gene.symbol, organism="mouse")
31 |     assert records[0].ensembl_gene_id == "ENSMUSG00000015243"
32 | 
33 |     # clean up
34 |     bt.settings._organism = None
35 |     bt.Organism.filter().delete(permanent=True)
36 |     bt.Gene.filter().delete(permanent=True)
37 | 


--------------------------------------------------------------------------------