├── .github ├── ISSUE_TEMPLATE │ └── new_sssom_element.md ├── pull_request_template.md └── workflows │ ├── build.yml │ ├── codespell.yml │ ├── deploy_documentation.yml │ ├── main.yaml │ ├── pypi-publish.yaml │ └── qc.yml ├── .gitignore ├── CHANGELOG.md ├── CITATION.cff ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── SSSOM.md ├── about.yaml ├── examples ├── README.md ├── embedded │ ├── foodie-inc-2022-05-01.sssom.tsv │ └── mp-hp-exact-0.0.1.sssom.tsv ├── external │ ├── example1.sssom.tsv │ ├── example1.sssom.yml │ ├── mp-hp-exact-0.0.1.sssom.tsv │ └── mp-hp-exact-0.0.1.sssom.yml └── schema │ ├── composite-entities.sssom.tsv │ ├── curation_rule.sssom.tsv │ ├── curation_rule_text.sssom.tsv │ ├── curation_rule_text2.sssom.tsv │ ├── curie_map.sssom.tsv │ ├── extension-slots.sssom.tsv │ ├── issue_tracker.sssom.tsv │ ├── issue_tracker_item.sssom.tsv │ ├── literals.sssom.tsv │ ├── mapping_set_confidence.sssom.tsv │ ├── no_term_found.sssom.tsv │ ├── predicate-types.sssom.tsv │ ├── similarity_score.sssom.tsv │ └── version.sssom.tsv ├── mkdocs.yml ├── poetry.lock ├── project.Makefile ├── project ├── excel │ └── sssom_schema.xlsx ├── graphql │ └── sssom_schema.graphql ├── jsonld │ ├── sssom_schema.context.jsonld │ └── sssom_schema.jsonld ├── jsonschema │ └── sssom_schema.schema.json ├── owl │ └── sssom_schema.owl.ttl ├── prefixmap │ └── sssom_schema.yaml ├── protobuf │ └── sssom_schema.proto ├── shacl │ └── sssom_schema.shacl.ttl ├── shex │ └── sssom_schema.shex └── sqlschema │ └── sssom_schema.sql ├── pyproject.toml ├── run.sh ├── scripts └── gh_table.pl ├── src ├── CONFIG.yaml ├── __init__.py ├── doc-templates │ ├── class.md.jinja2 │ ├── class_diagram.md.jinja2 │ ├── common_metadata.md.jinja2 │ ├── frontpage.md.jinja2 │ ├── index.md.jinja2 │ └── slot.md.jinja2 ├── docs │ ├── 5star-mappings.md │ ├── chaining-rules.md │ ├── contributing.md │ ├── create-mapping-commons.md │ ├── editors.md │ ├── events │ │ ├── ccb2022.md │ │ ├── mc2021.md │ 
│ ├── mc2023.md │ │ ├── oboacademy2022.md │ │ ├── ohdsi2022.md │ │ ├── ohdsi2023.md │ │ ├── om2022.md │ │ ├── pistoia2022.md │ │ └── wsbo2021.md │ ├── explanation │ │ └── mappings.md │ ├── faq.md │ ├── funding.md │ ├── glossary.md │ ├── images │ │ └── sssom-banner.png │ ├── introduction.md │ ├── mapping-commons.md │ ├── mapping-justifications.md │ ├── mapping-predicates.md │ ├── matching-tool-implementation-guide.md │ ├── presentations.md │ ├── related-documentation.md │ ├── resources │ │ └── sssom_5star_mappings.pdf │ ├── spec-formats-json.md │ ├── spec-formats-owl.md │ ├── spec-formats-tsv.md │ ├── spec-formats.md │ ├── spec-intro.md │ ├── spec-model.md │ ├── toolkit.md │ ├── training.md │ ├── tutorial.md │ ├── tutorials │ │ └── omop-mappings.md │ ├── usecases.md │ └── workshops.md └── sssom_schema │ ├── __init__.py │ ├── context │ ├── sssom_schema.context.jsonld │ └── sssom_schema.jsonld │ ├── datamodel │ ├── __init__.py │ └── sssom_schema.py │ └── schema │ └── sssom_schema.yaml ├── tests ├── __init__.py ├── input │ ├── CONFIG.yaml │ └── README.md └── test_input_against_model.py ├── tox.ini └── utils └── get-value.sh /.github/ISSUE_TEMPLATE/new_sssom_element.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "[New metadata element]: " 3 | name: Add new SSSOM metadata element 4 | about: New metadata element suggestion for SSSOM 5 | assignees: matentzn 6 | labels: 'new metadata element request' 7 | --- 8 | 9 | **Element id (e.g. creator_id, mapping_tool_version):** 10 | (Must be lower case and contain only letters and underscores.) 11 | 12 | ``` 13 | element_id_example 14 | ``` 15 | 16 | **Value data type (e.g. URI, URL, text, xsd:boolean):** 17 | 18 | ``` 19 | xsd:string 20 | ``` 21 | 22 | **Description** 23 | (Provide a human-readable description that clarifies the intended use of the metadata element.) 24 | 25 | Example description. 
26 | 27 | **Complete example to a SSSOM file with this element** 28 | (This example can be given as a markdown table or a linked SSSOM file, feel free to edit the markdown table below) 29 | 30 | ``` 31 | # curie_map: 32 | # HP: http://purl.obolibrary.org/obo/FBbt_ 33 | # MP: http://purl.obolibrary.org/obo/UBERON_ 34 | # owl: http://www.w3.org/2002/07/owl# 35 | # rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns# 36 | # rdfs: http://www.w3.org/2000/01/rdf-schema# 37 | # semapv: https://w3id.org/semapv/vocab/ 38 | # skos: http://www.w3.org/2004/02/skos/core# 39 | # sssom: https://w3id.org/sssom/ 40 | # license: https://w3id.org/sssom/license/unspecified 41 | # mapping_set_id: https://w3id.org/sssom/mappings/ac9e1878-73f4-4767-8402-a6c40e1b0835 42 | ``` 43 | 44 | | subject_id | predicate_id | object_id | mapping_justification | element_id_example | 45 | | ----------- | --------------- | ----------- | ----------------------- | ------------------- | 46 | | HP:0009124 | skos:exactMatch | MP:0000003 | semapv:LexicalMatching | YOUR EXAMPLE VALUE | 47 | | HP:0008551 | skos:exactMatch | MP:0000018 | semapv:LexicalMatching | YOUR EXAMPLE VALUE | 48 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | Resolves [#ISSUE, #ISSUE] 2 | 3 | - [ ] `docs/` have been added/updated if necessary 4 | - [ ] `make test` has been run locally 5 | - [ ] tests have been added/updated (if applicable) 6 | - [ ] [CHANGELOG.md](https://github.com/mapping-commons/sssom/blob/master/CHANGELOG.md) has been updated. 
7 | 8 | If you are proposing a change to the SSSOM metadata model, you must 9 | 10 | - [ ] provide a full, working and valid example in `examples/` 11 | - [ ] provide a link to the related GitHub issue in the `see_also` field of the linkml model 12 | - [ ] provide a link to a valid example in the `see_also` field of the linkml model 13 | - [ ] make sure any new slot is annotated with the appropriate `added_in` annotation 14 | - [ ] run SSSOM-Py test suite against the updated model 15 | 16 | 17 | [Add a description, mentioning at least relevant #ISSUE and how it was addressed. A bulleted list of all changes performed by the PR is is helpful.] 18 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | on: 3 | workflow_dispatch: 4 | #push: 5 | # branches: [ master ] 6 | # paths: 7 | # - 'src/linkml/sssom.yaml' 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | steps: 13 | #---------------------------------------------- 14 | # check-out repo and set-up python 15 | #---------------------------------------------- 16 | - name: Check out repository 17 | uses: actions/checkout@v2 18 | with: 19 | persist-credentials: false # otherwise, the token used is the GITHUB_TOKEN, instead of your personal token 20 | fetch-depth: 0 # otherwise, you will failed to push refs to dest repo 21 | 22 | - name: Set up Python 23 | uses: actions/setup-python@v2 24 | with: 25 | python-version: 3.9 26 | 27 | #---------------------------------------------- 28 | # install & configure poetry 29 | #---------------------------------------------- 30 | - name: Install Poetry 31 | uses: snok/install-poetry@v1.3 32 | with: 33 | virtualenvs-create: true 34 | virtualenvs-in-project: true 35 | 36 | #---------------------------------------------- 37 | # load cached venv if cache exists 38 | #---------------------------------------------- 39 | - 
name: Load cached venv 40 | id: cached-poetry-dependencies 41 | uses: actions/cache@v4 42 | with: 43 | path: .venv 44 | key: venv-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }} 45 | 46 | #---------------------------------------------- 47 | # install dependencies if cache does not exist 48 | #---------------------------------------------- 49 | - name: Install dependencies 50 | if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' 51 | run: poetry install --no-interaction --no-root 52 | 53 | #---------------------------------------------- 54 | # install your root project, if required 55 | #---------------------------------------------- 56 | - name: Install library 57 | run: poetry install --no-interaction 58 | 59 | - name: Create local changes 60 | run: | 61 | make all 62 | - name: Commit files 63 | run: | 64 | git config --local user.email "action@github.com" 65 | git config --local user.name "GitHub Action" 66 | git commit -m "Rebuilding documentation and generated files with Github Action" -a 67 | - name: Push changes 68 | uses: ad-m/github-push-action@master 69 | with: 70 | github_token: ${{ secrets.GITHUB_TOKEN }} 71 | branch: ${{ github.ref }} 72 | 73 | -------------------------------------------------------------------------------- /.github/workflows/codespell.yml: -------------------------------------------------------------------------------- 1 | # Codespell configuration is within pyproject.toml 2 | --- 3 | name: Codespell 4 | 5 | on: 6 | push: 7 | branches: [master] 8 | pull_request: 9 | branches: [master] 10 | 11 | permissions: 12 | contents: read 13 | 14 | jobs: 15 | codespell: 16 | name: Check for spelling errors 17 | runs-on: ubuntu-latest 18 | 19 | steps: 20 | - name: Checkout 21 | uses: actions/checkout@v4 22 | - name: Annotate locations with typos 23 | uses: codespell-project/codespell-problem-matcher@9ba2c57125d4908eade4308f32c4ff814c184633 24 | - name: Codespell 25 | uses: 
codespell-project/actions-codespell@94259cd8be02ad2903ba34a22d9c13de21a74461 26 | -------------------------------------------------------------------------------- /.github/workflows/deploy_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Deploy Documentation 2 | 3 | # Controls when the action will run. Triggers the workflow on push 4 | on: 5 | workflow_dispatch: 6 | push: 7 | branches: [ master ] 8 | 9 | paths: 10 | - 'src/docs/*' 11 | - 'src/sssom_schema/schema/sssom_schema.yaml' 12 | - 'mkdocs.yml' 13 | 14 | 15 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel 16 | jobs: 17 | build-docs: 18 | # The type of runner that the job will run on 19 | runs-on: ubuntu-latest 20 | 21 | # Steps represent a sequence of tasks that will be executed as part of the job 22 | steps: 23 | #---------------------------------------------- 24 | # check-out repo and set-up python 25 | #---------------------------------------------- 26 | - name: Check out repository 27 | uses: actions/checkout@v3 28 | with: 29 | # persist-credentials: false # otherwise, the token used is the GITHUB_TOKEN, instead of your personal token 30 | fetch-depth: 0 # otherwise, you will failed to push refs to dest repo 31 | 32 | - name: Set up Python3 33 | uses: actions/setup-python@v3 34 | with: 35 | python-version: 3.9 36 | 37 | #---------------------------------------------- 38 | # install & configure poetry 39 | #---------------------------------------------- 40 | - name: Install Poetry 41 | uses: snok/install-poetry@v1.3 42 | # with: 43 | # virtualenvs-create: true 44 | # virtualenvs-in-project: true 45 | 46 | #---------------------------------------------- 47 | # load cached venv if cache exists 48 | #---------------------------------------------- 49 | - name: Load cached venv 50 | id: cached-poetry-dependencies 51 | uses: actions/cache@v4 52 | with: 53 | path: .venv 54 | key: venv-${{ runner.os }}-${{ 
hashFiles('**/poetry.lock') }} 55 | 56 | #---------------------------------------------- 57 | # install dependencies if cache does not exist 58 | #---------------------------------------------- 59 | - name: Install dependencies 60 | # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' 61 | # run: poetry install --no-interaction --no-root 62 | run: poetry install -E docs 63 | 64 | #---------------------------------------------- 65 | # install your root project, if required 66 | #---------------------------------------------- 67 | # - name: Install library 68 | # run: poetry install --no-interaction 69 | 70 | - name: Create local docs 71 | run: | 72 | mkdir docs 73 | touch docs/.nojekyll 74 | make gendoc 75 | make mkd-gh-deploy 76 | -------------------------------------------------------------------------------- /.github/workflows/main.yaml: -------------------------------------------------------------------------------- 1 | # Built from: 2 | # https://docs.github.com/en/actions/guides/building-and-testing-python 3 | # https://github.com/snok/install-poetry#workflows-and-tips 4 | 5 | name: Build and test sssom 6 | 7 | on: [pull_request] 8 | 9 | jobs: 10 | test: 11 | 12 | runs-on: ubuntu-latest 13 | strategy: 14 | matrix: 15 | python-version: ["3.9", "3.10"] 16 | 17 | steps: 18 | 19 | #---------------------------------------------- 20 | # check-out repo and set-up python 21 | #---------------------------------------------- 22 | - name: Check out repository 23 | uses: actions/checkout@v2 24 | 25 | - name: Set up Python ${{ matrix.python-version }} 26 | uses: actions/setup-python@v2 27 | with: 28 | python-version: ${{ matrix.python-version }} 29 | 30 | #---------------------------------------------- 31 | # install & configure poetry 32 | #---------------------------------------------- 33 | - name: Install Poetry 34 | uses: snok/install-poetry@v1.3 35 | with: 36 | virtualenvs-create: true 37 | virtualenvs-in-project: true 38 | 39 | 
#---------------------------------------------- 40 | # load cached venv if cache exists 41 | #---------------------------------------------- 42 | - name: Load cached venv 43 | id: cached-poetry-dependencies 44 | uses: actions/cache@v4 45 | with: 46 | path: .venv 47 | key: venv-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }} 48 | 49 | #---------------------------------------------- 50 | # install dependencies if cache does not exist 51 | #---------------------------------------------- 52 | - name: Install dependencies 53 | if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' 54 | run: poetry install --no-interaction --no-root 55 | 56 | #---------------------------------------------- 57 | # install your root project, if required 58 | #---------------------------------------------- 59 | - name: Install library 60 | run: poetry install --no-interaction 61 | 62 | #---------------------------------------------- 63 | # run test suite 64 | #---------------------------------------------- 65 | - name: Run tests 66 | run: make test 67 | 68 | -------------------------------------------------------------------------------- /.github/workflows/pypi-publish.yaml: -------------------------------------------------------------------------------- 1 | name: Publish Python Package 2 | 3 | on: 4 | workflow_dispatch: 5 | release: 6 | types: [created] 7 | 8 | jobs: 9 | build-n-publish: 10 | name: Build and publish Python 🐍 distributions 📦 to PyPI 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - uses: actions/checkout@v3.0.2 15 | 16 | - name: Set up Python 17 | uses: actions/setup-python@v3.1.2 18 | with: 19 | python-version: 3.9 20 | 21 | - name: Install Poetry 22 | uses: snok/install-poetry@v1.3.1 23 | with: 24 | virtualenvs-create: true 25 | virtualenvs-in-project: true 26 | 27 | - name: Install dependencies 28 | run: poetry install --no-interaction 29 | 30 | - name: Build source and wheel archives 31 | run: | 32 | poetry version $(git describe --tags --abbrev=0) 33 | 
poetry build 34 | 35 | - name: Publish distribution 📦 to PyPI 36 | uses: pypa/gh-action-pypi-publish@v1.5.0 37 | with: 38 | user: __token__ 39 | password: ${{ secrets.SSSOM_SCHEMA_TOKEN }} 40 | -------------------------------------------------------------------------------- /.github/workflows/qc.yml: -------------------------------------------------------------------------------- 1 | # Basic ODK workflow 2 | 3 | name: CI 4 | 5 | # Controls when the action will run. 6 | on: 7 | # Triggers the workflow on push or pull request events but only for the master branch 8 | push: 9 | branches: [ master ] 10 | pull_request: 11 | branches: [ master ] 12 | 13 | # Allows you to run this workflow manually from the Actions tab 14 | workflow_dispatch: 15 | 16 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel 17 | jobs: 18 | sssom_validation: 19 | runs-on: ubuntu-latest 20 | container: obolibrary/odkfull:v1.5.4 21 | steps: 22 | - name: Install latest SSSOM 23 | env: 24 | DEFAULT_BRANCH: master 25 | run: pip install -U sssom sssom-schema --break-system-packages 26 | - uses: actions/checkout@v2 27 | - name: Run Mapping QC checks 28 | env: 29 | DEFAULT_BRANCH: master 30 | run: make validate_mappings 31 | 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /docs/ 2 | /project/docs/ 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | pip-wheel-metadata/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # 
before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | .DS_Store 134 | tmp/ 135 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog for SSSOM 2 | 3 | ## Next 4 | 5 | - Add `composed entity expression` as a new value in the `EntityType` enumeration ([issue](https://github.com/mapping-commons/sssom/issues/402)). 6 | - Add `predicate_type` slot (previously defined but unused) to the `Mapping` and `MappingSet` classes ([issue](https://github.com/mapping-commons/sssom/issues/404)). 7 | - Add `similarity_measure` slot to the `MappingSet` class ([issue](https://github.com/mapping-commons/sssom/issues/411)). 8 | - Add `sssom_version` slot to the `MappingSet` class ([issue](https://github.com/mapping-commons/sssom/issues/439)). 9 | - Change the type of the `see_also` slot to `xsd:anyURI` ([issue](https://github.com/mapping-commons/sssom/issues/422)). 10 | - Add `mappings_set_confidence` slot to the `MappingSet` class ([issue](https://github.com/mapping-commons/sssom/issues/438)). 11 | - TBD 12 | 13 | ## SSSOM version 1.0.0 14 | 15 | - Add the concept of "propagatable slots". 16 | - Add the `curie_map` to the model (instead of it being a specificity of the SSSOM/TSV format). 17 | - Add the concept of "extension slots". 18 | - Add the concept of "literal mappings". 
19 | - Add the entity reference `sssom:NoTermFound` to express the concept of an "unmapped entity" ([issue](https://github.com/mapping-commons/sssom/issues/28)) 20 | - Replace `semantic_similarity_score` with `similarity_score` and `semantic_similarity_measure` with `similarity_measure` in the data model ([issue](https://github.com/mapping-commons/sssom/issues/385)) 21 | 22 | ## SSSOM version 0.15.1 23 | 24 | - Add recommendation to sort the keys in the YAML metadata block. 25 | - Double-typed slots explicitly constrained to the [0.0,1.0] range, as per their description. 26 | 27 | ## SSSOM version 0.15.0 28 | 29 | - Add issue_tracker_item and issue_tracker [model elements](https://github.com/mapping-commons/sssom/pull/259). 30 | 31 | ## SSSOM version 0.13.0 32 | 33 | - The necessity of the "canonical column ordering" was downgraded from MUST to SHOULD (https://github.com/mapping-commons/sssom/pull/285) 34 | - Documents clearly that built-in prefixes MUST NOT be redefined (https://github.com/mapping-commons/sssom/pull/285) 35 | 36 | ## SSSOM version 0.11.0 37 | 38 | - see https://github.com/mapping-commons/sssom/releases/tag/0.11.0 39 | 40 | ### Summary 41 | 42 | #### New elements: 43 | - `mapping_set_title` to capture a human readable title for a mapping set 44 | - `registry_title` and `registry_description` to capture the human readable title and description of an SSSOM mapping set registry 45 | - `curation_rule` to capture a (potentially) complex (set of) condition(s) executed by an agent (usually human) that led to the establishment of a mapping. 
46 | 47 | #### Updated elements: 48 | - Adding mapping_source slot to Mapping by @matentzn in #230 49 | - Improve documentation for `subject_category` and `object_category` elements 50 | 51 | #### Documentation 52 | - Compiled a list of all SSSOM talks: https://mapping-commons.github.io/sssom/presentations/ 53 | - Document chaining rules: https://mapping-commons.github.io/sssom/chaining_rules/ 54 | 55 | #### Quality control and Technical infrastructure 56 | 57 | - Make adding a concrete SSSOM example part of the new element request 58 | - Adding QC checks for example SSSOM files hosted in the repo 59 | 60 | ## SSSOM version 0.10.1 61 | 62 | - see https://github.com/mapping-commons/sssom/releases/tag/0.10.1 63 | 64 | ## SSSOM version 0.9.4 65 | 66 | - see https://github.com/mapping-commons/sssom/releases/tag/0.9.4 67 | 68 | ## SSSOM version 0.9.3 69 | 70 | - see https://github.com/mapping-commons/sssom/releases/tag/0.9.3 71 | - Major change: Changed `match_type` logic to `mapping_justification` ([issue](https://github.com/mapping-commons/sssom/issues/150)). 72 | 73 | 74 | ## SSSOM version 0.9.2 75 | 76 | - see https://github.com/mapping-commons/sssom/releases/tag/0.9.2 77 | 78 | ## SSSOM version 0.9.1 79 | 80 | - see https://github.com/mapping-commons/sssom/releases/tag/0.9.1 81 | 82 | ## SSSOM version 0.9.0 83 | - Initial release 84 | - see https://github.com/mapping-commons/sssom/releases/tag/0.9.0 85 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: '1.1.0' 2 | message: 'Please cite the following works when using this software.' 
3 | abstract: "Abstract\n Despite progress in the development of standards for describing and exchanging scientific information, the lack of easy-to-use standards for mapping between different representations of the same or similar objects in different databases poses a major impediment to data integration and interoperability. Mappings often lack the metadata needed to be correctly interpreted and applied. For example, are two terms equivalent or merely related? Are they narrow or broad matches? Or are they associated in some other way? Such relationships between the mapped terms are often not documented, which leads to incorrect assumptions and makes them hard to use in scenarios that require a high degree of precision (such as diagnostics or risk prediction). Furthermore, the lack of descriptions of how mappings were done makes it hard to combine and reconcile mappings, particularly curated and automated ones. We have developed the Simple Standard for Sharing Ontological Mappings (SSSOM) which addresses these problems by: (i) Introducing a machine-readable and extensible vocabulary to describe metadata that makes imprecision, inaccuracy and incompleteness in mappings explicit. (ii) Defining an easy-to-use simple table-based format that can be integrated into existing data science pipelines without the need to parse or query ontologies, and that integrates seamlessly with Linked Data principles. (iii) Implementing open and community-driven collaborative workflows that are designed to evolve the standard continuously to address changing requirements and mapping practices. (iv) Providing reference tools and software libraries for working with the standard. In this paper, we present the SSSOM standard, describe several use cases in detail and survey some of the existing work on standardizing the exchange of mappings, with the goal of making mappings Findable, Accessible, Interoperable and Reusable (FAIR). 
The SSSOM specification can be found at http://w3id.org/sssom/spec.\n Database URL: http://w3id.org/sssom/spec" 4 | authors: 5 | - family-names: 'Matentzoglu' 6 | given-names: 'Nicolas' 7 | - family-names: 'Balhoff' 8 | given-names: 'James P' 9 | - family-names: 'Bello' 10 | given-names: 'Susan M' 11 | - family-names: 'Bizon' 12 | given-names: 'Chris' 13 | - family-names: 'Brush' 14 | given-names: 'Matthew' 15 | - family-names: 'Callahan' 16 | given-names: 'Tiffany J' 17 | - family-names: 'Chute' 18 | given-names: 'Christopher G' 19 | - family-names: 'Duncan' 20 | given-names: 'William D' 21 | - family-names: 'Evelo' 22 | given-names: 'Chris T' 23 | - family-names: 'Gabriel' 24 | given-names: 'Davera' 25 | - family-names: 'Graybeal' 26 | given-names: 'John' 27 | - family-names: 'Gray' 28 | given-names: 'Alasdair' 29 | - family-names: 'Gyori' 30 | given-names: 'Benjamin M' 31 | - family-names: 'Haendel' 32 | given-names: 'Melissa' 33 | - family-names: 'Harmse' 34 | given-names: 'Henriette' 35 | - family-names: 'Harris' 36 | given-names: 'Nomi L' 37 | - family-names: 'Harrow' 38 | given-names: 'Ian' 39 | - family-names: 'Hegde' 40 | given-names: 'Harshad B' 41 | - family-names: 'Hoyt' 42 | given-names: 'Amelia L' 43 | - family-names: 'Hoyt' 44 | given-names: 'Charles T' 45 | - family-names: 'Jiao' 46 | given-names: 'Dazhi' 47 | - family-names: 'Jiménez-Ruiz' 48 | given-names: 'Ernesto' 49 | - family-names: 'Jupp' 50 | given-names: 'Simon' 51 | - family-names: 'Kim' 52 | given-names: 'Hyeongsik' 53 | - family-names: 'Koehler' 54 | given-names: 'Sebastian' 55 | - family-names: 'Liener' 56 | given-names: 'Thomas' 57 | - family-names: 'Long' 58 | given-names: 'Qinqin' 59 | - family-names: 'Malone' 60 | given-names: 'James' 61 | - family-names: 'McLaughlin' 62 | given-names: 'James A' 63 | - family-names: 'McMurry' 64 | given-names: 'Julie A' 65 | - family-names: 'Moxon' 66 | given-names: 'Sierra' 67 | - family-names: 'Munoz-Torres' 68 | given-names: 'Monica C' 69 | - 
family-names: 'Osumi-Sutherland' 70 | given-names: 'David' 71 | - family-names: 'Overton' 72 | given-names: 'James A' 73 | - family-names: 'Peters' 74 | given-names: 'Bjoern' 75 | - family-names: 'Putman' 76 | given-names: 'Tim' 77 | - family-names: 'Queralt-Rosinach' 78 | given-names: 'Núria' 79 | - family-names: 'Shefchek' 80 | given-names: 'Kent' 81 | - family-names: 'Solbrig' 82 | given-names: 'Harold' 83 | - family-names: 'Thessen' 84 | given-names: 'Anne' 85 | - family-names: 'Tudorache' 86 | given-names: 'Tania' 87 | - family-names: 'Vasilevsky' 88 | given-names: 'Nicole' 89 | - family-names: 'Wagner' 90 | given-names: 'Alex H' 91 | - family-names: 'Mungall' 92 | given-names: 'Christopher J' 93 | doi: '10.1093/database/baac035' 94 | identifiers: 95 | - type: 'doi' 96 | value: '10.1093/database/baac035' 97 | - type: 'url' 98 | value: 'http://dx.doi.org/10.1093/database/baac035' 99 | - type: 'other' 100 | value: 'urn:issn:1758-0463' 101 | title: 'A Simple Standard for Sharing Ontological Mappings (SSSOM)' 102 | url: 'http://dx.doi.org/10.1093/database/baac035' 103 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to make participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 
6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting/derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 
34 | 35 | ## Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by [contacting the project team](contact.md). All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 40 | 41 | ## Attribution 42 | 43 | This code of conduct has been derived from the excellent code of conduct of the [ATOM project](https://github.com/atom/atom/blob/master/CODE_OF_CONDUCT.md) which in turn is adapted from the [Contributor Covenant][homepage], version 1.4, available at [https://contributor-covenant.org/version/1/4][version] 44 | 45 | [homepage]: https://contributor-covenant.org 46 | [version]: https://contributor-covenant.org/version/1/4/ 47 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2022, Nico Matentzoglu 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. 
Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
# Build driver for the sssom_schema project: generates the project artefacts
# and the documentation from the source schema, and wraps a few git/mkdocs
# convenience commands. Run `make help` for the list of main targets.

MAKEFLAGS += --warn-undefined-variables
SHELL := bash
# -e / -u / pipefail: any failing command, unset shell variable or failing
# pipeline stage aborts the recipe.
.SHELLFLAGS := -eu -o pipefail -c
.DEFAULT_GOAL := help
.DELETE_ON_ERROR:
.SUFFIXES:
.SECONDARY:

# Every tool is run through poetry.
RUN = poetry run
# These values mirror the `name` and `source_schema_path` entries of
# about.yaml; keep both places in sync.
SCHEMA_NAME = sssom_schema
SOURCE_SCHEMA_PATH = src/sssom_schema/schema/sssom_schema.yaml
SRC = src
DEST = project
PYMODEL = $(SRC)/$(SCHEMA_NAME)/datamodel
DOCDIR = docs
TEMPLATE_DIR = $(SRC)/doc-templates

# Command-style targets that do not produce a file of the same name. Declaring
# them phony keeps them runnable even if a file or directory with that name
# (e.g. `test`) appears in the working tree.
# `gen-excel` (used by `all`) is not defined in this file.
# NOTE(review): presumably it comes from project.Makefile -- confirm.
.PHONY: all clean help status setup install deploy gen-project test \
	check-config get-context upgrade serve gendoc testdoc \
	git-init-add git-init git-add git-commit git-status

# Print the main targets (default goal).
help: status
	@echo ""
	@echo "make all -- makes site locally"
	@echo "make install -- install dependencies"
	@echo "make setup -- initial setup"
	@echo "make test -- runs tests"
	@echo "make testdoc -- builds docs and runs local test server"
	@echo "make deploy -- deploys site"
	@echo "make upgrade -- updates linkml version"
	@echo "make help -- show this help"
	@echo ""

# Print the configured schema name and source path.
status: check-config
	@echo "Project: $(SCHEMA_NAME)"
	@echo "Source: $(SOURCE_SCHEMA_PATH)"

setup: install gen-project gendoc git-init-add

install:
	poetry install

all: gen-project gendoc gen-excel get-context
# NOTE(review): pattern rule without a recipe; its purpose is not evident from
# this file -- confirm before relying on it.
%.yaml: gen-project
deploy: all mkd-gh-deploy

# generates all project files
# The generated Python files are moved from $(DEST) into the datamodel package.
gen-project: $(PYMODEL)
	$(RUN) gen-project \
		--exclude owl \
		-d $(DEST) $(SOURCE_SCHEMA_PATH) && mv $(DEST)/*.py $(PYMODEL)

# Same generator run as `gen-project`, but written to the scratch directory
# `tmp` (removed by `clean`) and without moving any files.
test:
	$(RUN) gen-project \
		--exclude owl \
		-d tmp $(SOURCE_SCHEMA_PATH)

# Warn when about.yaml still contains the placeholder name `my-datamodel`;
# always exits successfully.
check-config:
	@(grep my-datamodel about.yaml > /dev/null && printf "\n**Project not configured**:\n\n - Remember to edit 'about.yaml'\n\n" || exit 0)

# NOTE(review): `-C Person` and src/data/examples look like project-template
# leftovers; in addition, the patsubst expansion puts the commands for all
# matching files on a single line. Confirm before using this target.
convert-examples-to-%:
	$(patsubst %, $(RUN) linkml-convert % -s $(SOURCE_SCHEMA_PATH) -C Person, $(shell find src/data/examples -name "*.yaml"))

# Copy the generated JSON-LD files into the package's context directory.
get-context:
	mkdir -p $(SRC)/$(SCHEMA_NAME)/context
	cp $(DEST)/jsonld/* $(SRC)/$(SCHEMA_NAME)/context

# Convert one example from src/data/examples into the requested format.
# NOTE(review): `-C Person` looks like a template leftover -- confirm.
examples/%.yaml: src/data/examples/%.yaml
	$(RUN) linkml-convert -s $(SOURCE_SCHEMA_PATH) -C Person $< -o $@
examples/%.json: src/data/examples/%.yaml
	$(RUN) linkml-convert -s $(SOURCE_SCHEMA_PATH) -C Person $< -o $@
examples/%.ttl: src/data/examples/%.yaml
	$(RUN) linkml-convert -P EXAMPLE=http://example.org/ -s $(SOURCE_SCHEMA_PATH) -C Person $< -o $@

# Bump the linkml development dependency to its latest release.
upgrade:
	poetry add -D linkml@latest

# Test documentation locally
serve: mkd-serve

# Python datamodel
$(PYMODEL):
	mkdir -p $@


$(DOCDIR):
	mkdir -p $@

# Copy the hand-written docs, render the front page template to index.md, then
# generate the schema documentation into the same directory.
gendoc: $(DOCDIR)
	cp -rf $(SRC)/docs/* $(DOCDIR) ; \
	$(RUN) jinjanate $(SRC)/doc-templates/frontpage.md.jinja2 $(SOURCE_SCHEMA_PATH) -o $(DOCDIR)/index.md
	$(RUN) gen-doc -d $(DOCDIR) $(SOURCE_SCHEMA_PATH) --template-directory $(TEMPLATE_DIR) --index-name linkml-index

testdoc: gendoc serve

# `make mkd-<cmd>` runs `mkdocs <cmd>` (e.g. mkd-serve, mkd-gh-deploy).
MKDOCS = $(RUN) mkdocs
mkd-%:
	$(MKDOCS) $*

# One-time repository bootstrap used by `setup`.
# NOTE(review): `utils` and `src/linkml/*yaml` are not visible in this
# repository's layout; they look like template leftovers -- confirm.
PROJECT_FOLDERS = sqlschema shex shacl protobuf prefixmap owl jsonschema jsonld graphql excel
git-init-add: git-init git-add git-commit git-status
git-init:
	git init
git-add:
	git add .gitignore .github Makefile LICENSE *.md examples utils about.yaml mkdocs.yml poetry.lock project.Makefile pyproject.toml src/linkml/*yaml src/*/datamodel/*py src/data
	git add $(patsubst %, project/%, $(PROJECT_FOLDERS))
git-commit:
	git commit -m 'Initial commit' -a
git-status:
	git status

# Remove the generated project directory and the scratch directory.
clean:
	rm -rf $(DEST)
	rm -rf tmp

include project.Makefile

--------------------------------------------------------------------------------
/README.md:
-------------------------------------------------------------------------------- 1 | 2 | 3 | # A Simple Standard for Sharing Ontological Mappings (SSSOM) 4 | 5 | 6 | 7 | 8 | SSSOM is a Simple Standard for Sharing Ontological Mappings, providing 9 | 10 | 1. a TSV-based representation for ontology term mappings 11 | 1. a comprehensive set of standard metadata elements to describe mappings and 12 | 1. a standard translation between the TSV and the Web Ontology Language (OWL). 13 | 14 | The SSSOM TSV format in particular is geared towards the needs of the wider bioinformatics community as a way to safely exchange mappings in an easily readable yet semantically well-specified manner. Consider this example of a simple mapping file: 15 | 16 | | subject_id | predicate_id | object_id | mapping_justification | subject_label | object_label | 17 | | --- | --- | --- | --- | --- | --- | 18 | | HP:0009124 | skos:exactMatch | MP:0000003 | semapv:LexicalMatching | Abnormal adipose tissue morphology | abnormal adipose tissue morphology | 19 | | HP:0008551 | skos:exactMatch | MP:0000018 | semapv:LexicalMatching | Microtia | small ears | 20 | | HP:0000411 | skos:exactMatch | MP:0000021 | semapv:LexicalMatching | Protruding ear | prominent ears | 21 | 22 | SSSOM specifies all its metadata elements: 23 | 24 | - subject_id 25 | - predicate_id 26 | - object_id 27 | - mapping_justification (*NOTE: Since June 2022* `match_type` is being replaced by `mapping_justification`; see [here](https://github.com/mapping-commons/sssom/issues/150)) 28 | - subject_label 29 | - object_label 30 | 31 | including clear definitions, examples of use and controlled vocabulary where necessary, along with 30 other optional metadata elements to provide additional provenance. 32 | 33 | SSSOM further provides a standard way to 34 | - augment the TSV file with mapping-set-level metadata, such as creator_id, mapping_date or license, and 35 | - translate SSSOM-compliant TSV files into _OWL reified axioms_. 
This will allow the easy loading and merging of SSSOM mapping tables into existing ontologies using standard tools such as ROBOT (under development). 36 | 37 | Note that SSSOM is currently under development and subject to change. Please leave us a comment on the [issue tracker](https://github.com/OBOFoundry/SSSOM/issues) if you want to be involved. The full specification can be found [here](https://w3id.org/sssom/spec). 38 | 39 | ## Citation 40 | 41 | If you have found SSSOM to be helpful in your work, please consider citing: 42 | 43 | Nicolas Matentzoglu, James P Balhoff, Susan M Bello, Chris Bizon, Matthew Brush, Tiffany J Callahan, Christopher G Chute, William D Duncan, Chris T Evelo, Davera Gabriel, John Graybeal, Alasdair Gray, Benjamin M Gyori, Melissa Haendel, Henriette Harmse, Nomi L Harris, Ian Harrow, Harshad B Hegde, Amelia L Hoyt, Charles T Hoyt, Dazhi Jiao, Ernesto Jiménez-Ruiz, Simon Jupp, Hyeongsik Kim, Sebastian Koehler, Thomas Liener, Qinqin Long, James Malone, James A McLaughlin, Julie A McMurry, Sierra Moxon, Monica C Munoz-Torres, David Osumi-Sutherland, James A Overton, Bjoern Peters, Tim Putman, Núria Queralt-Rosinach, Kent Shefchek, Harold Solbrig, Anne Thessen, Tania Tudorache, Nicole Vasilevsky, Alex H Wagner, Christopher J Mungall, A Simple Standard for Sharing Ontological Mappings (SSSOM), Database, Volume 2022, 2022, baac035, https://doi.org/10.1093/database/baac035 44 | 45 | ```bibtex 46 | @article{10.1093/database/baac035, 47 | author = {Matentzoglu, Nicolas and Balhoff, James P and Bello, Susan M and Bizon, Chris and Brush, Matthew and Callahan, Tiffany J and Chute, Christopher G and Duncan, William D and Evelo, Chris T and Gabriel, Davera and Graybeal, John and Gray, Alasdair and Gyori, Benjamin M and Haendel, Melissa and Harmse, Henriette and Harris, Nomi L and Harrow, Ian and Hegde, Harshad B and Hoyt, Amelia L and Hoyt, Charles T and Jiao, Dazhi and Jiménez-Ruiz, Ernesto and Jupp, Simon and Kim, Hyeongsik and Koehler, Sebastian 
and Liener, Thomas and Long, Qinqin and Malone, James and McLaughlin, James A and McMurry, Julie A and Moxon, Sierra and Munoz-Torres, Monica C and Osumi-Sutherland, David and Overton, James A and Peters, Bjoern and Putman, Tim and Queralt-Rosinach, Núria and Shefchek, Kent and Solbrig, Harold and Thessen, Anne and Tudorache, Tania and Vasilevsky, Nicole and Wagner, Alex H and Mungall, Christopher J}, 48 | title = "{A Simple Standard for Sharing Ontological Mappings (SSSOM)}", 49 | journal = {Database}, 50 | volume = {2022}, 51 | year = {2022}, 52 | month = {05}, 53 | abstract = "{Despite progress in the development of standards for describing and exchanging scientific information, the lack of easy-to-use standards for mapping between different representations of the same or similar objects in different databases poses a major impediment to data integration and interoperability. Mappings often lack the metadata needed to be correctly interpreted and applied. For example, are two terms equivalent or merely related? Are they narrow or broad matches? Or are they associated in some other way? Such relationships between the mapped terms are often not documented, which leads to incorrect assumptions and makes them hard to use in scenarios that require a high degree of precision (such as diagnostics or risk prediction). Furthermore, the lack of descriptions of how mappings were done makes it hard to combine and reconcile mappings, particularly curated and automated ones. We have developed the Simple Standard for Sharing Ontological Mappings (SSSOM) which addresses these problems by: (i) Introducing a machine-readable and extensible vocabulary to describe metadata that makes imprecision, inaccuracy and incompleteness in mappings explicit. (ii) Defining an easy-to-use simple table-based format that can be integrated into existing data science pipelines without the need to parse or query ontologies, and that integrates seamlessly with Linked Data principles. 
(iii) Implementing open and community-driven collaborative workflows that are designed to evolve the standard continuously to address changing requirements and mapping practices. (iv) Providing reference tools and software libraries for working with the standard. In this paper, we present the SSSOM standard, describe several use cases in detail and survey some of the existing work on standardizing the exchange of mappings, with the goal of making mappings Findable, Accessible, Interoperable and Reusable (FAIR). The SSSOM specification can be found at http://w3id.org/sssom/spec.Database URL: http://w3id.org/sssom/spec}", 54 | issn = {1758-0463}, 55 | doi = {10.1093/database/baac035}, 56 | url = {https://doi.org/10.1093/database/baac035}, 57 | note = {baac035}, 58 | eprint = {https://academic.oup.com/database/article-pdf/doi/10.1093/database/baac035/43832024/baac035.pdf}, 59 | } 60 | ``` 61 | 62 | A [second report with updates since the primary SSSOM publication](https://ceur-ws.org/Vol-3324/om2022_LTpaper6.pdf) above was published as part of the proceedings of the Ontology Matching Workshop 2022. 63 | 64 | ## Copying 65 | 66 | SSSOM is distributed under the terms of the 3-clause BSD license, as included in the [LICENSE](LICENSE) file of the source distribution. 67 | 68 | By exception, the following files are _not_ covered by the 3-clause BSD license: 69 | 70 | * [sssom-banner.png](src/docs/images/sssom-banner.png): That file may only be used by members of the internal Monarch team and collaborators on Monarch flagship products. 
71 | 72 | -------------------------------------------------------------------------------- /SSSOM.md: -------------------------------------------------------------------------------- 1 | This page has moved here: 2 | 3 | https://mapping-commons.github.io/sssom/spec/ -------------------------------------------------------------------------------- /about.yaml: -------------------------------------------------------------------------------- 1 | name: sssom_schema 2 | description: A Simple Standard for Sharing Ontology Mappings (SSSOM) 3 | source_schema_path: src/sssom_schema/schema/sssom_schema.yaml 4 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Examples of use of sssom 2 | 3 | This folder contains example data conforming to sssom 4 | 5 | The source for these is in [src/data](../src/data/examples) -------------------------------------------------------------------------------- /examples/embedded/foodie-inc-2022-05-01.sssom.tsv: -------------------------------------------------------------------------------- 1 | # comment: We could map to FOODON:00004187 instead which more specifically refers to 2 | # 'raw' Pink apples. Decided against to be consistent with other mapping choices. 
3 | # curie_map: 4 | # FOODON: http://purl.obolibrary.org/obo/FOODON_ 5 | # KF_FOOD: https://kewl-foodie.inc/food/ 6 | # orcid: https://orcid.org/ 7 | # owl: http://www.w3.org/2002/07/owl# 8 | # rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns# 9 | # rdfs: http://www.w3.org/2000/01/rdf-schema# 10 | # semapv: https://w3id.org/semapv/vocab/ 11 | # skos: http://www.w3.org/2004/02/skos/core# 12 | # sssom: https://w3id.org/sssom/ 13 | # wikidata: https://www.wikidata.org/wiki/ 14 | # license: https://creativecommons.org/licenses/by/4.0/ 15 | # mapping_date: '2022-05-02' 16 | # mapping_set_description: Manually curated alignment of KEWL FOODIE INC internal food 17 | # and nutrition database with Food Ontology (FOODON). Intended to be used for ontological 18 | # analysis and grouping of KEWL FOODIE INC related data. 19 | # mapping_set_id: https://w3id.org/sssom/tutorial/example1.sssom.tsv 20 | # mapping_set_version: '2022-05-01' 21 | # object_source: wikidata:Q55118395 22 | # object_source_version: http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl 23 | # subject_source: KF_FOOD:DB 24 | subject_id subject_label predicate_id object_id object_label mapping_justification author_id object_source_version mapping_date confidence comment 25 | KF_FOOD:F001 apple skos:exactMatch FOODON:00002473 apple (whole) semapv:ManualMappingCuration orcid:0000-0002-7356-1779 http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl 2022-05-02 0.95 "We could map to FOODON:03310788 instead to cover sliced apples, but only ""whole"" apple types exist." 
26 | KF_FOOD:F002 gala skos:exactMatch FOODON:00003348 Gala apple (whole) semapv:ManualMappingCuration orcid:0000-0002-7356-1779 http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl 2022-05-02 1.0 27 | KF_FOOD:F003 pink skos:exactMatch FOODON:00004187 Pink apple (whole, raw) semapv:ManualMappingCuration orcid:0000-0002-7356-1779 http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl 2022-05-02 0.9 "We could map to FOODON:00004187 instead which more specifically refers to ""raw"" Pink apples. Decided against to be consistent with other mapping choices." 28 | KF_FOOD:F004 braeburn skos:exactMatch sssom:NoMapping semapv:ManualMappingCuration orcid:0000-0002-7356-1779 http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl 2022-05-02 1.0 29 | KF_FOOD:F004 braeburn skos:broadMatch FOODON:00002473 apple (whole) semapv:ManualMappingCuration orcid:0000-0002-7356-1779 http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl 2022-05-02 1.0 30 | 31 | -------------------------------------------------------------------------------- /examples/embedded/mp-hp-exact-0.0.1.sssom.tsv: -------------------------------------------------------------------------------- 1 | # curie_map: 2 | # HP: http://purl.obolibrary.org/obo/HP_ 3 | # MP: http://purl.obolibrary.org/obo/MP_ 4 | # owl: http://www.w3.org/2002/07/owl# 5 | # rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns# 6 | # rdfs: http://www.w3.org/2000/01/rdf-schema# 7 | # semapv: https://w3id.org/semapv/vocab/ 8 | # skos: http://www.w3.org/2004/02/skos/core# 9 | # sssom: https://w3id.org/sssom/ 10 | # license: https://creativecommons.org/publicdomain/zero/1.0/ 11 | # mapping_provider: http://purl.obolibrary.org/obo/upheno.owl 12 | # mapping_set_id: https://w3id.org/sssom/mappings/27f85fe9-8a72-4e76-909b-7ba4244d9ede 13 | subject_id subject_label predicate_id object_id object_label mapping_justification 14 | HP:0000175 Cleft palate skos:exactMatch MP:0000111 cleft palate 
semapv:LexicalMatching 15 | HP:0000252 Microcephaly skos:exactMatch MP:0000433 microcephaly semapv:LexicalMatching 16 | HP:0000260 Wide anterior fontanel skos:exactMatch MP:0000085 large anterior fontanelle semapv:LexicalMatching 17 | HP:0000375 Abnormal cochlea morphology skos:exactMatch MP:0000031 abnormal cochlea morphology semapv:LexicalMatching 18 | HP:0000411 Protruding ear skos:exactMatch MP:0000021 prominent ears semapv:LexicalMatching 19 | HP:0000822 Hypertension skos:exactMatch MP:0000231 hypertension semapv:LexicalMatching 20 | HP:0001336 Myoclonus skos:exactMatch MP:0000243 myoclonus semapv:LexicalMatching 21 | HP:0001363 Craniosynostosis skos:exactMatch MP:0000081 premature cranial suture closure semapv:LexicalMatching 22 | HP:0001596 Alopecia skos:exactMatch MP:0000414 alopecia semapv:LexicalMatching 23 | HP:0001627 Abnormal heart morphology skos:exactMatch MP:0000266 abnormal heart morphology semapv:LexicalMatching 24 | HP:0001633 Abnormal mitral valve morphology skos:exactMatch MP:0000286 abnormal mitral valve morphology semapv:LexicalMatching 25 | HP:0001667 Right ventricular hypertrophy skos:exactMatch MP:0000276 heart right ventricle hypertrophy semapv:LexicalMatching 26 | HP:0001679 Abnormal aortic morphology skos:exactMatch MP:0000272 abnormal aorta morphology semapv:LexicalMatching 27 | HP:0001719 Double outlet right ventricle skos:exactMatch MP:0000284 double outlet right ventricle semapv:LexicalMatching 28 | HP:0001882 Leukopenia skos:exactMatch MP:0000221 decreased leukocyte cell number semapv:LexicalMatching 29 | HP:0001913 Granulocytopenia skos:exactMatch MP:0000334 decreased granulocyte number semapv:LexicalMatching 30 | HP:0001974 Leukocytosis skos:exactMatch MP:0000218 increased leukocyte cell number semapv:LexicalMatching 31 | HP:0001978 Extramedullary hematopoiesis skos:exactMatch MP:0000240 extramedullary hematopoiesis semapv:LexicalMatching 32 | HP:0001981 Schistocytosis skos:exactMatch MP:0000314 schistocytosis 
semapv:LexicalMatching 33 | HP:0002212 Curly hair skos:exactMatch MP:0000410 waved hair semapv:LexicalMatching 34 | HP:0002659 Increased susceptibility to fractures skos:exactMatch MP:0000061 fragile skeleton semapv:LexicalMatching 35 | HP:0002763 Abnormal cartilage morphology skos:exactMatch MP:0000163 abnormal cartilage morphology semapv:LexicalMatching 36 | HP:0003307 Hyperlordosis skos:exactMatch MP:0000162 lordosis semapv:LexicalMatching 37 | HP:0004349 Reduced bone mineral density skos:exactMatch MP:0000063 decreased bone mineral density semapv:LexicalMatching 38 | HP:0006288 Advanced eruption of teeth skos:exactMatch MP:0000122 premature tooth eruption semapv:LexicalMatching 39 | HP:0008551 Microtia skos:exactMatch MP:0000018 small ears semapv:LexicalMatching 40 | HP:0009124 Abnormal adipose tissue morphology skos:exactMatch MP:0000003 abnormal adipose tissue morphology semapv:LexicalMatching 41 | HP:0009910 Aplasia of the middle ear ossicles skos:exactMatch MP:0000040 absent middle ear ossicles semapv:LexicalMatching 42 | HP:0009939 Mandibular aplasia skos:exactMatch MP:0000087 absent mandible semapv:LexicalMatching 43 | HP:0011002 Osteopetrosis skos:exactMatch MP:0000067 osteopetrosis semapv:LexicalMatching 44 | HP:0011457 Loss of eyelashes skos:exactMatch MP:0000425 loss of eyelid cilia semapv:LexicalMatching 45 | HP:0011897 Neutrophilia skos:exactMatch MP:0000219 increased neutrophil cell number semapv:LexicalMatching 46 | HP:0012234 Agranulocytosis skos:exactMatch MP:0000334 decreased granulocyte number semapv:LexicalMatching 47 | HP:0012543 Hemosiderinuria skos:exactMatch MP:0000327 hemosiderinuria semapv:LexicalMatching 48 | HP:0025065 Abnormal mean corpuscular volume skos:exactMatch MP:0000226 abnormal mean corpuscular volume semapv:LexicalMatching 49 | HP:0025084 Folliculitis skos:exactMatch MP:0000376 folliculitis semapv:LexicalMatching 50 | HP:0031377 Abnormal cell proliferation skos:exactMatch MP:0000350 abnormal cell proliferation 
semapv:LexicalMatching 51 | HP:0031851 Reduced hematocrit skos:exactMatch MP:0000208 decreased hematocrit semapv:LexicalMatching 52 | HP:0032310 Granulocytosis skos:exactMatch MP:0000322 increased granulocyte number semapv:LexicalMatching 53 | HP:0100629 Midline facial cleft skos:exactMatch MP:0000108 midline facial cleft semapv:LexicalMatching 54 | HP:0100671 Abnormal trabecular bone morphology skos:exactMatch MP:0000130 abnormal trabecular bone morphology semapv:LexicalMatching 55 | HP:0400001 Chin with vertical crease skos:exactMatch MP:0000114 cleft chin semapv:LexicalMatching 56 | 57 | -------------------------------------------------------------------------------- /examples/external/example1.sssom.tsv: -------------------------------------------------------------------------------- 1 | subject_id subject_label predicate_id object_id object_label mapping_justification author_id object_source_version mapping_date confidence comment 2 | KF_FOOD:F001 apple skos:exactMatch FOODON:00002473 apple (whole) semapv:ManualMappingCuration orcid:0000-0002-7356-1779 http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl 2022-05-02 0.95 "We could map to FOODON:03310788 instead to cover sliced apples, but only ""whole"" apple types exist." 3 | KF_FOOD:F002 gala skos:exactMatch FOODON:00003348 Gala apple (whole) semapv:ManualMappingCuration orcid:0000-0002-7356-1779 http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl 2022-05-02 1.0 4 | KF_FOOD:F003 pink skos:exactMatch FOODON:00004187 Pink apple (whole, raw) semapv:ManualMappingCuration orcid:0000-0002-7356-1779 http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl 2022-05-02 0.9 "We could map to FOODON:00004187 instead which more specifically refers to ""raw"" Pink apples. Decided against to be consistent with other mapping choices." 
5 | KF_FOOD:F004 braeburn skos:exactMatch sssom:NoMapping semapv:ManualMappingCuration orcid:0000-0002-7356-1779 http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl 2022-05-02 1.0 6 | KF_FOOD:F004 braeburn skos:broadMatch FOODON:00002473 apple (whole) semapv:ManualMappingCuration orcid:0000-0002-7356-1779 http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl 2022-05-02 1.0 7 | -------------------------------------------------------------------------------- /examples/external/example1.sssom.yml: -------------------------------------------------------------------------------- 1 | comment: We could map to FOODON:00004187 instead which more specifically refers to 2 | 'raw' Pink apples. Decided against to be consistent with other mapping choices. 3 | curie_map: 4 | FOODON: http://purl.obolibrary.org/obo/FOODON_ 5 | KF_FOOD: https://kewl-foodie.inc/food/ 6 | orcid: https://orcid.org/ 7 | owl: http://www.w3.org/2002/07/owl# 8 | rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns# 9 | rdfs: http://www.w3.org/2000/01/rdf-schema# 10 | semapv: https://w3id.org/semapv/vocab/ 11 | skos: http://www.w3.org/2004/02/skos/core# 12 | sssom: https://w3id.org/sssom/ 13 | wikidata: https://www.wikidata.org/wiki/ 14 | license: https://creativecommons.org/licenses/by/4.0/ 15 | mapping_date: '2022-05-02' 16 | mapping_set_description: Manually curated alignment of KEWL FOODIE INC internal food 17 | and nutrition database with Food Ontology (FOODON). Intended to be used for ontological 18 | analysis and grouping of KEWL FOODIE INC related data. 
19 | mapping_set_id: https://w3id.org/sssom/tutorial/example1.sssom.tsv 20 | mapping_set_version: '2022-05-01' 21 | object_source: wikidata:Q55118395 22 | object_source_version: http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl 23 | subject_source: KF_FOOD:DB -------------------------------------------------------------------------------- /examples/external/mp-hp-exact-0.0.1.sssom.tsv: -------------------------------------------------------------------------------- 1 | subject_id subject_label predicate_id object_id object_label mapping_justification 2 | HP:0000175 Cleft palate skos:exactMatch MP:0000111 cleft palate semapv:LexicalMatching 3 | HP:0000252 Microcephaly skos:exactMatch MP:0000433 microcephaly semapv:LexicalMatching 4 | HP:0000260 Wide anterior fontanel skos:exactMatch MP:0000085 large anterior fontanelle semapv:LexicalMatching 5 | HP:0000375 Abnormal cochlea morphology skos:exactMatch MP:0000031 abnormal cochlea morphology semapv:LexicalMatching 6 | HP:0000411 Protruding ear skos:exactMatch MP:0000021 prominent ears semapv:LexicalMatching 7 | HP:0000822 Hypertension skos:exactMatch MP:0000231 hypertension semapv:LexicalMatching 8 | HP:0001336 Myoclonus skos:exactMatch MP:0000243 myoclonus semapv:LexicalMatching 9 | HP:0001363 Craniosynostosis skos:exactMatch MP:0000081 premature cranial suture closure semapv:LexicalMatching 10 | HP:0001596 Alopecia skos:exactMatch MP:0000414 alopecia semapv:LexicalMatching 11 | HP:0001627 Abnormal heart morphology skos:exactMatch MP:0000266 abnormal heart morphology semapv:LexicalMatching 12 | HP:0001633 Abnormal mitral valve morphology skos:exactMatch MP:0000286 abnormal mitral valve morphology semapv:LexicalMatching 13 | HP:0001667 Right ventricular hypertrophy skos:exactMatch MP:0000276 heart right ventricle hypertrophy semapv:LexicalMatching 14 | HP:0001679 Abnormal aortic morphology skos:exactMatch MP:0000272 abnormal aorta morphology semapv:LexicalMatching 15 | HP:0001719 Double outlet right 
ventricle skos:exactMatch MP:0000284 double outlet right ventricle semapv:LexicalMatching 16 | HP:0001882 Leukopenia skos:exactMatch MP:0000221 decreased leukocyte cell number semapv:LexicalMatching 17 | HP:0001913 Granulocytopenia skos:exactMatch MP:0000334 decreased granulocyte number semapv:LexicalMatching 18 | HP:0001974 Leukocytosis skos:exactMatch MP:0000218 increased leukocyte cell number semapv:LexicalMatching 19 | HP:0001978 Extramedullary hematopoiesis skos:exactMatch MP:0000240 extramedullary hematopoiesis semapv:LexicalMatching 20 | HP:0001981 Schistocytosis skos:exactMatch MP:0000314 schistocytosis semapv:LexicalMatching 21 | HP:0002212 Curly hair skos:exactMatch MP:0000410 waved hair semapv:LexicalMatching 22 | HP:0002659 Increased susceptibility to fractures skos:exactMatch MP:0000061 fragile skeleton semapv:LexicalMatching 23 | HP:0002763 Abnormal cartilage morphology skos:exactMatch MP:0000163 abnormal cartilage morphology semapv:LexicalMatching 24 | HP:0003307 Hyperlordosis skos:exactMatch MP:0000162 lordosis semapv:LexicalMatching 25 | HP:0004349 Reduced bone mineral density skos:exactMatch MP:0000063 decreased bone mineral density semapv:LexicalMatching 26 | HP:0006288 Advanced eruption of teeth skos:exactMatch MP:0000122 premature tooth eruption semapv:LexicalMatching 27 | HP:0008551 Microtia skos:exactMatch MP:0000018 small ears semapv:LexicalMatching 28 | HP:0009124 Abnormal adipose tissue morphology skos:exactMatch MP:0000003 abnormal adipose tissue morphology semapv:LexicalMatching 29 | HP:0009910 Aplasia of the middle ear ossicles skos:exactMatch MP:0000040 absent middle ear ossicles semapv:LexicalMatching 30 | HP:0009939 Mandibular aplasia skos:exactMatch MP:0000087 absent mandible semapv:LexicalMatching 31 | HP:0011002 Osteopetrosis skos:exactMatch MP:0000067 osteopetrosis semapv:LexicalMatching 32 | HP:0011457 Loss of eyelashes skos:exactMatch MP:0000425 loss of eyelid cilia semapv:LexicalMatching 33 | HP:0011897 Neutrophilia 
skos:exactMatch MP:0000219 increased neutrophil cell number semapv:LexicalMatching 34 | HP:0012234 Agranulocytosis skos:exactMatch MP:0000334 decreased granulocyte number semapv:LexicalMatching 35 | HP:0012543 Hemosiderinuria skos:exactMatch MP:0000327 hemosiderinuria semapv:LexicalMatching 36 | HP:0025065 Abnormal mean corpuscular volume skos:exactMatch MP:0000226 abnormal mean corpuscular volume semapv:LexicalMatching 37 | HP:0025084 Folliculitis skos:exactMatch MP:0000376 folliculitis semapv:LexicalMatching 38 | HP:0031377 Abnormal cell proliferation skos:exactMatch MP:0000350 abnormal cell proliferation semapv:LexicalMatching 39 | HP:0031851 Reduced hematocrit skos:exactMatch MP:0000208 decreased hematocrit semapv:LexicalMatching 40 | HP:0032310 Granulocytosis skos:exactMatch MP:0000322 increased granulocyte number semapv:LexicalMatching 41 | HP:0100629 Midline facial cleft skos:exactMatch MP:0000108 midline facial cleft semapv:LexicalMatching 42 | HP:0100671 Abnormal trabecular bone morphology skos:exactMatch MP:0000130 abnormal trabecular bone morphology semapv:LexicalMatching 43 | HP:0400001 Chin with vertical crease skos:exactMatch MP:0000114 cleft chin semapv:LexicalMatching 44 | -------------------------------------------------------------------------------- /examples/external/mp-hp-exact-0.0.1.sssom.yml: -------------------------------------------------------------------------------- 1 | curie_map: 2 | HP: http://purl.obolibrary.org/obo/HP_ 3 | MP: http://purl.obolibrary.org/obo/MP_ 4 | owl: http://www.w3.org/2002/07/owl# 5 | rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns# 6 | rdfs: http://www.w3.org/2000/01/rdf-schema# 7 | semapv: https://w3id.org/semapv/vocab/ 8 | skos: http://www.w3.org/2004/02/skos/core# 9 | sssom: https://w3id.org/sssom/ 10 | license: https://creativecommons.org/publicdomain/zero/1.0/ 11 | mapping_provider: http://purl.obolibrary.org/obo/upheno.owl 12 | mapping_set_id: 
https://w3id.org/sssom/mappings/27f85fe9-8a72-4e76-909b-7ba4244d9ede -------------------------------------------------------------------------------- /examples/schema/composite-entities.sssom.tsv: -------------------------------------------------------------------------------- 1 | #curie_map: 2 | # HP: http://purl.obolibrary.org/obo/HP_ 3 | # MONDO: http://purl.obolibrary.org/obo/MONDO_ 4 | # MP: http://purl.obolibrary.org/obo/MP_ 5 | # SCHEMA: http://example.org/schema 6 | #mapping_set_id: https://w3id.org/sssom/commons/examples/composite-entities.sssom.tsv 7 | #license: https://creativecommons.org/publicdomain/zero/1.0/ 8 | #comment: This is an example file for the SSSOM for illustration only. Its contents are entirely fabricated. 9 | subject_id predicate_id object_id mapping_justification subject_type 10 | SCHEMA:0001/(disease:'MONDO:0005148',phenotype:'HP:0009124') skos:exactMatch MP:0000003 semapv:ManualMappingCuration composed entity expression 11 | SCHEMA:0001/(disease:'MONDO:0005149',phenotype:'HP:0008551') skos:exactMatch MP:0000018 semapv:ManualMappingCuration composed entity expression 12 | SCHEMA:0001/(disease:'MONDO:0005150',phenotype:'HP:0000411') skos:exactMatch MP:0000018 semapv:ManualMappingCuration composed entity expression 13 | -------------------------------------------------------------------------------- /examples/schema/curation_rule.sssom.tsv: -------------------------------------------------------------------------------- 1 | #curie_map: 2 | # HP: http://purl.obolibrary.org/obo/HP_ 3 | # MP: http://purl.obolibrary.org/obo/MP_ 4 | # orcid: https://orcid.org/ 5 | # DISEASE_MAPPING_COMMONS_RULES: https://w3id.org/sssom/commons/disease/curation-rules/ 6 | #mapping_set_id: https://w3id.org/sssom/commons/examples/curation_rule.sssom.tsv 7 | #license: "https://creativecommons.org/publicdomain/zero/1.0/" 8 | #creator_id: orcid:0000-0002-7356-1779 9 | #mapping_provider: "https://w3id.org/sssom/core_team" 10 | #comment: This is an example file for 
the SSSOM for illustration only. Its contents are entirely fabricated. 11 | subject_id predicate_id object_id mapping_justification curation_rule see_also 12 | HP:0009124 skos:exactMatch MP:0000003 semapv:ManualMappingCuration DISEASE_MAPPING_COMMONS_RULES:MPR2 https://github.com/mapping-commons/disease-mappings/issues/16 13 | HP:0008551 skos:exactMatch MP:0000018 semapv:ManualMappingCuration DISEASE_MAPPING_COMMONS_RULES:MPR3 https://github.com/mapping-commons/disease-mappings/issues/16 14 | HP:0000411 skos:exactMatch MP:0000021 semapv:ManualMappingCuration DISEASE_MAPPING_COMMONS_RULES:MPR3 https://github.com/mapping-commons/disease-mappings/issues/16 15 | -------------------------------------------------------------------------------- /examples/schema/curation_rule_text.sssom.tsv: -------------------------------------------------------------------------------- 1 | #curie_map: 2 | # HP: http://purl.obolibrary.org/obo/HP_ 3 | # MP: http://purl.obolibrary.org/obo/MP_ 4 | # orcid: https://orcid.org/ 5 | # DISEASE_MAPPING_COMMONS_RULES: https://w3id.org/sssom/commons/disease/curation-rules/ 6 | #mapping_set_id: https://w3id.org/sssom/commons/examples/curation_rule_text.sssom.tsv 7 | #license: "https://creativecommons.org/publicdomain/zero/1.0/" 8 | #creator_id: orcid:0000-0002-7356-1779 9 | #mapping_provider: "https://w3id.org/sssom/core_team" 10 | #comment: This is an example file for the SSSOM for illustration only. Its contents are entirely fabricated. 
11 | subject_id predicate_id object_id mapping_justification curation_rule_text see_also 12 | HP:0009124 skos:exactMatch MP:0000003 semapv:ManualMappingCuration The two phenotypes inhere in homologous structures and exhibit the same phenotypic quality https://github.com/mapping-commons/disease-mappings/issues/16 13 | HP:0008551 skos:exactMatch MP:0000018 semapv:ManualMappingCuration The two phenotypes inhere in homologous structures and exhibit the same phenotypic quality https://github.com/mapping-commons/disease-mappings/issues/16 14 | HP:0000411 skos:exactMatch MP:0000021 semapv:ManualMappingCuration The two phenotypes are associated with the exact same set of diseases https://github.com/mapping-commons/disease-mappings/issues/16 15 | -------------------------------------------------------------------------------- /examples/schema/curation_rule_text2.sssom.tsv: -------------------------------------------------------------------------------- 1 | #curie_map: 2 | # WTO: http://purl.obolibrary.org/obo/WTO_ 3 | # CO321: "http://www.cropontology.org/rdf/CO_321:" 4 | # ror: https://ror.org/ 5 | #mapping_set_id: https://w3id.org/sssom/commons/examples/curation_rule_text2.sssom.tsv 6 | #license: "https://www.etalab.gouv.fr/licence-ouverte-open-licence/" 7 | #comment: This is an example file for the SSSOM for illustration only. This example was extracted from a real mapping set where the subject source (WTO) is an ontology used to annotate text (e.g. scientific literature) and the object source (CO321) is an ontology used to annotate the traits evaluated from observational data. The objective of the alignment is to allow information retrieval from both textual and experimental phenotypic dataset. 
8 | #creator_id: ror:02kvxyf05 9 | #creator_label: "INRAE" 10 | subject_id subject_label predicate_id object_id object_label mapping_justification curation_rule_text comment 11 | WTO:0000304 cold resistance skos:closeMatch CO321:0000080 Cold tolerance semapv:ManualMappingCuration Rule 4: We consider that "tolerance" and "resistance" are almost equivalent when applied to abiotic environmental conditions. 12 | WTO:0000450 aluminium toxicity skos:closeMatch CO321:0000079 Aluminum tolerance semapv:ManualMappingCuration Rule 3: We consider that the user of the information retrieval function interested in plant traits related to metal toxicity (WTO) also wants to retrieve observational data measuring the plant tolerance to the same metal (CO_321). The rule metal + toxicity (WTO) <-> metal + tolerance (CO321) is valid for any kind of metal. 13 | WTO:0000065 anther extrusion skos:exactMatch CO321:0000982 Anther extrusion semapv:ManualMappingCuration 14 | WTO:0000296 aphid resistance skos:closeMatch CO321:0000085 Aphid damage semapv:ManualMappingCuration Rule 2: We consider that the user of the information retrieval function interested in plant traits related to damages caused by some animal, insect, nematode, etc. also wants to retrieve observational data mentioning resistance to the same living organism. 15 | WTO:0000281 Armyworm resistance skos:closeMatch CO321:0000086 Armyworm damage semapv:ManualMappingCuration Rule 2: We consider that the user of the information retrieval function interested in plant traits related to damages caused by some animal, insect, nematode, etc. also wants to retrieve observational data mentioning resistance to the same living organism. 
16 | WTO:0000125 awn color skos:exactMatch CO321:0000960 Awn color semapv:ManualMappingCuration 17 | WTO:0000126 awn length skos:exactMatch CO321:0000026 Awn length semapv:ManualMappingCuration 18 | WTO:0000452 bacterial leaf blight resistance skos:closeMatch CO321:0000932 Bacterial leaf blight severity semapv:ManualMappingCuration Rule 1.3: We consider that the user of the information retrieval function, given a pathogen or a disease, would like to retrieve all data, independently of the way the affection is observed. In observational data, a severity score is represented by two digits representing the vertical disease progress and an estimate of severity. The capacity of resistance to a disease would be deduced from the severity of this one on the plant. -------------------------------------------------------------------------------- /examples/schema/curie_map.sssom.tsv: -------------------------------------------------------------------------------- 1 | #curie_map: 2 | # HP: http://purl.obolibrary.org/obo/HP_ 3 | # MP: http://purl.obolibrary.org/obo/MP_ 4 | # orcid: https://orcid.org/ 5 | #mapping_set_id: https://w3id.org/sssom/commons/examples/curie_map.sssom.tsv 6 | #license: "https://creativecommons.org/publicdomain/zero/1.0/" 7 | #creator_id: 8 | # - orcid:0000-0002-7356-1779 9 | #mapping_provider: "https://w3id.org/sssom/core_team" 10 | #comment: This is an example file for the SSSOM for illustration only. Its contents are entirely fabricated. 
11 | subject_id predicate_id object_id mapping_justification 12 | HP:0009124 skos:exactMatch MP:0000003 semapv:ManualMappingCuration 13 | HP:0008551 skos:exactMatch MP:0000018 semapv:ManualMappingCuration 14 | HP:0000411 skos:exactMatch MP:0000021 semapv:ManualMappingCuration 15 | -------------------------------------------------------------------------------- /examples/schema/extension-slots.sssom.tsv: -------------------------------------------------------------------------------- 1 | #curie_map: 2 | # COMENT: https://example.com/entities/ 3 | # EXPROP: https://example.org/properties/ 4 | # ORGENT: https://example.org/entities/ 5 | #mapping_set_id: https://example.org/sets/exo2c-with-extensions 6 | #mapping_set_title: Sample set EXO2C with extension slots 7 | #license: https://creativecommons.org/licenses/by/4.0/ 8 | #extension_definitions: 9 | # - slot_name: ext_bar 10 | # property: EXPROP:barProperty 11 | # type_hint: xsd:integer 12 | # - slot_name: ext_baz 13 | # property: EXPROP:bazProperty 14 | # type_hint: linkml:Uriorcurie 15 | # - slot_name: ext_foo 16 | # property: EXPROP:fooProperty 17 | #ext_foo: Foo A 18 | #ext_undeclared_foo: Foo B 19 | subject_id subject_label predicate_id object_id object_label mapping_justification ext_bar ext_baz ext_undeclared_baz 20 | ORGENT:0001 alice skos:closeMatch COMENT:0011 alpha semapv:ManualMappingCuration 111 ORGENT:BAZ_0001 BAZ A 21 | ORGENT:0002 bob skos:closeMatch COMENT:0012 beta semapv:ManualMappingCuration 112 ORGENT:BAZ_0002 22 | ORGENT:0004 daphne skos:closeMatch COMENT:0014 delta semapv:ManualMappingCuration 114 Baz C 23 | ORGENT:0005 eve skos:closeMatch COMENT:0015 epsilon semapv:ManualMappingCuration 115 ORGENT:BAZ_0005 Baz E 24 | -------------------------------------------------------------------------------- /examples/schema/issue_tracker.sssom.tsv: -------------------------------------------------------------------------------- 1 | #curie_map: 2 | # HP: http://purl.obolibrary.org/obo/HP_ 3 | # MP: 
http://purl.obolibrary.org/obo/MP_ 4 | # orcid: https://orcid.org/ 5 | # MH_MAPPINGS_GITHUB_ISSUES: https://github.com/mapping-commons/mh_mapping_initiative/issues/ 6 | #mapping_set_id: https://w3id.org/sssom/commons/examples/issue_tracker.sssom.tsv 7 | #license: "https://creativecommons.org/publicdomain/zero/1.0/" 8 | #creator_id: orcid:0000-0002-7356-1779 9 | #mapping_provider: "https://w3id.org/sssom/core_team" 10 | #issue_tracker: "https://github.com/mapping-commons/mh_mapping_initiative/issues" 11 | #comment: This is an example file for the SSSOM for illustration only. Its contents are entirely fabricated. 12 | subject_id predicate_id object_id mapping_justification issue_tracker_item 13 | HP:0009124 skos:exactMatch MP:0000003 semapv:ManualMappingCuration MH_MAPPINGS_GITHUB_ISSUES:9123 14 | HP:0008551 skos:exactMatch MP:0000018 semapv:ManualMappingCuration 15 | HP:0000411 skos:exactMatch MP:0000021 semapv:ManualMappingCuration MH_MAPPINGS_GITHUB_ISSUES:9125 16 | -------------------------------------------------------------------------------- /examples/schema/issue_tracker_item.sssom.tsv: -------------------------------------------------------------------------------- 1 | #curie_map: 2 | # HP: http://purl.obolibrary.org/obo/HP_ 3 | # MP: http://purl.obolibrary.org/obo/MP_ 4 | # orcid: https://orcid.org/ 5 | # MH_MAPPINGS_GITHUB_ISSUES: https://github.com/mapping-commons/mh_mapping_initiative/issues/ 6 | #mapping_set_id: https://w3id.org/sssom/commons/examples/issue_tracker_item.sssom.tsv 7 | #license: "https://creativecommons.org/publicdomain/zero/1.0/" 8 | #creator_id: orcid:0000-0002-7356-1779 9 | #mapping_provider: "https://w3id.org/sssom/core_team" 10 | #comment: This is an example file for the SSSOM for illustration only. Its contents are entirely fabricated. 
11 | subject_id predicate_id object_id mapping_justification issue_tracker_item 12 | HP:0009124 skos:exactMatch MP:0000003 semapv:ManualMappingCuration MH_MAPPINGS_GITHUB_ISSUES:9123 13 | HP:0008551 skos:exactMatch MP:0000018 semapv:ManualMappingCuration 14 | HP:0000411 skos:exactMatch MP:0000021 semapv:ManualMappingCuration MH_MAPPINGS_GITHUB_ISSUES:9125 15 | -------------------------------------------------------------------------------- /examples/schema/literals.sssom.tsv: -------------------------------------------------------------------------------- 1 | #curie_map: 2 | # COMENT: https://example.com/entities/ 3 | #mapping_set_id: https://example.org/sets/literal-mappings 4 | #license: https://creativecommons.org/licenses/by/4.0/ 5 | subject_label predicate_id object_id object_label mapping_justification subject_type 6 | alice skos:closeMatch COMENT:0011 alpha semapv:ManualMappingCuration rdfs literal 7 | bob skos:closeMatch COMENT:0012 beta semapv:ManualMappingCuration rdfs literal 8 | daphne skos:closeMatch COMENT:0014 delta semapv:ManualMappingCuration rdfs literal 9 | eve skos:closeMatch COMENT:0015 epsilon semapv:ManualMappingCuration rdfs literal 10 | -------------------------------------------------------------------------------- /examples/schema/mapping_set_confidence.sssom.tsv: -------------------------------------------------------------------------------- 1 | #sssom_version: "1.1" 2 | #curie_map: 3 | # HP: http://purl.obolibrary.org/obo/HP_ 4 | # MP: http://purl.obolibrary.org/obo/MP_ 5 | # orcid: https://orcid.org/ 6 | #mapping_set_id: https://w3id.org/sssom/commons/examples/curie_map.sssom.tsv 7 | #mapping_set_confidence: 0.8 8 | #license: "https://creativecommons.org/publicdomain/zero/1.0/" 9 | #creator_id: 10 | # - orcid:0000-0002-7356-1779 11 | #mapping_provider: "https://w3id.org/sssom/core_team" 12 | #mapping_tool: AgreementMakerLight 13 | #comment: This is an example file for the SSSOM for illustration only. 
The mapping_set_confidence value expresses the confidence of the creator of the mapping into the agent that produced the mappings, i.e. AgreementMakerLight. 14 | subject_id predicate_id object_id mapping_justification 15 | HP:0009124 skos:exactMatch MP:0000003 semapv:LexicalMatching 16 | HP:0008551 skos:exactMatch MP:0000018 semapv:LexicalMatching 17 | -------------------------------------------------------------------------------- /examples/schema/no_term_found.sssom.tsv: -------------------------------------------------------------------------------- 1 | #curie_map: 2 | # HP: http://purl.obolibrary.org/obo/HP_ 3 | # MP: http://purl.obolibrary.org/obo/MP_ 4 | # obo: http://purl.obolibrary.org/obo/ 5 | # orcid: https://orcid.org/ 6 | #mapping_set_id: https://w3id.org/sssom/commons/examples/no_term_found.sssom.tsv 7 | #creator_id: 8 | # - orcid:0000-0002-7356-1779 9 | #subject_source: obo:hp 10 | #object_source: obo:mp 11 | #license: "https://creativecommons.org/publicdomain/zero/1.0/" 12 | #mapping_provider: "https://w3id.org/sssom/core_team" 13 | #comment: This is an example file for the SSSOM for illustration only. Its contents are entirely fabricated. 
14 | subject_id predicate_id object_id mapping_justification 15 | HP:0009124 skos:exactMatch MP:0000003 semapv:ManualMappingCuration 16 | HP:0000411 skos:exactMatch sssom:NoTermFound semapv:ManualMappingCuration 17 | -------------------------------------------------------------------------------- /examples/schema/predicate-types.sssom.tsv: -------------------------------------------------------------------------------- 1 | #curie_map: 2 | # COMENT: https://example.com/entities/ 3 | # ORGENT: https://example.org/entities/ 4 | # ORGPRO: https://example.org/properties/ 5 | #mapping_set_id: https://w3id.org/sssom/commons/examples/predicate-types.sssom.tsv 6 | #license: https://creativecommons.org/licenses/by/4.0/ 7 | subject_id subject_label predicate_id object_id object_label mapping_justification predicate_type 8 | ORGENT:0001 alice ORGPRO:example_annot_property COMENT:0011 alpha semapv:ManualMappingCuration owl annotation property 9 | ORGENT:0002 bob ORGPRO:example_object_property COMENT:0012 beta semapv:ManualMappingCuration owl object property 10 | ORGENT:0004 daphne ORGPRO:example_data_property COMENT:0014 delta semapv:ManualMappingCuration owl data property 11 | ORGENT:0005 eve ORGPRO:example_rdf_property COMENT:0015 epsilon semapv:ManualMappingCuration rdf property 12 | -------------------------------------------------------------------------------- /examples/schema/similarity_score.sssom.tsv: -------------------------------------------------------------------------------- 1 | #curie_map: 2 | # HP: http://purl.obolibrary.org/obo/HP_ 3 | # MP: http://purl.obolibrary.org/obo/MP_ 4 | # orcid: https://orcid.org/ 5 | # wikidata: https://www.wikidata.org/entity/ 6 | #mapping_set_id: https://w3id.org/sssom/commons/examples/similarity_score.sssom.tsv 7 | #license: "https://creativecommons.org/publicdomain/zero/1.0/" 8 | #creator_id: 9 | # - orcid:0000-0002-7356-1779 10 | #mapping_provider: "https://w3id.org/sssom/core_team" 11 | #comment: This is an example file for 
the SSSOM for illustration only. Its contents are entirely fabricated. 12 | subject_id predicate_id object_id mapping_justification similarity_score similarity_measure 13 | HP:0009124 skos:exactMatch MP:0000003 semapv:LexicalSimilarityThresholdMatching 0.8 wikidata:Q865360 14 | HP:0008551 skos:exactMatch MP:0000018 semapv:LexicalSimilarityThresholdMatching 0.4 wikidata:Q865360 15 | HP:0000411 skos:exactMatch MP:0000021 semapv:SemanticSimilarityThresholdMatching 1.0 wikidata:Q1784941 16 | -------------------------------------------------------------------------------- /examples/schema/version.sssom.tsv: -------------------------------------------------------------------------------- 1 | # sssom_version: "1.1" 2 | # curie_map: 3 | # HP: http://purl.obolibrary.org/obo/FBbt_ 4 | # MP: http://purl.obolibrary.org/obo/UBERON_ 5 | # license: https://w3id.org/sssom/license/unspecified 6 | subject_id predicate_id object_id mapping_justification 7 | HP:0009124 skos:exactMatch MP:0000003 semapv:LexicalMatching 8 | HP:0008551 skos:exactMatch MP:0000018 semapv:LexicalMatching 9 | 10 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: "A Simple Standard for Sharing Ontology Mappings (SSSOM)" 2 | site_author: "The SSSOM Community" 3 | theme: 4 | name: material 5 | analytics: 6 | gtag: G-2SYBSJVZ23 7 | # palette: 8 | # scheme: slate 9 | # primary: cyan 10 | features: 11 | - content.tabs.link 12 | plugins: 13 | - search 14 | - mermaid2 15 | markdown_extensions: 16 | - pymdownx.highlight: 17 | use_pygments: true 18 | - pymdownx.inlinehilite 19 | - pymdownx.snippets 20 | - pymdownx.tabbed: 21 | - pymdownx.critic 22 | - pymdownx.caret 23 | - pymdownx.keys 24 | - pymdownx.mark 25 | - pymdownx.tilde 26 | - admonition 27 | #- pymdownx.emoji: 28 | # emoji_index: !!python/name:material.extensions.emoji.twemoji 29 | # emoji_generator: 
!!python/name:material.extensions.emoji.to_svg 30 | nav: 31 | - Home: index.md 32 | - Introduction: introduction.md 33 | - Specification: 34 | - Introduction: spec-intro.md 35 | - Data model: 36 | - Introduction: spec-model.md 37 | - Applying Chaining Rules: chaining-rules.md 38 | - LinkML documentation: linkml-index.md 39 | - Serialisations: 40 | - Introduction: spec-formats.md 41 | - SSSOM/TSV serialisation: spec-formats-tsv.md 42 | - OWL/RDF serialisation: spec-formats-owl.md 43 | - JSON serialisation: spec-formats-json.md 44 | - Resources for contributors: contributing.md 45 | - Resources for users: 46 | - FAQ: faq.md 47 | - Community: 48 | - Use Cases: usecases.md 49 | - Workshops: workshops.md 50 | - Presentations: presentations.md 51 | - Training materials: 52 | - Overview: training.md 53 | - Basic Tutorial: tutorial.md 54 | - How to: 55 | - Mapping Justifications: mapping-justifications.md 56 | - How to use mapping predicates: mapping-predicates.md 57 | - Set up a mapping registry/commons: mapping-commons.md 58 | - A basic guide for the SSSOM toolkit: toolkit.md 59 | - 5-Star Entity Mappings - Cheatsheet: 5star-mappings.md 60 | - Matching tool implementation guide: matching-tool-implementation-guide.md 61 | - How to gradually enrich OMOP mappings with SSSOM: tutorials/omop-mappings.md 62 | - Reference: 63 | - Glossary: glossary.md 64 | - Related documentation: related-documentation.md 65 | - Funding: funding.md 66 | 67 | site_url: https://mapping-commons.github.io/sssom/ 68 | repo_url: https://github.com/mapping-commons/sssom/ 69 | edit_uri: "edit/master/src/docs/" 70 | -------------------------------------------------------------------------------- /project.Makefile: -------------------------------------------------------------------------------- 1 | ## Add your own custom Makefile targets here 2 | # Added by H2 3 | EXCEL_DIR = $(DEST)/excel 4 | 5 | gen-excel: 6 | mkdir -p $(EXCEL_DIR) 7 | $(RUN) gen-excel --output $(EXCEL_DIR)/sssom_schema.xlsx 
$(SOURCE_SCHEMA_PATH) 8 | 9 | build: 10 | poetry build 11 | 12 | pypi: 13 | poetry publish 14 | 15 | ####################################### 16 | ##### Mapping validation ############# 17 | ####################################### 18 | 19 | MAPPING_DIR_SCHEMA=examples/schema 20 | MAPPING_DIR_EMBEDDED=examples/embedded 21 | TMPDIR = tmp 22 | 23 | validate-example-schema-%: 24 | mkdir -p $(TMPDIR) 25 | tsvalid $(MAPPING_DIR_SCHEMA)/$*.sssom.tsv --comment "#" --skip E1 26 | sssom validate $(MAPPING_DIR_SCHEMA)/$*.sssom.tsv 27 | sssom convert $(MAPPING_DIR_SCHEMA)/$*.sssom.tsv -o $(TMPDIR)/schema-$*.sssom.ttl 28 | 29 | validate-example-embedded-%: 30 | mkdir -p $(TMPDIR) 31 | tsvalid $(MAPPING_DIR_EMBEDDED)/$*.sssom.tsv --comment "#" --skip E1 32 | sssom validate $(MAPPING_DIR_EMBEDDED)/$*.sssom.tsv 33 | sssom convert $(MAPPING_DIR_EMBEDDED)/$*.sssom.tsv -o $(TMPDIR)/embedded-$*.sssom.ttl 34 | 35 | MAPPINGS_SCHEMA=$(notdir $(wildcard $(MAPPING_DIR_SCHEMA)/*.sssom.tsv)) 36 | VALIDATE_MAPPINGS_SCHEMA=$(patsubst %.sssom.tsv, validate-example-schema-%, $(notdir $(wildcard $(MAPPING_DIR_SCHEMA)/*.sssom.tsv))) 37 | 38 | MAPPINGS_EMBEDDED=$(notdir $(wildcard $(MAPPING_DIR_EMBEDDED)/*.sssom.tsv)) 39 | VALIDATE_MAPPINGS_EMBEDDED=$(patsubst %.sssom.tsv, validate-example-embedded-%, $(notdir $(wildcard $(MAPPING_DIR_EMBEDDED)/*.sssom.tsv))) 40 | 41 | validate_mappings: 42 | $(MAKE) $(VALIDATE_MAPPINGS_SCHEMA) 43 | $(MAKE) $(VALIDATE_MAPPINGS_EMBEDDED) 44 | -------------------------------------------------------------------------------- /project/excel/sssom_schema.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mapping-commons/sssom/1fb4b8b98358839f201ff6c776b5d121ce6ddec9/project/excel/sssom_schema.xlsx -------------------------------------------------------------------------------- /project/graphql/sssom_schema.graphql: -------------------------------------------------------------------------------- 1 | # 
metamodel_version: 1.7.0 2 | type ExtensionDefinition 3 | { 4 | slotName: Ncname! 5 | property: Uriorcurie 6 | typeHint: Uriorcurie 7 | } 8 | 9 | type Mapping 10 | { 11 | subjectId: EntityReference 12 | subjectLabel: String 13 | subjectCategory: String 14 | predicateId: EntityReference! 15 | predicateLabel: String 16 | predicateModifier: PredicateModifierEnum 17 | objectId: EntityReference 18 | objectLabel: String 19 | objectCategory: String 20 | mappingJustification: EntityReference! 21 | authorId: [EntityReference] 22 | authorLabel: [String] 23 | reviewerId: [EntityReference] 24 | reviewerLabel: [String] 25 | creatorId: [EntityReference] 26 | creatorLabel: [String] 27 | license: Uri 28 | subjectType: EntityTypeEnum 29 | subjectSource: EntityReference 30 | subjectSourceVersion: String 31 | objectType: EntityTypeEnum 32 | objectSource: EntityReference 33 | objectSourceVersion: String 34 | mappingProvider: Uri 35 | mappingSource: EntityReference 36 | mappingCardinality: MappingCardinalityEnum 37 | mappingTool: String 38 | mappingToolVersion: String 39 | mappingDate: Date 40 | publicationDate: Date 41 | confidence: Double 42 | curationRule: [EntityReference] 43 | curationRuleText: [String] 44 | subjectMatchField: [EntityReference] 45 | objectMatchField: [EntityReference] 46 | matchString: [String] 47 | subjectPreprocessing: [EntityReference] 48 | objectPreprocessing: [EntityReference] 49 | similarityScore: Double 50 | similarityMeasure: String 51 | seeAlso: [String] 52 | issueTrackerItem: EntityReference 53 | other: String 54 | comment: String 55 | } 56 | 57 | type MappingRegistry 58 | { 59 | mappingRegistryId: EntityReference! 60 | mappingRegistryTitle: String 61 | mappingRegistryDescription: String 62 | imports: [Uri] 63 | mappingSetReferences: [MappingSetReference] 64 | documentation: Uri 65 | homepage: Uri 66 | issueTracker: Uri 67 | } 68 | 69 | type MappingSet 70 | { 71 | curieMap: [Prefix] 72 | mappings: [Mapping] 73 | mappingSetId: Uri! 
74 | mappingSetVersion: String 75 | mappingSetSource: [Uri] 76 | mappingSetTitle: String 77 | mappingSetDescription: String 78 | creatorId: [EntityReference] 79 | creatorLabel: [String] 80 | license: Uri! 81 | subjectType: EntityTypeEnum 82 | subjectSource: EntityReference 83 | subjectSourceVersion: String 84 | objectType: EntityTypeEnum 85 | objectSource: EntityReference 86 | objectSourceVersion: String 87 | mappingProvider: Uri 88 | mappingTool: String 89 | mappingToolVersion: String 90 | mappingDate: Date 91 | publicationDate: Date 92 | subjectMatchField: [EntityReference] 93 | objectMatchField: [EntityReference] 94 | subjectPreprocessing: [EntityReference] 95 | objectPreprocessing: [EntityReference] 96 | seeAlso: [String] 97 | issueTracker: Uri 98 | other: String 99 | comment: String 100 | extensionDefinitions: [ExtensionDefinition] 101 | } 102 | 103 | type MappingSetReference 104 | { 105 | mappingSetId: Uri! 106 | mirrorFrom: Uri 107 | registryConfidence: Double 108 | mappingSetGroup: String 109 | lastUpdated: Date 110 | localName: String 111 | } 112 | 113 | type NoTermFound 114 | { 115 | } 116 | 117 | type Prefix 118 | { 119 | prefixName: Ncname! 
120 | prefixUrl: Uri 121 | } 122 | 123 | type Propagatable 124 | { 125 | propagated: Boolean 126 | } 127 | 128 | -------------------------------------------------------------------------------- /project/jsonld/sssom_schema.context.jsonld: -------------------------------------------------------------------------------- 1 | { 2 | "comments": { 3 | "description": "Auto generated by LinkML jsonld context generator", 4 | "generation_date": "2024-08-09T22:25:39", 5 | "source": "sssom_schema.yaml" 6 | }, 7 | "@context": { 8 | "dcterms": "http://purl.org/dc/terms/", 9 | "linkml": "https://w3id.org/linkml/", 10 | "oboInOwl": "http://www.geneontology.org/formats/oboInOwl#", 11 | "owl": "http://www.w3.org/2002/07/owl#", 12 | "pav": "http://purl.org/pav/", 13 | "prov": "http://www.w3.org/ns/prov#", 14 | "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", 15 | "rdfs": "http://www.w3.org/2000/01/rdf-schema#", 16 | "semapv": "https://w3id.org/semapv/vocab/", 17 | "skos": "http://www.w3.org/2004/02/skos/core#", 18 | "sssom": "https://w3id.org/sssom/", 19 | "xsd": "http://www.w3.org/2001/XMLSchema#", 20 | "@vocab": "https://w3id.org/sssom/", 21 | "author_id": { 22 | "@type": "rdfs:Resource", 23 | "@id": "pav:authoredBy" 24 | }, 25 | "author_label": { 26 | "@id": "author_label" 27 | }, 28 | "comment": { 29 | "@id": "rdfs:comment" 30 | }, 31 | "confidence": { 32 | "@type": "xsd:double", 33 | "@id": "confidence" 34 | }, 35 | "creator_id": { 36 | "@type": "rdfs:Resource", 37 | "@id": "dcterms:creator" 38 | }, 39 | "creator_label": { 40 | "@id": "creator_label" 41 | }, 42 | "curation_rule": { 43 | "@type": "rdfs:Resource", 44 | "@id": "curation_rule" 45 | }, 46 | "curation_rule_text": { 47 | "@id": "curation_rule_text" 48 | }, 49 | "curie_map": { 50 | "@type": "@id", 51 | "@id": "curie_map" 52 | }, 53 | "documentation": { 54 | "@type": "@id", 55 | "@id": "documentation" 56 | }, 57 | "extension_definitions": { 58 | "@type": "@id", 59 | "@id": "extension_definitions" 60 | }, 61 | 
"property": { 62 | "@type": "@id", 63 | "@id": "property" 64 | }, 65 | "slot_name": { 66 | "@id": "slot_name" 67 | }, 68 | "type_hint": { 69 | "@type": "@id", 70 | "@id": "type_hint" 71 | }, 72 | "homepage": { 73 | "@type": "@id", 74 | "@id": "homepage" 75 | }, 76 | "imports": { 77 | "@type": "@id", 78 | "@id": "imports" 79 | }, 80 | "issue_tracker": { 81 | "@type": "@id", 82 | "@id": "issue_tracker" 83 | }, 84 | "issue_tracker_item": { 85 | "@type": "rdfs:Resource", 86 | "@id": "issue_tracker_item" 87 | }, 88 | "last_updated": { 89 | "@type": "xsd:date", 90 | "@id": "last_updated" 91 | }, 92 | "license": { 93 | "@type": "@id", 94 | "@id": "dcterms:license" 95 | }, 96 | "local_name": { 97 | "@id": "local_name" 98 | }, 99 | "mapping_cardinality": { 100 | "@context": { 101 | "@vocab": "@null", 102 | "text": "skos:notation", 103 | "description": "skos:prefLabel", 104 | "meaning": "@id" 105 | }, 106 | "@id": "mapping_cardinality" 107 | }, 108 | "mapping_date": { 109 | "@type": "xsd:date", 110 | "@id": "pav:authoredOn" 111 | }, 112 | "mapping_justification": { 113 | "@type": "rdfs:Resource", 114 | "@id": "mapping_justification" 115 | }, 116 | "mapping_provider": { 117 | "@type": "@id", 118 | "@id": "mapping_provider" 119 | }, 120 | "mapping_registry_description": { 121 | "@id": "mapping_registry_description" 122 | }, 123 | "mapping_registry_id": { 124 | "@type": "rdfs:Resource", 125 | "@id": "mapping_registry_id" 126 | }, 127 | "mapping_registry_title": { 128 | "@id": "mapping_registry_title" 129 | }, 130 | "mapping_set_description": { 131 | "@id": "dcterms:description" 132 | }, 133 | "mapping_set_group": { 134 | "@id": "mapping_set_group" 135 | }, 136 | "mapping_set_id": { 137 | "@type": "@id", 138 | "@id": "mapping_set_id" 139 | }, 140 | "mapping_set_references": { 141 | "@type": "@id", 142 | "@id": "mapping_set_references" 143 | }, 144 | "mapping_set_source": { 145 | "@type": "@id", 146 | "@id": "prov:wasDerivedFrom" 147 | }, 148 | "mapping_set_title": { 149 | "@id": 
"dcterms:title" 150 | }, 151 | "mapping_set_version": { 152 | "@id": "owl:versionInfo" 153 | }, 154 | "mapping_source": { 155 | "@type": "rdfs:Resource", 156 | "@id": "mapping_source" 157 | }, 158 | "mapping_tool": { 159 | "@id": "mapping_tool" 160 | }, 161 | "mapping_tool_version": { 162 | "@id": "mapping_tool_version" 163 | }, 164 | "mappings": { 165 | "@type": "@id", 166 | "@id": "mappings" 167 | }, 168 | "match_string": { 169 | "@id": "match_string" 170 | }, 171 | "mirror_from": { 172 | "@type": "@id", 173 | "@id": "mirror_from" 174 | }, 175 | "object_category": { 176 | "@id": "object_category" 177 | }, 178 | "object_id": { 179 | "@type": "rdfs:Resource", 180 | "@id": "owl:annotatedTarget" 181 | }, 182 | "object_label": { 183 | "@id": "object_label" 184 | }, 185 | "object_match_field": { 186 | "@type": "rdfs:Resource", 187 | "@id": "object_match_field" 188 | }, 189 | "object_preprocessing": { 190 | "@type": "rdfs:Resource", 191 | "@id": "object_preprocessing" 192 | }, 193 | "object_source": { 194 | "@type": "rdfs:Resource", 195 | "@id": "object_source" 196 | }, 197 | "object_source_version": { 198 | "@id": "object_source_version" 199 | }, 200 | "object_type": { 201 | "@context": { 202 | "@vocab": "@null", 203 | "text": "skos:notation", 204 | "description": "skos:prefLabel", 205 | "meaning": "@id" 206 | }, 207 | "@id": "object_type" 208 | }, 209 | "other": { 210 | "@id": "other" 211 | }, 212 | "predicate_id": { 213 | "@type": "rdfs:Resource", 214 | "@id": "owl:annotatedProperty" 215 | }, 216 | "predicate_label": { 217 | "@id": "predicate_label" 218 | }, 219 | "predicate_modifier": { 220 | "@context": { 221 | "@vocab": "@null", 222 | "text": "skos:notation", 223 | "description": "skos:prefLabel", 224 | "meaning": "@id" 225 | }, 226 | "@id": "predicate_modifier" 227 | }, 228 | "predicate_type": { 229 | "@context": { 230 | "@vocab": "@null", 231 | "text": "skos:notation", 232 | "description": "skos:prefLabel", 233 | "meaning": "@id" 234 | }, 235 | "@id": 
"predicate_type" 236 | }, 237 | "prefix_name": { 238 | "@id": "prefix_name" 239 | }, 240 | "prefix_url": { 241 | "@type": "@id", 242 | "@id": "prefix_url" 243 | }, 244 | "propagated": { 245 | "@type": "xsd:boolean", 246 | "@id": "propagated" 247 | }, 248 | "publication_date": { 249 | "@type": "xsd:date", 250 | "@id": "dcterms:created" 251 | }, 252 | "registry_confidence": { 253 | "@type": "xsd:double", 254 | "@id": "registry_confidence" 255 | }, 256 | "reviewer_id": { 257 | "@type": "rdfs:Resource", 258 | "@id": "reviewer_id" 259 | }, 260 | "reviewer_label": { 261 | "@id": "reviewer_label" 262 | }, 263 | "see_also": { 264 | "@id": "rdfs:seeAlso" 265 | }, 266 | "similarity_measure": { 267 | "@id": "similarity_measure" 268 | }, 269 | "similarity_score": { 270 | "@type": "xsd:double", 271 | "@id": "similarity_score" 272 | }, 273 | "subject_category": { 274 | "@id": "subject_category" 275 | }, 276 | "subject_id": { 277 | "@type": "rdfs:Resource", 278 | "@id": "owl:annotatedSource" 279 | }, 280 | "subject_label": { 281 | "@id": "subject_label" 282 | }, 283 | "subject_match_field": { 284 | "@type": "rdfs:Resource", 285 | "@id": "subject_match_field" 286 | }, 287 | "subject_preprocessing": { 288 | "@type": "rdfs:Resource", 289 | "@id": "subject_preprocessing" 290 | }, 291 | "subject_source": { 292 | "@type": "rdfs:Resource", 293 | "@id": "subject_source" 294 | }, 295 | "subject_source_version": { 296 | "@id": "subject_source_version" 297 | }, 298 | "subject_type": { 299 | "@context": { 300 | "@vocab": "@null", 301 | "text": "skos:notation", 302 | "description": "skos:prefLabel", 303 | "meaning": "@id" 304 | }, 305 | "@id": "subject_type" 306 | }, 307 | "ExtensionDefinition": { 308 | "@id": "ExtensionDefinition" 309 | }, 310 | "Mapping": { 311 | "@id": "owl:Axiom" 312 | }, 313 | "MappingRegistry": { 314 | "@id": "MappingRegistry" 315 | }, 316 | "MappingSet": { 317 | "@id": "MappingSet" 318 | }, 319 | "MappingSetReference": { 320 | "@id": "MappingSetReference" 321 | }, 322 
| "NoTermFound": { 323 | "@id": "NoTermFound" 324 | }, 325 | "Prefix": { 326 | "@id": "Prefix" 327 | }, 328 | "Propagatable": { 329 | "@id": "Propagatable" 330 | } 331 | } 332 | } 333 | -------------------------------------------------------------------------------- /project/prefixmap/sssom_schema.yaml: -------------------------------------------------------------------------------- 1 | { 2 | "dcterms": "http://purl.org/dc/terms/", 3 | "linkml": "https://w3id.org/linkml/", 4 | "oboInOwl": "http://www.geneontology.org/formats/oboInOwl#", 5 | "owl": "http://www.w3.org/2002/07/owl#", 6 | "pav": "http://purl.org/pav/", 7 | "prov": "http://www.w3.org/ns/prov#", 8 | "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", 9 | "rdfs": "http://www.w3.org/2000/01/rdf-schema#", 10 | "semapv": "https://w3id.org/semapv/vocab/", 11 | "skos": "http://www.w3.org/2004/02/skos/core#", 12 | "sssom": "https://w3id.org/sssom/", 13 | "xsd": "http://www.w3.org/2001/XMLSchema#", 14 | "Mapping": { 15 | "@id": "owl:Axiom" 16 | } 17 | } -------------------------------------------------------------------------------- /project/protobuf/sssom_schema.proto: -------------------------------------------------------------------------------- 1 | syntax="proto3"; 2 | package 3 | // metamodel_version: 1.7.0 4 | // A definition of an extension (non-standard) slot. 
5 | message ExtensionDefinition 6 | { 7 | ncname slotName = 0 8 | uriorcurie property = 0 9 | uriorcurie typeHint = 0 10 | } 11 | // Represents an individual mapping between a pair of entities 12 | message Mapping 13 | { 14 | entityReference subjectId = 0 15 | string subjectLabel = 0 16 | string subjectCategory = 0 17 | entityReference predicateId = 0 18 | string predicateLabel = 0 19 | predicateModifierEnum predicateModifier = 0 20 | entityReference objectId = 0 21 | string objectLabel = 0 22 | string objectCategory = 0 23 | entityReference mappingJustification = 0 24 | repeated entityReference authorId = 0 25 | repeated string authorLabel = 0 26 | repeated entityReference reviewerId = 0 27 | repeated string reviewerLabel = 0 28 | repeated entityReference creatorId = 0 29 | repeated string creatorLabel = 0 30 | uri license = 0 31 | entityTypeEnum subjectType = 0 32 | entityReference subjectSource = 0 33 | string subjectSourceVersion = 0 34 | entityTypeEnum objectType = 0 35 | entityReference objectSource = 0 36 | string objectSourceVersion = 0 37 | uri mappingProvider = 0 38 | entityReference mappingSource = 0 39 | mappingCardinalityEnum mappingCardinality = 0 40 | string mappingTool = 0 41 | string mappingToolVersion = 0 42 | date mappingDate = 0 43 | date publicationDate = 0 44 | double confidence = 0 45 | repeated entityReference curationRule = 0 46 | repeated string curationRuleText = 0 47 | repeated entityReference subjectMatchField = 0 48 | repeated entityReference objectMatchField = 0 49 | repeated string matchString = 0 50 | repeated entityReference subjectPreprocessing = 0 51 | repeated entityReference objectPreprocessing = 0 52 | double similarityScore = 0 53 | string similarityMeasure = 0 54 | repeated string seeAlso = 0 55 | entityReference issueTrackerItem = 0 56 | string other = 0 57 | string comment = 0 58 | } 59 | // A registry for managing mapping sets. It holds a set of mapping set references, and can import other registries. 
60 | message MappingRegistry 61 | { 62 | entityReference mappingRegistryId = 0 63 | string mappingRegistryTitle = 0 64 | string mappingRegistryDescription = 0 65 | repeated uri imports = 0 66 | repeated mappingSetReference mappingSetReferences = 0 67 | uri documentation = 0 68 | uri homepage = 0 69 | uri issueTracker = 0 70 | } 71 | // Represents a set of mappings 72 | message MappingSet 73 | { 74 | repeated prefix curieMap = 0 75 | repeated mapping mappings = 0 76 | uri mappingSetId = 0 77 | string mappingSetVersion = 0 78 | repeated uri mappingSetSource = 0 79 | string mappingSetTitle = 0 80 | string mappingSetDescription = 0 81 | repeated entityReference creatorId = 0 82 | repeated string creatorLabel = 0 83 | uri license = 0 84 | entityTypeEnum subjectType = 0 85 | entityReference subjectSource = 0 86 | string subjectSourceVersion = 0 87 | entityTypeEnum objectType = 0 88 | entityReference objectSource = 0 89 | string objectSourceVersion = 0 90 | uri mappingProvider = 0 91 | string mappingTool = 0 92 | string mappingToolVersion = 0 93 | date mappingDate = 0 94 | date publicationDate = 0 95 | repeated entityReference subjectMatchField = 0 96 | repeated entityReference objectMatchField = 0 97 | repeated entityReference subjectPreprocessing = 0 98 | repeated entityReference objectPreprocessing = 0 99 | repeated string seeAlso = 0 100 | uri issueTracker = 0 101 | string other = 0 102 | string comment = 0 103 | repeated extensionDefinition extensionDefinitions = 0 104 | } 105 | // A reference to a mapping set. It allows to augment mapping set metadata from the perspective of the registry, for example, providing confidence, or a local filename or a grouping. 
106 | message MappingSetReference 107 | { 108 | uri mappingSetId = 0 109 | uri mirrorFrom = 0 110 | double registryConfidence = 0 111 | string mappingSetGroup = 0 112 | date lastUpdated = 0 113 | string localName = 0 114 | } 115 | message Prefix 116 | { 117 | ncname prefixName = 0 118 | uri prefixUrl = 0 119 | } 120 | // Metamodel extension class to describe slots whose value can be propagated down from the MappingSet class to the Mapping class. 121 | message Propagatable 122 | { 123 | boolean propagated = 0 124 | } 125 | -------------------------------------------------------------------------------- /project/shex/sssom_schema.shex: -------------------------------------------------------------------------------- 1 | # metamodel_version: 1.7.0 2 | BASE 3 | PREFIX prov: 4 | PREFIX skos: 5 | PREFIX owl: 6 | PREFIX rdf: 7 | PREFIX rdfs: 8 | PREFIX xsd: 9 | PREFIX linkml: 10 | PREFIX pav: 11 | PREFIX dc1: 12 | 13 | 14 | rdfs:Resource 15 | 16 | linkml:String xsd:string 17 | 18 | linkml:Integer xsd:integer 19 | 20 | linkml:Boolean xsd:boolean 21 | 22 | linkml:Float xsd:float 23 | 24 | linkml:Double xsd:double 25 | 26 | linkml:Decimal xsd:decimal 27 | 28 | linkml:Time xsd:time 29 | 30 | linkml:Date xsd:date 31 | 32 | linkml:Datetime xsd:dateTime 33 | 34 | linkml:DateOrDatetime linkml:DateOrDatetime 35 | 36 | linkml:Uriorcurie IRI 37 | 38 | linkml:Curie xsd:string 39 | 40 | linkml:Uri IRI 41 | 42 | linkml:Ncname xsd:string 43 | 44 | linkml:Objectidentifier IRI 45 | 46 | linkml:Nodeidentifier NONLITERAL 47 | 48 | linkml:Jsonpointer xsd:string 49 | 50 | linkml:Jsonpath xsd:string 51 | 52 | linkml:Sparqlpath xsd:string 53 | 54 | CLOSED { 55 | ( $ ( @linkml:Ncname ; 56 | @linkml:Uriorcurie ? ; 57 | @linkml:Uriorcurie ? 58 | ) ; 59 | rdf:type [ ] ? 60 | ) 61 | } 62 | 63 | CLOSED { 64 | ( $ ( owl:annotatedSource @ ? ; 65 | @linkml:String ? ; 66 | @linkml:String ? ; 67 | owl:annotatedProperty @ ; 68 | @linkml:String ? ; 69 | [ ] ? ; 70 | owl:annotatedTarget @ ? 
; 71 | @linkml:String ? ; 72 | @linkml:String ? ; 73 | @ ; 74 | pav:authoredBy @ * ; 75 | @linkml:String * ; 76 | @ * ; 77 | @linkml:String * ; 78 | dc1:creator @ * ; 79 | @linkml:String * ; 80 | dc1:license @linkml:Uri ? ; 81 | [ owl:Class owl:ObjectProperty owl:DataProperty owl:AnnotationProperty owl:NamedIndividual skos:Concept 82 | rdfs:Resource rdfs:Class rdfs:Literal rdfs:Datatype rdf:Property ] ? ; 83 | @ ? ; 84 | @linkml:String ? ; 85 | [ owl:Class owl:ObjectProperty owl:DataProperty owl:AnnotationProperty owl:NamedIndividual skos:Concept 86 | rdfs:Resource rdfs:Class rdfs:Literal rdfs:Datatype rdf:Property ] ? ; 87 | @ ? ; 88 | @linkml:String ? ; 89 | @linkml:Uri ? ; 90 | @ ? ; 91 | [ 92 | 93 | 94 | ] ? ; 95 | @linkml:String ? ; 96 | @linkml:String ? ; 97 | pav:authoredOn @linkml:Date ? ; 98 | dc1:created @linkml:Date ? ; 99 | @linkml:Double ? ; 100 | @ * ; 101 | @linkml:String * ; 102 | @ * ; 103 | @ * ; 104 | @linkml:String * ; 105 | @ * ; 106 | @ * ; 107 | @linkml:Double ? ; 108 | @linkml:String ? ; 109 | rdfs:seeAlso @linkml:String * ; 110 | @ ? ; 111 | @linkml:String ? ; 112 | rdfs:comment @linkml:String ? 113 | ) ; 114 | rdf:type [ owl:Axiom ] ? 115 | ) 116 | } 117 | 118 | CLOSED { 119 | ( $ ( @ ; 120 | @linkml:String ? ; 121 | @linkml:String ? ; 122 | @linkml:Uri * ; 123 | @ * ; 124 | @linkml:Uri ? ; 125 | @linkml:Uri ? ; 126 | @linkml:Uri ? 127 | ) ; 128 | rdf:type [ ] ? 129 | ) 130 | } 131 | 132 | CLOSED { 133 | ( $ ( @ * ; 134 | @ * ; 135 | @linkml:Uri ; 136 | owl:versionInfo @linkml:String ? ; 137 | prov:wasDerivedFrom @linkml:Uri * ; 138 | dc1:title @linkml:String ? ; 139 | dc1:description @linkml:String ? ; 140 | dc1:creator @ * ; 141 | @linkml:String * ; 142 | dc1:license @linkml:Uri ; 143 | [ owl:Class owl:ObjectProperty owl:DataProperty owl:AnnotationProperty owl:NamedIndividual skos:Concept 144 | rdfs:Resource rdfs:Class rdfs:Literal rdfs:Datatype rdf:Property ] ? ; 145 | @ ? ; 146 | @linkml:String ? 
; 147 | [ owl:Class owl:ObjectProperty owl:DataProperty owl:AnnotationProperty owl:NamedIndividual skos:Concept 148 | rdfs:Resource rdfs:Class rdfs:Literal rdfs:Datatype rdf:Property ] ? ; 149 | @ ? ; 150 | @linkml:String ? ; 151 | @linkml:Uri ? ; 152 | @linkml:String ? ; 153 | @linkml:String ? ; 154 | pav:authoredOn @linkml:Date ? ; 155 | dc1:created @linkml:Date ? ; 156 | @ * ; 157 | @ * ; 158 | @ * ; 159 | @ * ; 160 | rdfs:seeAlso @linkml:String * ; 161 | @linkml:Uri ? ; 162 | @linkml:String ? ; 163 | rdfs:comment @linkml:String ? ; 164 | @ * 165 | ) ; 166 | rdf:type [ ] ? 167 | ) 168 | } 169 | 170 | CLOSED { 171 | ( $ ( @linkml:Uri ; 172 | @linkml:Uri ? ; 173 | @linkml:Double ? ; 174 | @linkml:String ? ; 175 | @linkml:Date ? ; 176 | @linkml:String ? 177 | ) ; 178 | rdf:type [ ] ? 179 | ) 180 | } 181 | 182 | CLOSED { 183 | ( $ rdf:type . * ; 184 | rdf:type [ ] ? 185 | ) 186 | } 187 | 188 | CLOSED { 189 | ( $ ( @linkml:Ncname ; 190 | @linkml:Uri ? 191 | ) ; 192 | rdf:type [ ] 193 | ) 194 | } 195 | 196 | CLOSED { 197 | ( $ @linkml:Boolean ? ; 198 | rdf:type [ ] ? 199 | ) 200 | } 201 | 202 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "sssom-schema" 3 | version = "0.0.0" 4 | description = "SSSOM is a Simple Standard for Sharing Ontology Mappings." 
5 | readme = "README.md" 6 | authors = [ 7 | "Nicolas Matentzoglu ", 8 | "Harshad Hegde " 9 | ] 10 | license = "MIT" 11 | 12 | [tool.poetry.dependencies] 13 | python = "^3.8.1" 14 | jinjanator = "*" 15 | linkml-runtime = "*" 16 | 17 | [tool.poetry.dev-dependencies] 18 | linkml = "^1.7.0" 19 | mkdocs-material = "^8.2.8" 20 | mkdocs-mermaid2-plugin = "^1.1.1" 21 | 22 | [build-system] 23 | requires = ["poetry-core>=1.0.0"] 24 | build-backend = "poetry.core.masonry.api" 25 | 26 | [tool.poetry.extras] 27 | docs = ["linkml", "mkdocs-material"] 28 | 29 | [tool.codespell] 30 | # Ref: https://github.com/codespell-project/codespell#using-a-config-file 31 | skip = '.git*,*.pdf,*.lock' 32 | check-hidden = true 33 | ignore-regex = '\b(COMENT|EHR|LOD)\b' 34 | ignore-words-list = 'disjointness' 35 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Wrapper script for docker. 3 | # 4 | # This is used primarily for wrapping the GNU Make workflow. 5 | # Instead of typing "make TARGET", type "./run.sh make TARGET". 6 | # This will run the make workflow within a docker container. 7 | # 8 | # The current working directory ($PWD) is mounted into the container 9 | # as /work, so run this script from the directory you want to expose. 10 | # 11 | # See README-editors.md for more details. 
12 | docker run -v $PWD:/work -w /work -e ROBOT_JAVA_ARGS='-Xmx8G' --rm -ti obolibrary/odkfull "$@" -------------------------------------------------------------------------------- /scripts/gh_table.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | my $n=0; 4 | my $len; 5 | my $hlen; 6 | while(<>) { 7 | chomp; 8 | if ($n==0 && m@^\#@) { 9 | s@^\#@@; 10 | } 11 | my (@vals) = split(/\t/,$_); 12 | @vals = map {s@\|@, @g; $_} @vals; 13 | if (!$hlen) { 14 | $hlen = scalar(@vals); 15 | } 16 | while (scalar(@vals) < $hlen) { 17 | push(@vals, ''); 18 | } 19 | print '|'.join('|',@vals)."|\n"; 20 | $nulen = scalar(@vals); 21 | if ($n > 0) { 22 | if ($len ne $nulen) { 23 | print STDERR "MISMATCH: $len != $nulen\n"; 24 | } 25 | } 26 | $len = $nulen; 27 | if ($n ==0) { 28 | @vals = map {"---"} @vals; 29 | print '|'.join('|',@vals)."|\n"; 30 | } 31 | $n++; 32 | } -------------------------------------------------------------------------------- /src/CONFIG.yaml: -------------------------------------------------------------------------------- 1 | model_organization: mapping-commons # Name of github individual organization where this cone exists (e.g. linkml / mygithub 2 | model_name: sssom # Name of target repository (e.g. my-wonderful-model) 3 | root_schema: sssom # the name of the base schema file (w/o .yaml) 4 | model_root_class: MappingSet # 36 | {% endif %} 37 | 38 | ## Slots 39 | 40 | | Name | Cardinality and Range | Description | Inheritance | 41 | | --- | --- | --- | --- | 42 | {% if gen.get_direct_slots(element)|length > 0 %} 43 | {%- for slot in gen.get_direct_slots(element) -%} 44 | | {{ gen.link(slot) }} | {{ gen.cardinality(slot) }}
{{ gen.link(slot.range) }} | {{ slot.description|enshorten }} | direct | 45 | {% endfor -%} 46 | {% endif -%} 47 | {% if gen.get_indirect_slots(element)|length > 0 %} 48 | {%- for slot in gen.get_indirect_slots(element) -%} 49 | | {{ gen.link(slot) }} | {{ gen.cardinality(slot) }}
{{ gen.link(slot.range) }} | {{ slot.description|enshorten }} | {{ gen.links(gen.get_slot_inherited_from(element.name, slot.name))|join(', ') }} | 50 | {% endfor -%} 51 | {% endif %} 52 | 53 | {% if schemaview.is_mixin(element.name) %} 54 | ## Mixin Usage 55 | 56 | | mixed into | description | 57 | | --- | --- | 58 | {% for c in schemaview.class_children(element.name, is_a=False) -%} 59 | | {{ gen.link(c) }} | {{ schemaview.get_class(c).description|enshorten }} | 60 | {% endfor %} 61 | {% endif %} 62 | 63 | {% if schemaview.usage_index().get(element.name) %} 64 | ## Usages 65 | 66 | | used by | used in | type | used | 67 | | --- | --- | --- | --- | 68 | {% for usage in schemaview.usage_index().get(element.name) -%} 69 | | {{gen.link(usage.used_by)}} | {{gen.link(usage.slot)}} | {{usage.metaslot}} | {{ gen.link(usage.used) }} | 70 | {% endfor %} 71 | {% endif %} 72 | 73 | {% include "common_metadata.md.jinja2" %} 74 | 75 | 76 | {% if schemaview.get_mappings(element.name).items() -%} 77 | ## Mappings 78 | 79 | | Mapping Type | Mapped Value | 80 | | --- | --- | 81 | {% for m, mt in schemaview.get_mappings(element.name).items() -%} 82 | {% if mt|length > 0 -%} 83 | | {{ m }} | {{ mt|join(', ') }} | 84 | {% endif -%} 85 | {% endfor %} 86 | 87 | {% endif -%} 88 | 89 | {% if gen.example_object_blobs(element.name) -%} 90 | ## Examples 91 | {% for name, blob in gen.example_object_blobs(element.name) -%} 92 | ### Example: {{name}} 93 | 94 | ```yaml 95 | {{ blob }} 96 | ``` 97 | {% endfor %} 98 | {% endif %} 99 | 100 | 101 | ## LinkML Source 102 | 103 | 104 | 105 | ### Direct 106 | 107 |
108 | ```yaml 109 | {{gen.yaml(element)}} 110 | ``` 111 |
112 | 113 | ### Induced 114 | 115 |
116 | ```yaml 117 | {{gen.yaml(element, inferred=True)}} 118 | ``` 119 |
120 | 121 | {%- if footer -%} 122 | {{footer}} 123 | {%- endif -%} -------------------------------------------------------------------------------- /src/doc-templates/class_diagram.md.jinja2: -------------------------------------------------------------------------------- 1 | {% if schemaview.class_parents(element.name) and schemaview.class_children(element.name) %} 2 | ```{{ gen.mermaid_directive() }} 3 | classDiagram 4 | class {{ gen.name(element) }} 5 | {% for s in schemaview.class_parents(element.name)|sort(attribute='name') -%} 6 | {{ gen.name(schemaview.get_element(s)) }} <|-- {{ gen.name(element) }} 7 | {% endfor %} 8 | 9 | {% for s in schemaview.class_children(element.name)|sort(attribute='name') -%} 10 | {{ gen.name(element) }} <|-- {{ gen.name(schemaview.get_element(s)) }} 11 | {% endfor %} 12 | 13 | {% for s in schemaview.class_induced_slots(element.name)|sort(attribute='name') -%} 14 | {{ gen.name(element) }} : {{gen.name(s)}} 15 | {% if s.range not in gen.all_type_object_names() %} 16 | {{ gen.name(element) }} --|> {{ s.range }} : {{ gen.name(s) }} 17 | {% endif %} 18 | {% endfor %} 19 | ``` 20 | {% elif schemaview.class_parents(element.name) %} 21 | ```{{ gen.mermaid_directive() }} 22 | classDiagram 23 | class {{ gen.name(element) }} 24 | {% for s in schemaview.class_parents(element.name)|sort(attribute='name') -%} 25 | {{ gen.name(schemaview.get_element(s)) }} <|-- {{ gen.name(element) }} 26 | {% endfor %} 27 | {% for s in schemaview.class_induced_slots(element.name)|sort(attribute='name') -%} 28 | {{ gen.name(element) }} : {{gen.name(s)}} 29 | {% if s.range not in gen.all_type_object_names() %} 30 | {{ gen.name(element) }} --|> {{ s.range }} : {{ gen.name(s) }} 31 | {% endif %} 32 | {% endfor %} 33 | ``` 34 | {% elif schemaview.class_children(element.name) %} 35 | ```{{ gen.mermaid_directive() }} 36 | classDiagram 37 | class {{ gen.name(element) }} 38 | {% for s in schemaview.class_children(element.name)|sort(attribute='name') -%} 39 | {{ 
gen.name(element) }} <|-- {{ gen.name(schemaview.get_element(s)) }} 40 | {% endfor %} 41 | {% for s in schemaview.class_induced_slots(element.name)|sort(attribute='name') -%} 42 | {{ gen.name(element) }} : {{gen.name(s)}} 43 | {% if s.range not in gen.all_type_object_names() %} 44 | {{ gen.name(element) }} --|> {{ s.range }} : {{ gen.name(s) }} 45 | {% endif %} 46 | {% endfor %} 47 | ``` 48 | {% else %} 49 | ```{{ gen.mermaid_directive() }} 50 | classDiagram 51 | class {{ gen.name(element) }} 52 | {% for s in schemaview.class_induced_slots(element.name)|sort(attribute='name') -%} 53 | {{ gen.name(element) }} : {{gen.name(s)}} 54 | {% if s.range not in gen.all_type_object_names() %} 55 | {{ gen.name(element) }} --|> {{ s.range }} : {{ gen.name(s) }} 56 | {% endif %} 57 | {% endfor %} 58 | ``` 59 | {% endif %} -------------------------------------------------------------------------------- /src/doc-templates/common_metadata.md.jinja2: -------------------------------------------------------------------------------- 1 | {% if element.aliases %} 2 | ## Aliases 3 | 4 | {% for alias in element.aliases %} 5 | * {{ alias }} 6 | {%- endfor %} 7 | {% endif %} 8 | 9 | 10 | {% if element.examples %} 11 | ## Examples 12 | 13 | | Value | 14 | | --- | 15 | {% for x in element.examples -%} 16 | | {{ x.value }} | 17 | {% endfor %} 18 | {% endif -%} 19 | 20 | {% if element.comments -%} 21 | ## Comments 22 | 23 | {% for x in element.comments -%} 24 | * {{x}} 25 | {% endfor %} 26 | {% endif -%} 27 | 28 | {% if element.todos -%} 29 | ## TODOs 30 | 31 | {% for x in element.todos -%} 32 | * {{x}} 33 | {% endfor %} 34 | {% endif -%} 35 | 36 | {% if element.see_also -%} 37 | ## See Also 38 | 39 | {% for x in element.see_also -%} 40 | * {{ gen.uri_link(x) }} 41 | {% endfor %} 42 | {% endif -%} 43 | 44 | ## Identifier and Mapping Information 45 | 46 | {% if element.id_prefixes %} 47 | ### Valid ID Prefixes 48 | 49 | Instances of this class *should* have identifiers with one of the following 
prefixes: 50 | {% for p in element.id_prefixes %} 51 | * {{p}} 52 | {% endfor %} 53 | 54 | {% endif %} 55 | 56 | 57 | {% if element.annotations %} 58 | ### Annotations 59 | 60 | | property | value | 61 | | --- | --- | 62 | {% for a in element.annotations -%} 63 | {%- if a|string|first != '_' -%} 64 | | {{ a }} | {{ element.annotations[a].value }} | 65 | {%- endif -%} 66 | {% endfor %} 67 | {% endif %} 68 | 69 | {% if element.from_schema or element.imported_from %} 70 | ### Schema Source 71 | 72 | {% if element.from_schema %} 73 | * from schema: {{ element.from_schema }} 74 | {% endif %} 75 | {% if element.imported_from %} 76 | * imported from: {{ element.imported_from }} 77 | {% endif %} 78 | {% endif %} -------------------------------------------------------------------------------- /src/doc-templates/index.md.jinja2: -------------------------------------------------------------------------------- 1 | # SSSOM Official Data Model Documentation 2 | 3 | ![SSSOM banner](images/sssom-banner.png) 4 | 5 | {% if schema.description %}{{ schema.description }}{% endif %} 6 | 7 | **Schema PURL**: {{ schema.id }} 8 | 9 | ## Introduction 10 | 11 | While the SSSOM model is quite general and mappings can be shared in different formats, the most common format is the [SSSOM/TSV format](spec-formats-tsv.md). 
12 | Here is a tabular representation of some example mappings for illustration purposes: 13 | 14 | | subject_id | subject_label | predicate_id | object_id | object_label | mapping_justification | author_id | confidence | comment | 15 | |---------------|---------------|-----------------|-----------------|----------------------|-----------------------------|-------------------------|------------|---------------------------------------------------------------------------------------------------------------------------------| 16 | | KF_FOOD:F001 | apple | skos:exactMatch | FOODON:00002473 | apple (whole) | semapv:ManualMappingCuration | orcid:0000-0002-7356-1779 | 0.95 | "We could map to FOODON:03310788 instead to cover sliced apples, but only 'whole' apple types exist." | 17 | | KF_FOOD:F002 | gala | skos:exactMatch | FOODON:00003348 | Gala apple (whole) | semapv:ManualMappingCuration | orcid:0000-0002-7356-1779 | 1.0 | | 18 | | KF_FOOD:F003 | pink | skos:exactMatch | FOODON:00004186 | Pink apple (whole) | semapv:ManualMappingCuration | orcid:0000-0002-7356-1779 | 0.9 | "We could map to FOODON:00004187 instead which more specifically refers to 'raw' Pink apples. Decided against to be consistent with other mapping choices." | 19 | | KF_FOOD:F004 | braeburn | skos:broadMatch | FOODON:00002473 | apple (whole) | semapv:ManualMappingCuration | orcid:0000-0002-7356-1779 | 1.0 | | 20 | 21 | In the TSV format, mapping set metadata is included at the top of the file, before the mappings themselves, in yaml-like key-value pairs: 22 | 23 | 24 | !!! info "Example header (YAML format)" 25 | 26 |
 27 |     curie_map:
 28 |       FOODON: http://purl.obolibrary.org/obo/FOODON_
 29 |       KF_FOOD: https://kewl-foodie.inc/food/
 30 |       orcid: https://orcid.org/
 31 |     mapping_set_id: https://w3id.org/sssom/tutorial/example1.sssom.tsv
 32 |     mapping_set_description: >
 33 |       Manually curated alignment of KEWL FOODIE INC internal food and 
 34 |       nutrition database with Food Ontology (FOODON). Intended to be 
 35 |       used for ontological analysis and grouping of KEWL FOODIE INC 
 36 |       related data.
 37 |     license: https://creativecommons.org/licenses/by/4.0/
 38 |     mapping_date: 2022-05-02
 39 |     
40 | 41 | See [here](https://github.com/mapping-commons/sssom/tree/master/examples/schema) for concrete examples. 42 | 43 | 44 | ## Mapping metadata elements 45 | 46 | {% for c in gen.all_class_objects()|sort(attribute=sort_by) -%} 47 | {% if c.name == "mapping" %} 48 | {{gen.link(c)}}: {{c.description|enshorten}} 49 | 50 | | Column/Field | Description | Required | 51 | |--------------------|---------------------------------------------------------|-------------| 52 | {%- for slot in c.slots %} 53 | {%- set slot_info = schemaview.get_slot(slot) %} 54 | | **{{ gen.link(slot) }}** | {{ slot_info.description | default("No description") }} | {% if slot_info.required | default(false) %}Required{% elif slot_info.recommended | default(false) %}Recommended{% else %}Optional{% endif %} | 55 | {%- endfor %} 56 | {%- endif %} 57 | {%- endfor %} 58 | 59 | ## Mappings set metadata elements 60 | 61 | {% for c in gen.all_class_objects()|sort(attribute=sort_by) -%} 62 | {% if c.name == "mapping set" %} 63 | {{gen.link(c)}}: {{c.description|enshorten}} 64 | 65 | | Column/Field | Description | Required | 66 | |--------------------|---------------------------------------------------------|-------------| 67 | {%- for slot in c.slots %} 68 | {%- set slot_info = schemaview.get_slot(slot) %} 69 | | **{{ gen.link(slot) }}** | {{ slot_info.description | default("No description") }} | {% if slot_info.required | default(false) %}Required{% elif slot_info.recommended | default(false) %}Recommended{% else %}Optional{% endif %} | 70 | {%- endfor %} 71 | {%- endif %} 72 | {%- endfor %} 73 | 74 | {# 75 | ## Schema Diagram 76 | 77 | ```{{ gen.mermaid_directive() }} 78 | {{ gen.mermaid_diagram() }} 79 | ``` 80 | #} 81 | ## Index (all classes, enums and elements) 82 | 83 | ### Columns/Slots/Fields 84 | 85 | | Slot | Description | 86 | | --- | --- | 87 | {% for s in gen.all_slot_objects()|sort(attribute=sort_by) -%} 88 | | {{gen.link(s)}} | {{s.description|enshorten}} | 89 | {% endfor %} 90 | 91 | 
### Classes 92 | 93 | | Class | Description | 94 | | --- | --- | 95 | {% if gen.hierarchical_class_view -%} 96 | {% for u, v in gen.class_hierarchy_as_tuples() -%} 97 | | {{ " "|safe*u*8 }}{{ gen.link(schemaview.get_class(v)) }} | {{ schemaview.get_class(v).description }} | 98 | {% endfor %} 99 | {% else -%} 100 | {% for c in gen.all_class_objects()|sort(attribute=sort_by) -%} 101 | | {{gen.link(c)}} | {{c.description|enshorten}} | 102 | {% endfor %} 103 | {% endif %} 104 | 105 | ### Enumerations 106 | 107 | | Enumeration | Description | 108 | | --- | --- | 109 | {% for e in gen.all_enum_objects()|sort(attribute=sort_by) -%} 110 | | {{gen.link(e)}} | {{e.description|enshorten}} | 111 | {% endfor %} 112 | 113 | ### Types 114 | 115 | | Type | Description | 116 | | --- | --- | 117 | {% for t in gen.all_type_objects()|sort(attribute=sort_by) -%} 118 | | {{gen.link(t)}} | {{t.description|enshorten}} | 119 | {% endfor %} 120 | -------------------------------------------------------------------------------- /src/doc-templates/slot.md.jinja2: -------------------------------------------------------------------------------- 1 | # {{ gen.name(element) }} 2 | 3 | **URI**: {{ gen.uri_link(element) }} 4 | 5 | **Applicable to**: {% for c in schemaview.get_classes_by_slot(element, include_induced=True) %}{{ gen.link(c) }}{% if not loop.last %}, {% endif %}{% endfor -%} 6 | 7 | {%- if element.description %} 8 | {%- set element_description_lines = element.description.split('\n') %} 9 | !!! 
info "Description" 10 | 11 | {% for element_description_line in element_description_lines %} 12 | _{{ element_description_line }}_ 13 | {%- endfor %} 14 | {%- endif -%} 15 | {# 16 | {% if schemaview.slot_parents(element.name) or schemaview.slot_children(element.name, mixins=False) %} 17 | 18 | ## Inheritance 19 | 20 | {{ gen.inheritance_tree(element, mixins=True) }} 21 | {% else %} 22 | 23 | {% endif %} 24 | 25 | {% if schemaview.get_classes_by_slot(element, include_induced=True) %} 26 | #} 27 | {# 28 | {% if schemaview.is_mixin(element.name) %} 29 | 30 | ## Mixin Usage 31 | 32 | | mixed into | description | range | domain | 33 | | --- | --- | --- | --- | 34 | {% for s in schemaview.slot_children(element.name, is_a=False) -%} 35 | | {{ gen.link(s) }} | {{ schemaview.get_slot(s).description|enshorten }} | {{ schemaview.get_slot(s).range }} | {{ schemaview.get_classes_by_slot(schemaview.get_slot(s))|join(', ') }} | 36 | {% endfor %} 37 | {% endif %} 38 | #} 39 | ## Properties 40 | 41 | * **Range**: {{gen.link(element.range)}}. The range of the element is the type of the value that can be assigned to it. 42 | {% if element.multivalued %}* **Multivalued**: {{ element.multivalued }}. If the element is multivalued, more than one value can be attached to the same field. In the TSV format, these are `|` separated.{% endif %} 43 | * **Required?**: {% if element.required %}Required (element has to be added to the mapping or mapping set.) 44 | {% elif element.recommended %}Recommended (We suggest adding this element to the mapping or mapping set, but it is not required.) 
45 | {% else %}Optional (You can add this element to the mapping or mapping set, but it is not required.){% endif -%} 46 | {% if element.minimum_value is not none %}* **Minimum Value**: {{ element.minimum_value|int }}{% endif -%} 47 | {% if element.maximum_value is not none %}* **Maximum Value**: {{ element.maximum_value|int }}{% endif -%} 48 | {% if element.pattern %}* **Regex pattern**: {{ '`' }}{{ element.pattern }}{{ '`' }}{% endif -%} 49 | {# 50 | 51 | {% if schemaview.usage_index().get(element.name) %} 52 | ## Usages 53 | 54 | | used by | used in | type | used | 55 | | --- | --- | --- | --- | 56 | {% for usage in schemaview.usage_index().get(element.name) -%} 57 | | {{gen.link(usage.used_by)}} | {{gen.link(usage.slot)}} | {{usage.metaslot}} | {{ gen.link(usage.used) }} | 58 | {% endfor %} 59 | {% endif %} 60 | #} 61 | {% if element.examples %} 62 | ## Examples 63 | 64 | {% for x in element.examples -%} 65 | !!! example "Example: {{ x.value }}" 66 | 67 | Example value: 68 | 69 | ``` 70 | {{ x.value }} 71 | ``` 72 | 73 | {% if x.description %}Description: {{ x.description }}{% endif %} 74 | {% endfor %} 75 | {% endif -%} 76 | 77 | {% if element.comments -%} 78 | ## Comments 79 | 80 | {% for x in element.comments -%} 81 | * {{x}} 82 | {% endfor %} 83 | {% endif -%} 84 | 85 | {% if element.see_also -%} 86 | ## See Also 87 | 88 | These are some relevant resources you might find useful to get additional information about the element, 89 | such as example implementations, issues and pull requests. 90 | 91 | {% for x in element.see_also -%} 92 | * {{ gen.uri_link(x) }} 93 | {% endfor %} 94 | {% endif -%} 95 | 96 | ## Schema developer documentation 97 | 98 |
LinkML source 99 | 100 | ```yaml 101 | {{ gen.yaml(element) }} 102 | ``` 103 |
-------------------------------------------------------------------------------- /src/docs/5star-mappings.md: -------------------------------------------------------------------------------- 1 | # 5-Star Entity Mappings - Cheatsheet 2 | 3 | [Download as PDF](resources/sssom_5star_mappings.pdf). 4 | 5 | This document is under development. Get involved by opening an issue on the [issue tracker](https://github.com/mapping-commons/sssom/issues). 6 | 7 | ## Towards more reusable and transparent mappings for Open Science. 8 | 9 | Entity Mappings connect clinical codes, data model enums, ontology classes and terms in clinical terminologies 10 | across knowledge organization systems and databases. Entity mappings are pivotal for the integration of 11 | healthcare data, but they are expensive to produce and often use-case-dependent. 12 | Despite the cost of creating these mappings, they are rarely shared across organizations, and even 13 | if they are made available, they lack standardization and metadata. Here we outline a 14 | 5-Star mapping system (inspired by Tim Berners-Lee’s 5-star system for Linked Data) 15 | to help you bring mapping production in your organization to the next level - step by step. 16 | 17 | ## :star: 1-Star Mappings 18 | 19 | * **Goal**: Export mapping in a computationally accessible format, make it publicly available and record mapping precision. 20 | * **Implementation**: 21 | * Record subject id, object id and mapping precision (exact, broad, narrow, close, related) 22 | * Use globally unique and persistent identifiers for subject id and object id (e.g. 
OMOP:123456) 23 | * Use a computable file format (JSON, XML, CSV, TSV) rather than XLSX or HTML 24 | * Make mappings available in a public space without access restrictions 25 | * **Optional**: record the subject and object labels to make it easier for humans to read the file 26 | * **Enables**: 27 | * Direct integration into ETL pipelines 28 | * Dropping societal costs by enabling others to reuse mappings 29 | * Moving data between semantic spaces 30 | 31 | ## :star::star: 2-Star Mappings 32 | 33 | * **Goal**: Make mappings available in a place with version control suitable for providing community feedback, make your own 34 | uncertainty explicit, add license and select semantic mapping predicate. 35 | * **Implementation** 36 | * Make mapping set available in a public version control system (e.g. GitHub) with an issue tracker 37 | * Record the semantic predicate explicitly (e.g. owl:equivalentClass, skos:exactMatch) 38 | * Record a confidence value for the mapping between 0 and 1, where appropriate 39 | * Use a standard open license for the use of the mapping set (e.g. Creative Commons) 40 | * **Enables**: 41 | * The worry-free reuse of mappings even if target or source terminologies are “closed” 42 | * Transparently versioned access to mappings and the opportunity to provide more direct feedback 43 | * Downstream users can filter for high-confidence mappings 44 | 45 | ## :star::star::star: 3-Star Mappings 46 | 47 | * **Goal**: Export mappings in a community standard format with basic versioning and provenance information. 48 | * **Implementation** 49 | * Export mappings in SSSOM ([https://w3id.org/sssom](https://w3id.org/sssom)) format (you do not have to curate using SSSOM!) 50 | * Record the following additional metadata 51 | * mapping_justification(s) (Lexical, Logical match, Human curated etc.) 
52 | * mapping_date 53 | * subject_source, object_source, subject_source_version, object_source_version 54 | * mapping_tool (if the mapping was automatically computed using a tool), creator_id 55 | * **Enables**: 56 | * Dropping costs of reusing mappings further by providing a standard format to exchange mappings 57 | * Enabling the decentralised production of mappings by independent expert communities 58 | * Basic metadata, in particular justifications, enable downstream users to assess “fitness for purpose” for a different context 59 | 60 | ## :star::star::star::star: 4-Star Mappings 61 | 62 | * **Goal**: Make it easier to discover mappings by registering them at a public mapping registry 63 | * **Implementation** 64 | * Register the mapping at a mapping commons (if none exists, create one) 65 | * Record the following additional metadata: 66 | * mapping_set_id, mapping_set_description, mapping_set_version 67 | * mapping_provider (if the mapping is not original, i.e. it is derived from another source) 68 | * Provide an executable mapping_justification (see https://w3id.org/sssom#minimum) 69 | * **Enables**: 70 | * Mappings can easily be made available by Open Terminology services which enable scalable data mapping services 71 | 72 | ## :star::star::star::star::star: 5-Star Mappings 73 | 74 | * **Goal**: Ensure currency of mappings 75 | * **Implementation**: 76 | * Mappings are up-to-date with the latest versions of the sources being mapped 77 | * Have no issue on their issue tracker open for more than 3 months without an interaction 78 | * Usually requires a lifecycle management system that integrates automated matching 79 | * **Enables**: 80 | * Reduced effort dealing with mappings to deprecated codes or classes 81 | * Worry-free application of mappings in automated ETL processes 82 | -------------------------------------------------------------------------------- /src/docs/chaining-rules.md: 
-------------------------------------------------------------------------------- 1 | ## SSSOM Mapping Chains 2 | 3 | The goal of this document is to capture all obvious mapping chaining rules that could be applied to SSSOM, 4 | and later delivered as part of `sssom toolkit`. 5 | This is all structural, and should not be confused with proper reasoning or mapping reconciliation ala 6 | [boomer](https://github.com/INCATools/boomer). 7 | 8 | The idea is to provide the functionality to apply these chaining rules over a given mapping set, and record 9 | the appropriate metadata for that rule. 10 | 11 | Rules: 12 | 13 | - [Transitivity Rule](#transitivity) 14 | - [Role chains over exact/equivalent matches](#rce) 15 | - [Inverse Rule](#inverse) 16 | - [Generalisation Rule](#generalisation) 17 | 18 | 19 | 20 | ## Transitivity Rule 21 | 22 | Transitivity of a relation `R` implies that if an entity `A` is `R`-related to an entity `B` which in turn is 23 | `R`-related to an entity `C`, `A` is also `R`-related to `C`. 24 | 25 | ### Predicates applicable in transitivity rules 26 | 27 | We consider the following predicates transitive: 28 | 29 | - skos:exactMatch 30 | - skos:narrowMatch 31 | - skos:broadMatch 32 | - owl:equivalentClass / owl:equivalentProperty 33 | - rdfs:subClassOf / rdfs:subPropertyOf 34 | - owl:sameAs 35 | 36 | Note that technically speaking `skos:narrowMatch` and `skos:broadMatch` are not considered transitive 37 | (`skos:broaderTransitive` would be), but we are not defining a new semantics here, 38 | just a reasonable default for a mapping tool, which will nearly always hold true. 39 | 40 | Predicates we do not consider transitive include: `skos:relatedMatch` (for practical reasons), `oboInOwl:hasDbXref`, 41 | `skos:closeMatch`, `rdfs:seeAlso` (weakest form of a mapping link), `rdf:type`. 
42 | 43 | ### Rules 44 | 45 | - T1: `(:A)-[predicate_id]->(:B)-[predicate_id]->(:C)` -> `(:A)-[predicate_id]->(:C)` 46 | 47 | ### Examples 48 | 49 | - T1-EX: `(:A)-[skos:broadMatch]->(:B)-[skos:broadMatch]->(:C)` -> `(:A)-[skos:broadMatch]->(:C)` 50 | 51 | 52 | 53 | ## Role chains over exact/equivalent matches 54 | 55 | Role chains are rules that allow us to bridge mappings across multiple different properties. 56 | Role chains over exact matches are simple to define, so we start with these. 57 | 58 | ### Predicates applicable in role chain rules 59 | 60 | - skos:narrowMatch 61 | - skos:broadMatch 62 | - skos:closeMatch 63 | - skos:relatedMatch 64 | 65 | ### Rules for SKOS 66 | 67 | - RCE1: `(:A)-[skos:exactMatch|owl:equivalentClass]->(:B)-[predicate_id]->(:C)` -> `(:A)-[predicate_id]->(:C)` 68 | - RCE2: `(:A)-[predicate_id]->(:B)-[skos:exactMatch]->(:C)` -> `(:A)-[predicate_id]->(:C)` 69 | 70 | ### Rules that should probably not be inferred (OWL) 71 | 72 | The following rules hold true, but will be left to a reasoner to be inferred: 73 | 74 | - RCE-N1: `(:A)-[owl:equivalentClass]->(:B)-[rdfs:subClassOf]->(:C)` -> `(:A)-[rdfs:subClassOf]->(:C)` 75 | - RCE-N2: `(:A)-[rdfs:subClassOf]->(:B)-[owl:equivalentClass]->(:C)` -> `(:A)-[rdfs:subClassOf]->(:C)` 76 | - RCE-N3: `(:A)-[owl:equivalentProperty]->(:B)-[rdfs:subPropertyOf]->(:C)` -> `(:A)-[rdfs:subPropertyOf]->(:C)` 77 | - RCE-N4: `(:A)-[rdfs:subPropertyOf]->(:B)-[owl:equivalentProperty]->(:C)` -> `(:A)-[rdfs:subPropertyOf]->(:C)` 78 | 79 | 80 | 81 | ## Inverse Rules 82 | 83 | `R` inverse of `S` implies that if an entity `A` is `R`-related to an entity `B` then `B` is also `S`-related to `A`. 84 | We like to call the output of an inverse rule a `walk-back`. A command that applies an inverse rule could be called `flip`. 85 | 86 | ### Predicates applicable in inverse rules 87 | 88 | This excludes the exact predicates for which inverse rules are redundant. 
89 | 90 | ### Rules for SKOS 91 | 92 | - RI1: `(:A)-[skos:narrowMatch]->(:B)` -> `(:B)-[skos:broadMatch]->(:A)` 93 | - RI2: `(:A)-[skos:broadMatch]->(:B)` -> `(:B)-[skos:narrowMatch]->(:A)` 94 | 95 | ### Rules for SEMAPV 96 | 97 | - RI3: `(:A)-[semapv:crossSpeciesExactMatch]->(:B)` -> `(:B)-[semapv:crossSpeciesExactMatch]->(:A)` 98 | - RI4: `(:A)-[semapv:crossSpeciesNarrowMatch]->(:B)` -> `(:B)-[semapv:crossSpeciesBroadMatch]->(:A)` 99 | - RI5: `(:A)-[semapv:crossSpeciesBroadMatch]->(:B)` -> `(:B)-[semapv:crossSpeciesNarrowMatch]->(:A)` 100 | 101 | 102 | 103 | ## Generalisation Rules 104 | 105 | Generalisation rules are rules that can be applied to weaken a mapping deliberately. This is sometimes useful, for example when 106 | combining strong OWL-Semantics mappings with weaker SKOS-based ones. 107 | 108 | ### Rules 109 | 110 | - RG1: `(:A)-[owl:equivalentClass]->(:B)` -> `(:A)-[skos:exactMatch]->(:B)` 111 | - RG2: `(:A)-[rdfs:subClassOf]->(:B)` -> `(:A)-[skos:broadMatch]->(:B)` 112 | -------------------------------------------------------------------------------- /src/docs/contributing.md: -------------------------------------------------------------------------------- 1 | # Contributing to SSSOM 2 | 3 | - [Contribution guidelines](https://github.com/mapping-commons/sssom/blob/master/CONTRIBUTING.md) 4 | - [Code of Conduct](https://github.com/mapping-commons/sssom/blob/master/CODE_OF_CONDUCT.md) 5 | -------------------------------------------------------------------------------- /src/docs/create-mapping-commons.md: -------------------------------------------------------------------------------- 1 | ## Getting started 2 | 3 | - First, create a virtual environment of your choice (anaconda, venv, pyenv, poetry etc.). If you need assistance with virtual environments, [here's a guide](https://berkeleybop.github.io/best_practice/python_environments) to help you set up pyenv and use poetry with it. 4 | - Install the [cruft](https://github.com/cruft/cruft) package. 
Cruft enables keeping projects up-to-date with future updates made to this original template. 5 | 6 | ``` 7 | pip install cruft 8 | ``` 9 | 10 | - Create a project using the [mapping-commons-cookiecutter](https://github.com/mapping-commons/mapping-commons-cookiecutter) template. 11 | 12 | ``` 13 | cruft create https://github.com/mapping-commons/mapping-commons-cookiecutter 14 | ``` 15 | 16 | This kickstarts an interactive session where you declare the following: 17 | 18 | - `project_name`: Name of the project. [defaults to: my-commons-name] 19 | - `github_org`: Name of the github org the project belongs to. [defaults to: my-org] 20 | - `project_description`: Description of the project [defaults to: 'This is the project description.'] 21 | - `full_name`: Name of the author [defaults to: 'My Name'] 22 | - `email`: Author's email [defaults to: 'my-name@my-org.org'] 23 | - `yo`: Choose from [1]: Yes, [2]: No [**TEST OPTION FOR NOW**] 24 | - `license`: Choose from [1]: Yes, [2]: No [**TEST OPTION FOR NOW**] 25 | 26 | ## What does this do? 27 | 28 | The following files and directories are autogenerated in the project: 29 | 30 | ### TODO 31 | 32 | ## Version control 33 | ### GitHub 34 | 35 | 1. Go to [https://github.com/new](https://github.com/new) and follow the instructions, being sure to 36 | NOT add a README or .gitignore file (this cookiecutter template will take 37 | care of this for you) 38 | 39 | 2. 
Add the remote to your local git repository 40 | 41 | ```bash 42 | git remote add origin https://github.com/my-user-or-organization/my-commons-name.git 43 | git branch -M main 44 | git push -u origin main 45 | ``` 46 | 47 | ### GitLab 48 | 49 | #### TODO 50 | 51 | ## Future updates to the project's boilerplate code 52 | 53 | In order to be up-to-date with the template, first check if there is a mismatch between the project's boilerplate code and the template by running: 54 | 55 | ``` 56 | cruft check 57 | ``` 58 | 59 | This indicates if there is a difference between the current project's boilerplate code and the latest version of the project template. If the project is up-to-date with the template: 60 | 61 | ``` 62 | SUCCESS: Good work! Project's cruft is up to date and as clean as possible :). 63 | ``` 64 | 65 | Otherwise, it will indicate that the project's boilerplate code is not up-to-date by the following: 66 | 67 | ``` 68 | FAILURE: Project's cruft is out of date! Run `cruft update` to clean this mess up. 69 | ``` 70 | 71 | 72 | For viewing the difference, run `cruft diff`. This shows the difference between the project's boilerplate code and the template's latest version. 73 | 74 | After running `cruft update`, the project's boilerplate code will be updated to the latest version of the template. 75 | -------------------------------------------------------------------------------- /src/docs/editors.md: -------------------------------------------------------------------------------- 1 | # Simple Standard for Sharing Ontological Mappings (SSSOM) 2 | 3 | ## How to make a new release 4 | * Automated: 5 | * On the main code page, click on Releases (right hand column) 6 | * Click on the `Draft a new release` button 7 | * Click the `Choose a tag` button, create a new tag: `X.X.X` 8 | * Click on the `Generate a new release` button 9 | * Make sure only the `Select as the latest release` checkbox is checked. 
10 | * Click `Publish release` button 11 | * Manual: 12 | * `make build` 13 | * `make pypi` 14 | 15 | This triggers a GitHub Action workflow that releases the new version of SSSOM to PyPi. 16 | 17 | ## Documentation deployment 18 | This can be done in two ways: 19 | * Automated: Every time a pull request is merged into the `main` branch, a github action is triggered to deploy documentation automatically. 20 | * Manually: The make command to deploy documentation is `make deploy`. -------------------------------------------------------------------------------- /src/docs/events/ccb2022.md: -------------------------------------------------------------------------------- 1 | ## CCB Seminar Series: Open SSSOM - Unlocking the wealth of biomedical data using shared standardized entity mappings 2 | 3 | Where: Virtual event at the Center for Computational Biomedicine, Harvard Medical School, see 4 | https://computationalbiomed.hms.harvard.edu/education/ccb-seminar-series/ 5 | 6 | When: Monday 12 Dec 2022 7 | 8 | Speaker: Nicolas Matentzoglu, PhD, Independent Contractor, Semantic Web and Knowledge Graphs expert 9 | 10 | Abstract: In this seminar, we will discuss the role of entity mappings in the biomedical domain, and the potential gain we might get from standardising and sharing them. We will introduce the Simple Standard of Ontological Mappings (SSSOM, https://w3id.org/sssom) and showcase some of its use cases. The central goal of this seminar is to redefine entity mappings as FAIR semantic artefacts in their own right, thus making them first-class citizens alongside, for example, controlled vocabularies and ontologies. 
11 | 12 | 13 | -------------------------------------------------------------------------------- /src/docs/events/mc2023.md: -------------------------------------------------------------------------------- 1 | # 2nd Mapping Commons Workshop on Simple Standard for Sharing Ontology Mappings (SSSOM) 2 | 3 | **When**: Sunday, 23rd April, 2:30 pm Italy (CET), 1:30 pm BST, 8:30 am EDT, 5:30 am PDT (3 hours total) 4 | 5 | **Where**: co-located with [Biocuration 2023](https://biocuration2023.github.io/). For information about the venue see https://biocuration2023.github.io/workshops. 6 | 7 | During the workshop, we will work a lot with Slido for interactive sharing of ideas, rather than relying on open discussions. 8 | We recommend having your laptop / phone with you to be able to participate. 9 | 10 | ## Recordings 11 | 12 | https://www.youtube.com/watch?v=juMQQ01Q540&list=PLqu_J7ADQtKyX55F7RqZtaSS7TwGd3MoR&ab_channel=BiocurationConference2023 13 | 14 | ## Recommended preparation 15 | 16 | - Scroll through [the documentation](https://mapping-commons.github.io/sssom/home/), in particular [the paper](https://doi.org/10.1093/database/baac035) and the [basic tutorial](https://mapping-commons.github.io/sssom/tutorial/) to get a basic sense of SSSOM 17 | - Skim through, or even watch, the [SSSOM CCB Seminar recording](https://www.youtube.com/watch?v=4vqeRECuAKE) 18 | - Scroll through the slide decks describing the problems we will be addressing (note, they are all works in progress): 19 | - [Complex Mappings: Examples from OMOP2OBO (Tiffany Callahan)](https://docs.google.com/presentation/d/1Jn0W9gjRn19ISDB8N-sEwKwXsJySLPlNIsOL6ng_nEA/edit?usp=sharing) 20 | - [Complex mappings - the journey towards a proposal (Nico Matentzoglu)](https://docs.google.com/presentation/d/1kFD33S_WMgEGmCnT7IjVCeEyKI7OpcUw1ZzRXGqt1hs/edit?usp=sharing) 21 | - [Literal mappings with SSSOM (James 
McLaughlin)](https://docs.google.com/presentation/d/1mBZK6KS7JgmXlEtszQiOa_Cl7SXg_Z8wRp0tZHaL57Y/edit?usp=sharing) 22 | - FAIR Impact and schema mappings (Yann Le Franc) - slides TBD 23 | - [Mapping Data Structures: Challenges and Approaches](https://docs.google.com/presentation/d/191jQYOe8KAGoktVOA408NW_WWk_Gon0q9idyylbEQck/edit?usp=sharing) (Chris Mungall) 24 | - [Concept Set Mappings](https://docs.google.com/presentation/d/1055Etr0kgHHkguwgizecb_SEhj2nNd7my3q0u8fCDvk/edit?usp=sharing) (Chris Roeder) - slides TBD 25 | 26 | ## Preliminary agenda 27 | 28 | | Time | Topic | Led by | 29 | | ------ | ----- | ------ | 30 | | 2:30pm | [Welcome, introduction, SSSOM overview and introduction of the problem of "non-simple mappings"](https://docs.google.com/presentation/d/1bHcZsYU9GpZDyeDxO4uopnuw0-ETfldn1EFYQwBbNro/edit?usp=sharing) | Nico Matentzoglu | 31 | | 2:55pm | [FAIR Impact and schema mappings](https://drive.google.com/file/d/1cDSfvBehegy3edJU4LxZK3S-xI0LNbmn/view?usp=sharing) | Yann Le Franc | 32 | | 3:10pm | [Literal mappings with SSSOM](https://docs.google.com/presentation/d/1mBZK6KS7JgmXlEtszQiOa_Cl7SXg_Z8wRp0tZHaL57Y/edit?usp=sharing) | James McLaughlin | 33 | | 3:20pm | Literal Mappings - Discussion | | 34 | | 3:30pm | [Concept Set Mappings](https://docs.google.com/presentation/d/1055Etr0kgHHkguwgizecb_SEhj2nNd7my3q0u8fCDvk/edit?usp=sharing) | Chris Roeder | 35 | | 3:45pm | Concept Set Mappings - discussion | | 36 | | 3:55pm | Break | | 37 | | 4:05pm | [Complex Mappings: Examples from OMOP2OBO](https://docs.google.com/presentation/d/1Jn0W9gjRn19ISDB8N-sEwKwXsJySLPlNIsOL6ng_nEA/edit?usp=sharing) | Tiffany Callahan | 38 | | 4:20pm | [Complex mappings - the journey towards a proposal](https://docs.google.com/presentation/d/1kFD33S_WMgEGmCnT7IjVCeEyKI7OpcUw1ZzRXGqt1hs/edit?usp=sharing) | Nico Matentzoglu | 39 | | 4:35pm | Complex Mappings - discussion | | 40 | | 4:50pm | [Mapping Data Structures: Challenges and 
Approaches](https://docs.google.com/presentation/d/191jQYOe8KAGoktVOA408NW_WWk_Gon0q9idyylbEQck/edit?usp=sharing) | Chris Mungall | 41 | | 5:05pm | Schema Mappings - discussion | | 42 | | 5:20pm | Summary and closing remarks | Nico Matentzoglu | 43 | 44 | ## Organisers 45 | 46 | - Nicolas Matentzoglu 47 | - Yann Le Franc 48 | - Tiffany Callahan 49 | - Chris Mungall 50 | - Chris Roeder 51 | - James McLaughlin 52 | - Nomi Harris 53 | 54 | -------------------------------------------------------------------------------- /src/docs/events/oboacademy2022.md: -------------------------------------------------------------------------------- 1 | ## OBO Academy 2022: Introduction to manual mapping curation 2 | 3 | Where: Virtual, https://oboacademy.github.io/obook/courses/monarch-obo-training/ 4 | When: 17th May 2022 5 | 6 | Speaker: Nicolas Matentzoglu, PhD, Independent Contractor, Semantic Web and Knowledge Graphs expert 7 | 8 | Abstract: See [here](../tutorial.md). 9 | 10 | -------------------------------------------------------------------------------- /src/docs/events/ohdsi2022.md: -------------------------------------------------------------------------------- 1 | ## OHDSI Symposium 2022: There are no "good" mappings. 2 | 3 | Where: 4 | 5 | - https://www.ohdsi.org/ohdsi2022symposium/ 6 | - https://www.ohdsi.org/ohdsi2022-workgroup-activities/ 7 | 8 | When: 15th October 2022 9 | 10 | Speaker: Nicolas Matentzoglu, Melissa Haendel, Tiffany Callahan 11 | 12 | Slides: https://docs.google.com/presentation/d/1sGPh1b0keghxF4o7vMOQAlZ6QyBf97ZpaTXjmMY3UP0/edit#slide=id.p 13 | 14 | Abstract: The current state of observation data transformation management has resulted in a proliferation of approaches to data normalization and alignment that have accordingly created an explosion of maps. When publicly available, data transformation maps are of varying quality and are often configured to meet single clinical domain or project-specific objectives. 
Sharing maps in a community requires metadata to help map comparison and evaluation, and release management over different versions. This is typically lacking. In this workshop we will present rationale for translational research data transformation management, including implementation examples mapping Real World Data to OMOP, at-scale. We will discuss the way in which source vocabularies are managed in OMOP and how to improve that process in the future. Data transformations that accommodate divergent underlying ontologies supporting a variety of use cases will be solicited from the participants both in advance and during the workshop. -------------------------------------------------------------------------------- /src/docs/events/ohdsi2023.md: -------------------------------------------------------------------------------- 1 | ## Poster presentation at OHDSI 2023 Symposium, Rotterdam 2 | 3 | *Title*: "Synergizing Simple Standard for Sharing Ontology Mappings (SSSOM) and the Observational Health Data Sciences and Informatics (OHDSI)" 4 | 5 | *Where*: https://www.ohdsi-europe.org/index.php/symposium-2023 6 | 7 | *When*: 3rd July 2023 8 | 9 | *Presenters*: 10 | 11 | - Polina Talapova 12 | - Nicolas Matentzoglu 13 | 14 | *Links*: 15 | 16 | - [Link to poster](https://www.dropbox.com/s/qru8lel4ahrluwo/ohdsi2023_matentzoglu_talapova_poster.pdf?dl=0) 17 | - [Link to short report](https://www.dropbox.com/s/gvnjq16cfwvnna4/ohdsi2023_matentzoglu_talapova_extended_abstract.pdf?dl=0) 18 | -------------------------------------------------------------------------------- /src/docs/events/om2022.md: -------------------------------------------------------------------------------- 1 | ## OM 2022: A Simple Standard for Ontological Mappings 2022 - Updates of data model and outlook 2 | 3 | Where: Virtual, co-located with ISWC 2022, http://om2022.ontologymatching.org/ 4 | When: 23rd October 2022 5 | 6 | Speaker: Nicolas Matentzoglu, PhD, Independent Contractor, Semantic Web and 
Knowledge Graphs expert 7 | 8 | Abstract: 9 | The Simple Standard for Ontological Mappings (SSSOM) was first published in December 10 | 2021 (v. 0.9). After a number of revisions prompted by community feedback, we have 11 | published version 0.10.1 in August 2022. One of the key new features is the use of a controlled 12 | vocabulary for mapping-related processes, such as preprocessing steps and matching 13 | approaches. In this paper, we give an update on the development of SSSOM since v. 0.9, 14 | introduce the Semantic Mapping Vocabulary (SEMAPV) and outline some of our thoughts on 15 | the establishment of mapping commons in the future. 16 | 17 | 18 | -------------------------------------------------------------------------------- /src/docs/events/pistoia2022.md: -------------------------------------------------------------------------------- 1 | ## Pistoia Seminar 2022: A Simple Standard for Sharing Ontological Mappings (SSSOM) 2 | 3 | Where: Virtual 4 | 5 | When: 28th April 2022 6 | 7 | Speaker: Nicolas Matentzoglu, PhD, Independent Contractor, Semantic Web and Knowledge Graphs expert 8 | 9 | Abstract: Meeting introducing SSSOM to the Pistoia Alliance. 10 | -------------------------------------------------------------------------------- /src/docs/events/wsbo2021.md: -------------------------------------------------------------------------------- 1 | ## WSBO-2021: Workshop on Synergizing Biomedical Ontologies 2 | 3 | Where: https://github.com/OntoloBridge/WSBO/ 4 | 5 | [Slides](https://docs.google.com/presentation/d/1TlROX-JNeWvgrX57-CBa2qxTrRp92VGGZnrhJv3rLPM/edit#slide=id.p) 6 | 7 | 8 | -------------------------------------------------------------------------------- /src/docs/explanation/mappings.md: -------------------------------------------------------------------------------- 1 | # What is a mapping? 
2 | 3 | The word "mapping" is pretty 4 | overloaded in practice: for some people, it simply means "a correspondence of one term 5 | to another equivalent or near equivalent term." 6 | But even here, there is little understanding to what a "term" is in this sentence, 7 | or what "almost equivalent" means - and, there are many different kinds of mappings used in practice that are not "equivalent" at all. In its very essence, an individual mapping maps one information entity, i.e. a _representation of a real world entity_, to another information entity - how, and what these strings could be, will be the subject of the following section. 8 | 9 | In the following, we consider an **information entity** a *sequence of characters which has a well defined relationship to some thing in the real world*, for example: 10 | - an ontology id like HP:0004934 corresponds to the concept of "Vascular calcification" in the real world. Note that HP:0004934 is annotated with the `rdfs:label` "Vascular calcification". 11 | The label itself is not necessarily a term - it could change, for example to "Abnormal calcification of the vasculature", and still retain the same meaning. 12 | - "Vascular calcification" may be a term in my controlled vocabulary which I understand to correspond to that respective disease (not all controlled vocabularies have IDs for their terms). 13 | This happens for example in clinical data models that do not use formal identifiers to refer to the values of slots in their data model, like "MARRIED" in /datamodel/marital_status. 14 | - Examples of terms: 15 | - IDs of classes in an ontology 16 | - elements of a clinical value set 17 | - codes of clinical terminologies such as [Z63.1](https://www.icd10data.com/ICD10CM/Codes/Z00-Z99/Z55-Z65/Z63-/Z63.1) 18 | - TLDR: terms correspond to things in the world and that correspondence is not subject to change. 19 | Labels can change without changing the meaning of a term. 
20 | 21 | ## An attempt at a practical categorisation 22 | 23 | In our experience, there are roughly four kinds of mappings: 24 | 25 | - _string-string_: Relating one string, or label, to another string, or label. Understanding such mappings is fundamental to understanding all the other kinds of mappings. 26 | - _string-term_: Relating a specific string or "label" to its corresponding term in a terminology or ontology. We usually refer to these as synonyms, but there may be other words used in this case. 27 | - _term-term_: Relating a term, for example a class in an ontology, to another term. This is what most people in the ontology domain would understand when they hear "ontology mappings". 28 | - _complex mappings_: Relating two sets of terms. These are the rarest and most complicated kinds of mappings, as they relate, for example, two phenotypic profiles (sets of phenotypes) with each other. We will discuss some more examples in a future lesson. 29 | 30 | In some ways, these four kinds of mappings can be very different. We do believe, however, that there are enough important commonalities such as common features, widely overlapping use cases and overlapping toolkits to consider them together. In the following, we will discuss these in more detail, including important features of mappings and useful tools. 31 | 32 | ### Important features of mappings 33 | 34 | Mappings have historically been neglected as second-class citizens in the medical terminology and ontology worlds - 35 | the metadata is insufficient to allow for precise analyses and clinical decision support, they are frequently stale and out of date, etc. The question "Where can I find the canonical mappings between X and Y?" 
is often shrugged off and developers are pointed to aggregators such as [OxO](https://www.ebi.ac.uk/spot/oxo/) or [UMLS](https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/mapping_projects/index.html) which combine manually curated mappings with automated ones causing ["mapping hairballs"](#how-to-solve-the-problem-of-mapping-hairballs). 36 | 37 | There are many important metadata elements to consider, but the ones that are by far the most important to consider one way or another are: 38 | 39 | - _Precision_: Is the mapping exact, broad or merely closely related? 40 | - _Confidence_: Do I trust the mapping? Was it done manually by an expert in my domain, or by an algorithm? 41 | - _Source version_: Which version of the term (or its corresponding ontology) was mapped? Is there a newer mapping which has a more suitable match for my term? 42 | 43 | Whenever you handle mappings (either create, or reuse), make sure you are keenly aware of at least these three metrics, and capture them. You may even want to consider using a proper mapping model like the [Simple Standard for Sharing Ontological Mappings (SSSOM)](https://github.com/mapping-commons/SSSOM/blob/master/SSSOM.md) which will make your mappings FAIR and reusable. 44 | 45 | ### String-string mappings 46 | String-string mappings are mappings that relate two strings. The task of matching two strings is ubiquitous for example in database search fields (where a user search string needs to be mapped to some strings in a database). Most, if not all effective ontology matching techniques will employ some form of string-string matching. For example, to match simple variations of labels such as "abnormal heart" and "heart abnormality", various techniques such as [Stemming](https://en.wikipedia.org/wiki/Stemming) and [bag of words](https://en.wikipedia.org/wiki/Bag-of-words_model#:~:text=The%20bag%2Dof%2Dwords%20model,word%20order%20but%20keeping%20multiplicity.) can be employed effectively. 
Other techniques such as edit-distance or Levenshtein can be used to quantify the similarity of two strings, which can provide useful insights into mapping candidates. 47 | 48 | ### String-term mappings / synonyms 49 | String-term mappings relate a specific string or "label" to its corresponding term in a terminology or ontology. Here, we refer to these as "synonyms", but there may be other cases for string-term mappings beyond synonymy. 50 | 51 | There are a lot of use cases for synonyms so we will name just a few here that are relevant to typical workflows of Semantic Engineers in the life sciences. 52 | 53 | [Thesauri](https://en.wikipedia.org/wiki/Thesaurus) are reference tools for finding synonyms of terms. Modern ontologies often include very rich thesauri, with some ontologies like Mondo capturing more than 70,000 exact and 35,000 related synonyms. They can provide a huge boost to traditional NLP pipelines by providing synonyms that can be used for both Named Entity Recognition and Entity Resolution. Some insight on how, for example, Uberon was used to boost text mining can be found [here](https://github.com/obophenotype/uberon/wiki/Using-uberon-for-text-mining). 54 | 55 | ### Term-term mappings / ontology mappings 56 | Term-term mappings relate a term, for example a class in an ontology, to another term, usually from another ontology or database. The term-term case of mappings is what most people in the ontology domain would understand when they hear "ontology mappings". This is also what most people understand when they hear "Entity Resolution" in the database world - the task of determining whether, in essence, two rows in a database correspond to the same thing (as an example of a tool doing ER see [deepmatcher](https://github.com/anhaidgroup/deepmatcher), or [py-entitymatching](https://pypi.org/project/py-entitymatching/)). 
For a list of standard entity matching toolkits outside the ontology sphere, see [here](https://www.biggorilla.org/software_cat/entity-matching/index.html). 57 | 58 | ### Further reading 59 | - A great overview can be found in ["Tackling the challenges of matching biomedical ontologies" (Faria et al 2018)](https://jbiomedsem.biomedcentral.com/articles/10.1186/s13326-017-0170-9) 60 | - A yearly competition of ontology matching systems is held by the [Ontology Alignment Evaluation Initiative (OAEI)](https://oaei.ontologymatching.org/). The challenge [results](http://oaei.ontologymatching.org/2020/results/) are a useful guide to identifying systems for matching you may want to try. 61 | 62 | 63 | ## Some examples of domain-specific mapping of importance to the biomedical domain 64 | 65 | ### Phenotype ontology mappings 66 | Mapping phenotypes across species holds great promise for leveraging the knowledge generated by Model Organism Database communities (MODs) for understanding human disease. There is a lot of work happening at the moment (2021) to provide standard mappings between species-specific phenotype ontologies to drive translational research ([example](https://github.com/mapping-commons/mh_mapping_initiative/tree/master/mappings)). Tools such as [Exomiser](https://github.com/exomiser/Exomiser) leverage such mappings to perform clinical diagnostic tasks such as variant prioritisation. Another app you can try out that leverages cross-species mappings is the Monarch Initiative's [Phenotype Profile Search](https://monarchinitiative.org/analyze/phenotypes). 67 | 68 | ### Disease ontology mappings 69 | Medical terminology and ontology mapping is a huge deal in medical informatics ([example](https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/mapping_projects/index.html)). [Mondo](https://github.com/monarch-initiative/mondo) is a particularly rich source of well-provenanced disease ontology mappings. 
70 | 71 | ## How should you map your data to ontologies? 72 | 73 | There is no one-size-fits-all strategy for mapping your data to ontologies. There 74 | are many research areas that have something to give in this process. Here, we outline some ideas 75 | on how to think about the problem. 76 | 77 | ### Case 1: Mapping internal controlled vocabularies 78 | 79 | ### Case 2: Mappings from free text 80 | 81 | Examples: 82 | - [Monarch Text Annotator](https://monarchinitiative.org/tools/text-annotate) 83 | 84 | ### Case 3: Mappings between public controlled vocabularies and ontologies 85 | 86 | ## How to solve the problem of mapping hairballs 87 | 88 | String-term mappings 89 | 90 | Overview of automated approaches 91 | - Simple matches (string, string pre-pro, fuzzy string) 92 | - Graph-based matches (incl. semantic similarity) 93 | - NLP/Machine Learning 94 | 95 | Practical: 96 | - Try to get the same mappings as before using techniques 97 | - Exact 98 | - Simple preprocessing 99 | - Levenshtein 100 | - Jaccard similarity 101 | - Embedding similarity (?) -------------------------------------------------------------------------------- /src/docs/faq.md: -------------------------------------------------------------------------------- 1 | # Frequently Asked Questions (FAQ) 2 | 3 | 4 | ## Why should our mappings be FAIR and carefully standardised? 5 | 6 | Mappings are frequently created on an ad-hoc basis, using simple two-column spreadsheets where the first column corresponds to the subject of the mapping, and the second column to the object of the mapping. This is insufficient for a variety of reasons: 7 | 8 | - non-transparent precision: While the assumption is that the subject "sort of mostly exactly" maps to the object, in practice this is rarely the case. Matches can be `exact`, where the subject corresponds 100% to the object, `broad`, where the object is broader than the subject, and others. 
Qualifiers like `exact`, `broad`, `narrow`, `related` and `close` qualify the *precision* of the mapping (not to be mistaken for fuzziness of confidence). Without knowing the precision, we cannot accurately transform our data, nor can we use the mappings to "walk", i.e. move from one mapping to another, see [SSSOM 5-Star recommendation for mappings](5star-mappings.md) 9 | - non-transparent incompleteness: We don't know when the mapping was created, on the basis of what version of the terminological source of the subject or object. As time passes, we also lose confidence whether there would now be more suitable mappings, or whether there are new terms that are now fully covered by the mappings. 10 | - non-transparent confidence: whether a tool or a human proposes the mapping, there is always a bit of a risk the mapping call may be wrong. As consumers of the mappings we need to know how confident the mapping authors were (confidence score), and why they were confident (curation rules, mapping justification). 11 | 12 | Currently, mappings are created by a variety of systems, manually curated and automatic, and we need a way to efficiently collect and combine them. Mapping sets and mappings with quality provenance metadata allow us to trace faulty mappings to the source and correct them in a way that _all_ users of the mapping set will profit from it. 13 | 14 | ## Is there a central repository of SSSOM files? 15 | The idea of a mapping commons is to provide mappings in a decentralised fashion akin to OBO ontologies. A mapping commons collects 16 | mappings relevant to a particular community, either by reference (i.e. pulling in mapping sets already published elsewhere), or directly 17 | maintained at the mapping commons ([example](https://github.com/mapping-commons/mh_mapping_initiative)). 
18 | Their integration as part of a repository (mapping server) would look like [EBI's Ontology X-ref Browser](https://www.ebi.ac.uk/spot/oxo/) 19 | or [BioPortal](https://www.bioontology.org/wiki/BioPortal_Mappings), 20 | but the exact scope of these repositories is _use case dependent_ - EBI may choose to show cross references from and to ontologies loaded into OLS, 21 | while BioPortal chooses to show a different set of mappings. The plan is to update EBI's OxO to support the full SSSOM data model, drawing curated 22 | mappings from a variety of mapping commons, by Summer 2022 - but it's unlikely that one central place will index all available mappings. 23 | 24 | ## Who is responsible for the conversion into SSSOM - the primary developers of an artefact, or a mapping commons? 25 | Like with everything on the web, the closer to the source the SSSOM mappings are curated, the better. Ideally, mappings are maintained as 26 | part of ontology release pipelines or by primary mapping creators, rather than derived from a secondary source such as a database, further downstream. 27 | The reason for this is that ideally, we would want mappings to be reviewable and editable in much the same way as open ontologies, 28 | offering issue trackers and an active community incorporating changes. 29 | 30 | That said, it is unlikely that all existing mappings will be maintained by the source directly. For example, we expect to maintain the SSSOM mappings 31 | derived from the vast majority of OBO ontology xrefs as a downstream task ([example](https://github.com/mapping-commons/ols-mapping-commons)). 32 | 33 | ## How dependent are we on the sssom-py toolkit? 34 | SSSOM follows the core design principle that mapping tables should be (a) self-contained, i.e. including their prefix maps similar to a turtle file, 35 | and (b) readable by normal data science toolkits. 
An SSSOM table can be read with pandas using the `comment='#'` parameter 36 | (with one caveat, which is that `#` must be used as a character _solely_ to denote comments), or a very simple combination of a yaml reader and pandas. 37 | 38 | The SSSOM toolkit however offers some extra functionality, like export to JSON-LD or RDF, or import from other frequently used formats. 39 | 40 | ## Is the concept of a "mapping server" equivalent, complementary, or antagonistic to the existing ontology repositories? 41 | A (SSSOM) mapping server is a repository for mappings that enables the browsing of existing mappings, exposing all (or some relevant subset of) SSSOM metadata as search 42 | facets. In that sense, it should be considered complementary, as it enables the search for accurate mappings from a specific term or set of terms, 43 | something that goes beyond what most ontologies would offer. However, the concept of ontology mappings can be _perceived_ as antagonistic to Open Ontology 44 | principles, as its goal is _not the logical integration of knowledge, but the association or linking of terms across controlled semantic spaces_. 45 | The OBO vision involves the building of a coherent, non-redundant semantic space of logically interconnected ontologies, which in particular 46 | wants to avoid the introduction of overlapping concepts. The mapping world specifically embraces heterogeneous semantic spaces and overlapping concepts, 47 | and seeks to bridge the semantic gaps using well-defined mapping relations such as "skos:broadMatch" or "owl:equivalentClass". 48 | 49 | 50 | -------------------------------------------------------------------------------- /src/docs/funding.md: -------------------------------------------------------------------------------- 1 | ## Funding 2 | 3 | The Simple Standard for Sharing Ontological Mappings (SSSOM) is a community-driven project which has received support from many different sources. 
4 | We list the most important ones in the following. 5 | 6 | ### Volunteering efforts 7 | 8 | A huge fraction of the work on SSSOM has been done by volunteers without dedicated grant support. 9 | We hereby acknowledge their contributions as being absolutely essential. A selection of amazing contributions (by no means exhaustive): 10 | 11 | - The development of [SSSOM Java](https://incenp.org/dvlpt/sssom-java/) 12 | - Hundreds of careful contributions to discussions on the [SSSOM issue tracker](https://github.com/mapping-commons/sssom/issues) 13 | - The first draft of the [Mapping Registry Cookiecutter](https://github.com/mapping-commons/mapping-commons-cookiecutter) 14 | - We try to keep track of other [Community efforts here](https://github.com/mapping-commons/sssom/discussions/318) 15 | 16 | ### Phenomics First (NIH / NHGRI #1RM1HG010860-01) 17 | 18 | A lot of the groundwork of SSSOM was done to support a disease mapping project as part of the [Mondo Disease Ontology](https://github.com/monarch-initiative/mondo), 19 | which included, but was not limited to: 20 | 21 | - Creation of a basic metadata model 22 | - Implementation of validation and parsing methods in [sssom-py](https://github.com/mapping-commons/sssom-py) 23 | - Generating [training materials](training.md) 24 | - Organising [workshops](workshops.md) 25 | - Outreach activities to clinical communities such as [OHDSI](https://www.ohdsi.org/) 26 | 27 | The grant was awarded to members of the Monarch Initiative. 28 | 29 | ### Monarch (NIH / OD #5R24OD011883) 30 | 31 | To support development of cross-species mappings and knowledge graph integration for the [Monarch Knowledge Graph](https://monarchinitiative.org/), 32 | a few new features had to be supported: 33 | 34 | - Groundwork for the [Semantic Mapping Vocabulary](https://github.com/mapping-commons/semantic-mapping-vocabulary) which contains, for example, cross-species mapping properties. 
35 | - The advancement of the concepts and tools behind the "Mapping Commons", including supporting the development of the [Mapping Registry Cookiecutter](https://github.com/mapping-commons/mapping-commons-cookiecutter) 36 | - Various improvements to the SSSOM metadata model, including the introduction of curation rules. 37 | - The [OxO2 SSSOM mapping browser](https://github.com/EBISPOT/oxo2) 38 | 39 | The grant was awarded to members of the Monarch Initiative. 40 | 41 | ### Bosch Gift to LBNL 42 | 43 | A lot of the work on tooling was supported by a Bosch Gift to the Lawrence Berkeley National Laboratory (Chris Mungall group). We thank Bosch for their generous support which helped us with the following: 44 | 45 | - Implementation of conversion and testing methods in [sssom-py](https://github.com/mapping-commons/sssom-py) 46 | - The development of training materials 47 | - The development of specialised matching tools such as [OAK lexmatch](https://incatools.github.io/ontology-access-kit/guide/mappings.html) which provided the first implementation of the SSSOM standard in a matching tool. 48 | 49 | ### DARPA: Young Faculty Award W911NF2010255 50 | 51 | A huge amount of refactoring of [sssom-py](https://github.com/mapping-commons/sssom-py) and development best practices, as well as training materials, was provided through this grant (awarded to Benjamin M. Gyori). Other contributions include work on the [Semantic mapping reasoner and assembler](https://github.com/biopragmatics/semra) 52 | -------------------------------------------------------------------------------- /src/docs/glossary.md: -------------------------------------------------------------------------------- 1 | # Glossary 2 | 3 | The glossary is currently being developed [here](https://docs.google.com/document/d/1QqR8j7szjaq6wzE9YLBnZ2kOD9eN14d3SYd312X8JjQ/edit?usp=sharing). 
-------------------------------------------------------------------------------- /src/docs/images/sssom-banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mapping-commons/sssom/1fb4b8b98358839f201ff6c776b5d121ce6ddec9/src/docs/images/sssom-banner.png -------------------------------------------------------------------------------- /src/docs/mapping-commons.md: -------------------------------------------------------------------------------- 1 | # How to set up a Mapping Commons 2 | 3 | A mapping commons is an open, collaborative space for managing and reconciling mappings. The goal is to collect mappings from a variety of sources into a _mapping set registry_, standardise them into a common representation, curate some basic metrics such as "confidence" (how much does the community managing the commons trust a specific mapping source?) and provenance (where exactly did this mapping come from before it was integrated?). 4 | 5 | There is no agreed-upon standard for mapping registries yet. SSSOM itself provides a [lightweight metadata model for mapping registries](https://mapping-commons.github.io/sssom/) which is, as of August 2023, under active development. 6 | 7 | ## Typical setup of a mapping commons 8 | 9 | We recommend basing your mapping commons on a combination of GitHub (or GitLab) collaborative workflows (issues and discussions for the community, access management etc) and a git repository based on the [Mapping Commons Cookiecutter Template](https://github.com/mapping-commons/mapping-commons-cookiecutter) for version control of the mappings. 10 | 11 | Using the template system above allows you to: 12 | 13 | 1. make use of basic CI and quality control for your mappings, 14 | 2. provides a standard way to document metadata about your mapping sets 15 | 3. provides a basic ETL system based on `gnu make` (which you don't have to use, it's just convenient) 16 | 4. 
Provides a standardised registry format that can be reused/imported by others. 17 | 18 | Examples of Mapping Commons are: 19 | 20 | 1. https://github.com/mapping-commons/mh_mapping_initiative 21 | 1. https://gitlab.c-path.org/c-pathontology/mapping-commons 22 | -------------------------------------------------------------------------------- /src/docs/presentations.md: -------------------------------------------------------------------------------- 1 | # Presentations 2 | 3 | ## A Simple Standard for Ontological Mappings 2024: A quick guide for getting started with publishing better entity mappings (RDA’s 23rd plenary) 4 | 5 | - November 13th, 2024, hybrid 6 | - Talk at [RDA plenary](https://www.rd-alliance.org/rdas-23rd-plenary-programme/), in particular [VSSIG WG](https://www.rd-alliance.org/session_entry/group-session-applications-03-07-2024-john-graybeal/) giving a brief update and encouraging SSSOM uptake. 7 | - [Slides](https://docs.google.com/presentation/d/1TKdXO-THSUf5PHZp2sYrzaTQhtunoB78A6_Yt8VEyoc/edit?usp=sharing) 8 | 9 | ## A Simple Standard for Ontological Mappings 2024: The case for prioritising 5-Star mappings (Every Cure, internal seminar) 10 | 11 | - August 1st, 2024, virtual 12 | - Talk to [Every Cure](https://everycure.org/) technical team (Knowledge Sharing Series) to make the case for prioritising the collection of mapping provenance. 
13 | - [Slides](https://docs.google.com/presentation/d/1_KkSeZcKyzsQVlZe8qkl7iEvii3HX0QKR58uvbirTDM/edit) 14 | 15 | ## (Re-)bridging the anatomy ontologies with SSSOM 16 | 17 | - July 19th, 2024 18 | - [15th International Conference on Biomedical Ontologies (ICBO)](https://icbo-conference.github.io/icbo2024/) 19 | - [Slides](https://github.com/gouttegd/sssomt-uberon/releases/download/v2-icbo-2024/screen.pdf), [paper](https://github.com/gouttegd/sssomt-uberon/releases/download/v2-icbo-2024/paper.pdf), [sources](https://github.com/gouttegd/sssomt-uberon) 20 | 21 | ## Workshop on Prefixes, CURIEs, and IRIs 2023 22 | 23 | - November 27th, 2023, virtual 24 | - Lightning talk on the deeper integration of the Bioregistry and `curies` toolkit with the SSSOM Python package 25 | - [Workshop info](https://biopragmatics.github.io/workshops/WPCI2023) 26 | 27 | ## OM2023: A Simple Standard for Sharing Ontological Mappings 2023: Updates on data model, collaborations and tooling 28 | 29 | - 7th November 2023 30 | - [http://om2023.ontologymatching.org/](http://om2023.ontologymatching.org/) 31 | - [Slides](https://docs.google.com/presentation/d/1d2t-VcseZ_oAgVTbrDHJOmwQTqyXB9ZHDrZ08OU87a0/edit) 32 | 33 | ## SSSOM Updates 2023 (FAIR Impact and FAIRCORE4EOSC Life Working Session) 34 | 35 | - 6th October 2023 36 | - [Slides](https://docs.google.com/presentation/d/1RY0IKn5TWVqXhcJ5vyabXEPhCPXNLsx9IWiWzQCzXZg/edit) 37 | 38 | ## OHDSI 2023 Symposium: Synergizing Simple Standard for Sharing Ontology Mappings (SSSOM) and the Observational Health Data Sciences and Informatics (OHDSI) 39 | 40 | - 3rd July 2023, https://www.ohdsi-europe.org/index.php/symposium-2023 41 | - [Seminar info](events/ohdsi2023.md) 42 | 43 | ## Elixir Data Interoperability Meeting: Introduction to SSSOM 44 | 45 | - 6th March 2023 46 | - This talk is a variant of the CCB Seminar Series talk below 47 | - 
[Slides](https://docs.google.com/presentation/d/1w-rNLTprIbW8IUBu6YokDsPe98AKg4VwfR1gSsJrae8/edit#slide=id.g167f28e52df_0_22) 48 | 49 | ## Ontology Summit 2023: Open, FAIR and standardised mappings for ontologies, controlled vocabularies and database entities 50 | 51 | - 22 February 2023, virtual 52 | - This talk is a variant of the CCB Seminar Series talk below 53 | - [Seminar info](https://ontologforum.org/index.php/ConferenceCall_2023_02_22) 54 | - [Video recording (scroll to minute 46)](https://ontologforum.s3.amazonaws.com/OntologySummit2023/Part1/Ubergraph--JimBalhoff_20230222.mp4) 55 | - [Slides](https://docs.google.com/presentation/d/1_TuimFiJ_7VP0ZFkQrHYky_ktFZc981Vse0-_hZjKtc/edit#slide=id.g167f28e52df_0_22) 56 | 57 | ## CCB Seminar Series: Open SSSOM - Unlocking the wealth of biomedical data using shared standardized entity mappings 58 | 59 | - December 2022 talk, virtual 60 | - [Seminar info](events/ccb2022.md) 61 | - [Video recording](https://www.youtube.com/watch?v=4vqeRECuAKE) 62 | - [Slides](https://docs.google.com/presentation/d/1Gt6kLSTx_e1Al6eCvGp_hviezy5ySo4UA_ii8LGqqIw/edit?usp=drive_web&ouid=105278838581444356576) 63 | 64 | ## OHDSI Symposium 2022: There are no "good" mappings. 
65 | 66 | - October 2022, hybrid symposium (https://www.ohdsi.org/ohdsi2022-workgroup-activities/) 67 | - [Seminar info](events/ohdsi2022.md) 68 | - [Slides](https://docs.google.com/presentation/d/1sGPh1b0keghxF4o7vMOQAlZ6QyBf97ZpaTXjmMY3UP0/edit#slide=id.SLIDES_API69505745_0) 69 | 70 | ## OM 2022: A Simple Standard for Ontological Mappings 2022 - Updates of data model and outlook 71 | 72 | - 23rd October 2022, Workshop for Ontology Matching, ISWC 2022, virtual 73 | - [Seminar info](events/om2022.md) 74 | - [Slides](https://docs.google.com/presentation/d/1L0LzXVPcfS9eW1KkN-BIYnxuh_CQ_8fl3QPvqw9BmUs/edit#slide=id.g16d02f01a3b_0_0) 75 | 76 | ## OBO Academy 2022: Introduction to manual mapping curation 77 | 78 | - 17th May 2022, Seminar, Monarch Seminar Series 79 | - [Video recording](https://www.youtube.com/watch?v=ZZeZcg-Vwjw) 80 | - [Seminar info](events/oboacademy2022.md) 81 | - [Slides](https://mapping-commons.github.io/sssom/tutorial/) 82 | 83 | ## Workshop on Prefixes, CURIEs, and IRIs 2021 84 | 85 | - Fall 2021, Use Case Talk on the need for prefix maps for SSSOM 86 | - [Video](https://youtu.be/iOXZfLAF_X0?t=1100) 87 | - [Workshop info](https://biopragmatics.github.io/workshops/WPCI2021) 88 | 89 | ## Pistoia Seminar 2022: A Simple Standard for Sharing Ontological Mappings (SSSOM) 90 | 91 | - 28th April 2022, Team Meeting 92 | - [Seminar info](events/pistoia2022.md) 93 | - [Slides](https://docs.google.com/presentation/d/1gW-BN4yR1c8qxzL9uLeJm99zRancY3k0tcZlJRPu4Eg/edit#slide=id.g126201cd604_0_0) 94 | 95 | ## WSBO-2021: Workshop on Synergizing Biomedical Ontologies 96 | 97 | - 14th July 2021, Workshop 98 | - [Workshop info](events/wsbo2021.md) 99 | - [Slides](https://docs.google.com/presentation/d/1TlROX-JNeWvgrX57-CBa2qxTrRp92VGGZnrhJv3rLPM/edit#slide=id.p) 100 | 101 | ## MC-2021: 1st Mapping Commons Workshop on Simple Standard for Sharing Ontology Mappings 102 | 103 | - [Workshop info](events/mc2021.md) 104 | - [Video 
recording](https://www.youtube.com/watch?v=lgVqFeSxYbg) 105 | - [Slides: Introduction](https://docs.google.com/presentation/d/1T75TRkpKRGHk5FSeFS7mQe8vmo8rt7bE69kgPX6PZMs/edit?usp=sharing) 106 | - [Slides: OMOP2OBO](https://docs.google.com/presentation/d/1ItWLWnIlJeBgw5r4ZQ6mOVAFVQp-1uQ7vA9EI-1o5HY/edit?usp=sharing). 107 | -------------------------------------------------------------------------------- /src/docs/related-documentation.md: -------------------------------------------------------------------------------- 1 | ## Related documentation 2 | 3 | - [SSSOM Toolkit](https://mapping-commons.github.io/sssom-py/index.html#): A toolkit and library for processing SSSOM files in Python 4 | - [SSSOM Java](https://incenp.org/dvlpt/sssom-java/): A toolkit and library for processing SSSOM files in Java 5 | - [Semantic Mapping Vocabulary (SEMAPV)](https://mapping-commons.github.io/semantic-mapping-vocabulary/): The mapping vocabulary used for mapping justifications and specialised mapping predicates 6 | - [LinkML](https://linkml.io/linkml/): The modelling framework used by SSSOM 7 | - [OBO Academy](https://oboacademy.github.io/obook/): Ontology and mapping related training materials 8 | - [Monarch Initiative](https://monarch-initiative.github.io/monarch-documentation/): Knowledge Graph related products supported by the Monarch Initiatives, including many tools using and producing SSSOM 9 | -------------------------------------------------------------------------------- /src/docs/resources/sssom_5star_mappings.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mapping-commons/sssom/1fb4b8b98358839f201ff6c776b5d121ce6ddec9/src/docs/resources/sssom_5star_mappings.pdf -------------------------------------------------------------------------------- /src/docs/spec-formats-json.md: -------------------------------------------------------------------------------- 1 | # The JSON serialisation format 2 | 3 | The 
JSON serialisation format is currently unspecified. 4 | 5 | It is intended as a more-or-less direct serialisation of the `MappingSet` class into the JSON format as specified by [RFC 8259](https://datatracker.ietf.org/doc/html/rfc8259), but many details of the serialisation are left unspecified for now. 6 | -------------------------------------------------------------------------------- /src/docs/spec-formats-owl.md: -------------------------------------------------------------------------------- 1 | # The OWL/RDF serialisation format 2 | 3 | This section defines a way to serialise SSSOM mappings as _reified OWL axioms_. This has the advantage that any mapping set can be simply merged with an ontology in the usual way, for example using [ROBOT merge](https://robot.obolibrary.org/merge). 4 | 5 | The OWL/RDF serialisation rules deal with three types of reified OWL axioms, and a few sub-types: 6 | 7 | 1. Predicate is an annotation property 8 | 2. Predicate is an object property and 9 | 1. Object/Subject are classes 10 | 2. Object/Subject are individuals 11 | 3. Predicate is language relational construct of RDFS or OWL (`rdfs:subClassOf`, `owl:equivalentClass`) 12 | 13 | ## Predicate is an annotation property: 14 | 15 | If the predicate corresponds to an annotation property, the mapping `` gets converted to an OWLAnnotationAssertion axiom: `OWLAnnotationAssertion(P,S,O)`. All mapping level metadata (`meta`) gets converted into OWLAnnotation objects which are materialised as axiom annotations on the mapping annotation assertion, see [OWL 2 Structural Specification](https://www.w3.org/TR/owl2-syntax/#Annotations): 16 | 17 | ``` 18 | AnnotationAssertion(meta P, S, O) 19 | ``` 20 | 21 | Where `meta` is a sequence of OWL Annotations objects like: 22 | 23 | ``` 24 | Annotation(Q1,V1) Annotation(Q2,V2) ... Annotation(Qn,Vn) 25 | ``` 26 | 27 | where `Qi` is a SSSOM metadata slot and `Vi` is an annotation value. 
28 | 29 | Note that if a SSSOM metadata element value is a list `L` (i.e. can have multiple elements, such as creator and others), individual annotations are created for each of them: 30 | 31 | ``` 32 | Annotation(Q,V) for all V in L. 33 | ``` 34 | 35 | Example: 36 | 37 | ``` 38 | AnnotationAssertion(Annotation(sssom:creator_id ) Annotation(sssom:mapping_justification semapv:LexicalMatching) skos:exactMatch ) 39 | ``` 40 | 41 | Mapping set level annotations are manifested as Ontology annotation in the usual way, according to the [OWL 2 Structural Specification](https://www.w3.org/TR/owl2-syntax/#Annotations). 42 | 43 | ## Predicate is an object property 44 | 45 | ### Case 1: Object and Subject are classes. 46 | 47 | The mapping `` gets translated into an existential restriction: 48 | 49 | ``` 50 | SubclassOf(S, P some O) 51 | ``` 52 | 53 | All metadata slots are added as OWLAnnotation objects and added to SubclassOf axiom as axiom annotations: 54 | 55 | ``` 56 | SubclassOf(meta, S, P some O) 57 | ``` 58 | 59 | Example: 60 | 61 | ``` 62 | SubClassOf(Annotation(sssom:creator_id ) Annotation(sssom:mapping_justification semapv:LexicalMatching) ObjectSomeValuesFrom( )) 63 | ``` 64 | 65 | ### Case 2: Object and Subject are individuals 66 | 67 | The mapping `` gets translated into an object property assertion: 68 | 69 | ``` 70 | ObjectPropertyAssertion(P, S, O) 71 | ``` 72 | 73 | All metadata slots are added as OWLAnnotation objects and added to ObjectPropertyAssertion axiom as axiom annotations: 74 | 75 | ``` 76 | ObjectPropertyAssertion(meta, P, S, O) 77 | ``` 78 | 79 | Example: 80 | 81 | ``` 82 | ObjectPropertyAssertion(Annotation(sssom:creator_id ) Annotation(sssom:mapping_justification semapv:LexicalMatching) ) 83 | ``` 84 | 85 | 86 | ### Predicate is language relational construct of RDFS or OWL 87 | 88 | The mapping `` gets translated into an annotated axiom using the following table: 89 | 90 | | Mapping predicate | Generated axiom | 91 | | ------------------- | 
--------------------------- | 92 | | owl:equivalentClass | EquivalentClass(meta, S, O) | 93 | | rdfs:subClassOf | SubClassOf(meta, S, O) | 94 | 95 | Example: 96 | 97 | ``` 98 | SubClassOf(Annotation(sssom:creator_id ) Annotation(sssom:mapping_justification semapv:LexicalMatching) ) 99 | ``` 100 | -------------------------------------------------------------------------------- /src/docs/spec-formats.md: -------------------------------------------------------------------------------- 1 | # SSSOM serialisation formats 2 | 3 | The SSSOM standard defines the following serialisation formats for storing and exchanging mapping sets: 4 | 5 | * the [SSSOM/TSV](spec-formats-tsv.md) format; 6 | * the [SSSOM JSON](spec-formats-json.md) format; 7 | * and the [OWL/RDF](spec-formats-owl.md) format. 8 | 9 | Implementations MUST support the SSSOM/TSV format. They MAY support the other formats. 10 | -------------------------------------------------------------------------------- /src/docs/spec-intro.md: -------------------------------------------------------------------------------- 1 | # Specification of the SSSOM standard 2 | 3 | This document is the official specification for the SSSOM standard. 4 | 5 | It is divided in two sections covering the two different components of the standard: 6 | 7 | * the specification for the [data model](spec-model.md), to manipulate SSSOM mappings and mapping sets in your programs; 8 | * the specification for the [serialisation formats](spec-formats.md), to read, write, and exchange SSSOM mapping sets. 9 | 10 | Both sections are _normative_. 11 | 12 | ## Conventions used in this document 13 | 14 | ### Key words 15 | 16 | Throughout the specification, the key words “MUST”, “MUST NOT”, “REQUIRED”, “SHALL”, “SHALL NOT”, “SHOULD”, “SHOULD NOT”, “RECOMMENDED”, “NOT RECOMMENDED”, “MAY”, and “OPTIONAL” are to be interpreted as described in [BCP 14](https://datatracker.ietf.org/doc/html/bcp14) when, and only when, they appear in all capitals, as shown here. 
17 | 18 | ### IRI prefixes 19 | 20 | Throughout the specification, the following IRI prefix names are used: 21 | 22 | | Prefix name | IRI prefix | 23 | | ----------- | ---------- | 24 | | owl | http://www.w3.org/2002/07/owl# | 25 | | rdf | http://www.w3.org/1999/02/22-rdf-syntax-ns# | 26 | | rdfs | http://www.w3.org/2000/01/rdf-schema# | 27 | | semapv | https://w3id.org/semapv/vocab/ | 28 | | skos | http://www.w3.org/2004/02/skos/core# | 29 | | sssom | https://w3id.org/sssom/ | 30 | | xsd | http://www.w3.org/2001/XMLSchema# | 31 | | linkml | https://w3id.org/linkml/ | 32 | -------------------------------------------------------------------------------- /src/docs/toolkit.md: -------------------------------------------------------------------------------- 1 | # The SSSOM Toolkit 2 | 3 | In the following we will give a brief introduction to the SSSOM toolkit. For more detailed documentation please refer to https://mapping-commons.github.io/sssom-py. 4 | 5 | ## Pre-requisites 6 | 7 | - Complete the [basic SSSOM tutorial](tutorial.md) 8 | - [Install SSSOM toolkit](https://mapping-commons.github.io/sssom-py/installation.html). Alternatively, you can install the [Ontology Development Kit (ODK)](https://github.com/INCATools/ontology-development-kit) and follow the tutorial using its [docker image](https://oboacademy.github.io/obook/howto/odk-setup/). 9 | - We are assuming a Unix shell for this tutorial, but most of the principles should apply to the Windows CMD as well. Windows users may prefer to install the ODK (see above). 10 | 11 | ## Overview 12 | 13 | SSSOM toolkit (STK), previously known as `sssom-py`, is a set of utility methods for processing SSSOM files, packaged as a Command Line Client (CLI) and a [python package](https://pypi.org/project/sssom/). In the following, we will extract mappings from an ontology and process them with the CLI. The goal is to give a sense of the functionality of the toolkit. 
Additional and more up-to-date information on usage can be found [here](https://mapping-commons.github.io/sssom-py). 14 | 15 | ## Table of Contents 16 | 17 | 1. `parse`: [Extracting mappings from an external source](#parse) 18 | 2. `merge`: [Combining mappings from several sources](#merge) 19 | 3. `convert`: [Converting an SSSOM mapping table into different formats](#convert) 20 | 21 | 22 | 23 | ## Extracting mappings from an external source 24 | 25 | One key issue developers are faced with is to convert various different mapping formats into a common representation (e.g. SSSOM). The SSSOM toolkit (STK) already implements a number of commonly used mapping formats: 26 | 27 | 1. [OWL Ontologies](https://en.wikipedia.org/wiki/Web_Ontology_Language) 28 | 2. [Alignment API](https://moex.gitlabpages.inria.fr/alignapi/) Format (format used by the Ontology Alignment Evaluation Initiative, OAEI) 29 | 3. Parsers for SNOMED mapping format and FHIR Concept Map are [in the making](https://github.com/mapping-commons/sssom-py/pull/207), June 2022. 30 | 31 | Here we use Uberon, an anatomy ontology in the biomedical domain. 32 | 33 | ``` 34 | wget http://purl.obolibrary.org/obo/uberon/uberon-base.json -O uberon-base.json 35 | ``` 36 | 37 | Feel free to download the file manually if you do not have `wget` installed. 38 | 39 | Now use `sssom parse` to extract all the mappings provided by the ontology. As there are multiple json based formats that can be parsed, you have to tell `sssom` which format you are using: `--input-format obographs-json`. 40 | 41 | ``` 42 | sssom parse uberon-base.json --input-format obographs-json --output uberon.sssom.tsv 43 | ``` 44 | 45 | From a CLI design perspective we already notice a few things: 46 | 47 | - `uberon-base.json` is passed to the STK _as an argument_ (without an option like `-i`). This is the case for most _primary inputs_ (mapping tables, source files) throughout the SSSOM client. 48 | - The output generated by the above command is large. 
There seem to be a lot of messages where some URL `does not follow any known prefixes`: 49 | 50 | ``` 51 | WARNING:root:http://dbpedia.org/ontology/AnatomicalStructure does not follow any known prefixes 52 | WARNING:root:http://uri.neuinfo.org/nif/nifstd/nlx_subcell_100205 does not follow any known prefixes 53 | WARNING:root:http://neurolex.org/wiki/Category:Embryonic_organism does not follow any known prefixes 54 | WARNING:root:http://www.informatics.jax.org/cookbook/figures/figure20.shtml does not follow any known prefixes 55 | WARNING:root:http://mbe.oxfordjournals.org/content/26/3/613/F1.large.jpg does not follow any known prefixes 56 | WARNING:root:http://palaeos.com/vertebrates/glossary/images/450x218xEctocuneiform.gif.pagespeed.ic.kaiuLYQELL.png does not follow any known prefixes 57 | WARNING:root:http://palaeos.com/vertebrates/bones/dermal/images/289x311xPalatine1.gif.pagespeed.ic.tglmNBrF4D.png does not follow any known prefixes 58 | WARNING:root:http://uri.neuinfo.org/nif/nifstd/nifext_14 does not follow any known prefixes 59 | .... 60 | ``` 61 | 62 | Understanding this is important to understand a lot about how SSSOM treats entities in general. 63 | 64 | ### Why are there so many `does not follow any known prefixes` warnings? 65 | 66 | CURIEs are a key concept for the representation of SSSOM documents, in particular its table. All fields that constitute a reference to some entity, such as ids (`subject_id`, `object_id`, `predicate_id`), and other fields such as `mapping_justification` are represented in CURIE syntax. 67 | 68 | The [Semantic Web](https://www.w3.org/standards/semanticweb/) uses URIs (which look more like URLs rather than CURIEs) to refer to entities - there is, however, no standard protocol to translate a URI into a _Compact_ URI (or CURIE). 69 | 70 | Efforts such as https://bioregistry.io/, https://github.com/prefixcommons or https://identifiers.org/ try to bring a bit of an organisation to prefixes. 
In particular the former two curate maps between prefixes and URIs. 71 | 72 | - URI: `http://purl.obolibrary.org/obo/MONDO_0000001` 73 | - CURIE: `MONDO:0000001` 74 | - PREFIX: `MONDO` 75 | - URI expansion: `http://purl.obolibrary.org/obo/MONDO_` 76 | 77 | Now the problem is that over the years, many very idiosyncratic URIs were used to denote entities in ontologies. While the STK tries to figure out the correct prefixes using https://bioregistry.io/, many times it fails - in these cases, the user _must provide their own prefix map_. 78 | 79 | Let's create a simple one, and save it as `metadata.yml` (we call it "metadata", because we will add more metadata to it in this tutorial): 80 | 81 | ``` 82 | curie_map: 83 | dbpedia: http://dbpedia.org/ontology/ 84 | ``` 85 | 86 | We can now use this _in addition to the default prefix maps_: 87 | 88 | ``` 89 | sssom parse uberon-base.json --input-format obographs-json --metadata metadata.yml --prefix-map-mode merged --output uberon.sssom.tsv 90 | ``` 91 | 92 | 93 | 94 | ## Combining mappings from several sources 95 | 96 | 97 | 98 | ## Converting an SSSOM mapping table into different formats 99 | 100 | 101 | 102 | ## Other methods: 103 | 104 | - cliquesummary 105 | - correlations 106 | - crosstab 107 | - dedupe 108 | - diff 109 | - dosql 110 | - partition 111 | - ptable 112 | - reconcile-prefixes 113 | - rewire 114 | - sort 115 | - sparql 116 | - split 117 | - validate 118 | 119 | _Under construction_. 
-------------------------------------------------------------------------------- /src/docs/training.md: -------------------------------------------------------------------------------- 1 | ## SSSOM Training materials 2 | 3 | - [Elevator pitch](#elevator) 4 | - [Tutorials and Guides](#guides) 5 | - [Related tutorials](#related) 6 | 7 | 8 | 9 | ### Elevator pitch 10 | 11 | 12 | 13 | 14 | 15 | ### Tutorials and Guides 16 | 17 | - [Mapping curation with SSSOM](https://oboacademy.github.io/obook/tutorial/sssom-tutorial/) 18 | - [Are these two entities the same? A guide.](https://oboacademy.github.io/obook/howto/are-two-entities-the-same/). An important tutorial that explains that it is not directly possible to determine if two things are the same across ontologies, but it's still worth doing when explicitly recording the rationale. 19 | - [Linking across vocabularies: Semantic Entity Matching](https://oboacademy.github.io/obook/lesson/entity-matching/): Entity matching is the process of establishing a link between an identifier in one semantic space and an identifier in another. There are many cultures of thought around entity matching, including Ontology Matching, Entity Resolution and Entity Linking.
20 | 21 | 22 | 23 | ### Related tutorials 24 | 25 | - [Introduction to processing mappings with SSSOM and sssom-py CLI](https://oboacademy.github.io/obook/tutorial/sssom-toolkit/) 26 | - [Introduction to matching with OAK lexmatch](https://oboacademy.github.io/obook/tutorial/lexmatch-tutorial/) 27 | - [Curating Semantic Mappings with Biomappings](https://oboacademy.github.io/obook/tutorial/biomappings/) -------------------------------------------------------------------------------- /src/docs/tutorials/omop-mappings.md: -------------------------------------------------------------------------------- 1 | # How to gradually enrich OMOP mappings with SSSOM 2 | 3 | This document is a guide for OMOP ETL developers to think about gradually improving the (documentation of the) strength of evidence for their vocabulary mappings. 4 | 5 | ## Example table from OMOP 6 | 7 | Generated manually with Athena on the 20th July 2023. The start and end dates are invented. 8 | 9 | | concept_id_1 | concept_id_2 | relationship_id | valid_start_date | valid_end_date | invalid_reason | 10 | |--------------|--------------|-----------------|------------------|----------------|----------------| 11 | | 44499396 | 4028717 | Maps to | 19700101 | 20991231 | | 12 | | 45586281 | 4028717 | Maps to | 73754 | 20991231 | | 13 | 14 | ## Level 1, basic mapping table, basic provenance 15 | 16 | The SSSOM metadata provided is conceptually correct, but fictitious. 17 | 18 | The reader should imagine this being provided as a separate CONCEPT_MAPPINGS.CSV table that can be joined on `subject_id`->`concept_id_1`, `object_id`->`concept_id_2` for all rows with a `Maps to` `relationship_id` (this is assuming that the `concept_id_1`,`concept_id_2` tuple is unique for `Maps to`). 
19 | 20 | | subject_id | object_id | predicate_id | mapping_provider | mapping_tool | mapping_tool_version | mapping_justification | reviewer_id | author_id | 21 | |---|---|---|---|---|---|---|---|---| 22 | | OMOP:44499396 | OMOP:4028717 | omoprel:mapsTo | OHDSI:Odysseus | | | semapv:ManualMappingCuration | | ORCID:0000-0003-4147-1485 | 23 | | OMOP:45586281 | OMOP:4028717 | omoprel:mapsTo | OHDSI:Odysseus | OHDSI_TOOLS:Usagi | 1.4.3 | semapv:LexicalMatching | ORCID:0000-0003-4147-1485 | | 24 | | OMOP:45610575 | OMOP:441554 | omoprel:mapsTo | OHDSI:UMLS | | | semapv:UnspecifiedMatching | | | 25 | 26 | What we see here: 27 | 28 | 1. all identifiers are prefixed to make sure they are interpreted correctly when they are reused. This includes OMOP ids (e.g. `OMOP:44499396`) as well as ORCIDs (OPTIONAL) 29 | 1. "Maps to" is encoded using a proper identifier rather than a string (OPTIONAL) 30 | 1. All three mappings have a `mapping_justification` to distinguish for example if the mapping was determined by human manual curation (`semapv:ManualMappingCuration`) or lexical matching (`semapv:LexicalMatching`). Many other justifications exist and/or can be created. If the justification for the mapping is unknown, we can make our lack of knowledge transparent by using `semapv:UnspecifiedMatching`. 31 | 1. `author_id`, in the case of `semapv:ManualMappingCuration`, tells us who the person is that determined the mapping. This is basic provenance. If the identity of the author can be connected with a public record such as ORCID, this can help mapping users to increase trust in a mapping. `reviewer_id` tells us that some human looked at the mapping after it was proposed by a tool, and "signed off" on it. This can be valuable, again, to increase trust. 32 | 1. If the match was generated by a tool, some basic provenance is added (`mapping_tool`, `mapping_tool_version`).
33 | 34 | ## Level 2: Curate semantic mapping predicate 35 | 36 | | subject_id | object_id | predicate_id | mapping_provider | mapping_tool | mapping_tool_version | mapping_justification | reviewer_id | author_id | 37 | |---|---|---|---|---|---|---|---|---| 38 | | OMOP:44499396 | OMOP:4028717 | skos:broadMatch | OHDSI:Odysseus | | | semapv:ManualMappingCuration | | ORCID:0000-0003-4147-1485 | 39 | | OMOP:45586281 | OMOP:4028717 | skos:exactMatch | OHDSI:Odysseus | OHDSI_TOOLS:Usagi | 1.4.3 | semapv:LexicalMatching | ORCID:0000-0003-4147-1485 | | 40 | | OMOP:45610575 | OMOP:441554 | skos:exactMatch | OHDSI:UMLS | | | semapv:UnspecifiedMatching | | | 41 | 42 | What do we see here? 43 | 44 | 1. Rather than `Maps to`, the mapping predicate (e.g. `skos:exactMatch`) is a semantic mapping predicate from a standardised vocabulary ([SKOS](https://www.w3.org/TR/skos-reference)). Here, we distinguish between `skos:exactMatch` and `skos:broadMatch`, but there are other predicates, see for example in the [Semantic Mapping Vocabulary](https://github.com/mapping-commons/semantic-mapping-vocabulary/blob/main/semapv-properties.tsv). 45 | 46 | ## Level 3: Document confidence widely 47 | 48 | `confidence` is an incredibly useful metric for downstream users, including ETL engineers and data analysts. In an ideal world, all mappings have some kind of `confidence` associated with them. `confidence` scores should be read as "the strength of evidence provided in this record/table row (i.e. mapping justification) leads us to believe the mapping (e.g. `OMOP:44499396 --[skos:broadMatch]--> OMOP:4028717`) is correct with 90% confidence".
49 | 50 | | subject_id | object_id | predicate_id | mapping_provider | mapping_tool | mapping_tool_version | mapping_justification | reviewer_id | author_id | confidence | 51 | |---|---|---|---|---|---|---|---|---|---| 52 | | OMOP:44499396 | OMOP:4028717 | skos:broadMatch | OHDSI:Odysseus | | | semapv:ManualMappingCuration | | ORCID:0000-0003-4147-1485 | 0.9 | 53 | | OMOP:45586281 | OMOP:4028717 | skos:exactMatch | OHDSI:Odysseus | OHDSI_TOOLS:Usagi | 1.4.3 | semapv:LexicalMatching | ORCID:0000-0003-4147-1485 | | 0.8 | 54 | | OMOP:45610575 | OMOP:441554 | skos:exactMatch | OHDSI:UMLS | | | semapv:UnspecifiedMatching | | | 0.6 | 55 | 56 | What do we see here? 57 | 58 | - For matching tools, confidence can be calculated by proxies such as "lexical similarity", "edit distance", "cosine similarity of node embedding" and other metrics. In the example above, Usagi has determined that the subject and object match, but it was only 80% sure (we don't know why - this is [more advanced SSSOM](../mapping-justifications.md)). 59 | - For cases where an external mapping is reused using ETL, `confidence` describes the level of trust you as an ETL expert have in the fidelity of the mapping provided by the source. 60 | 61 | ## Level 4: Document curation rules 62 | 63 | | subject_id | object_id | predicate_id | mapping_provider | mapping_tool | mapping_tool_version | mapping_justification | reviewer_id | author_id | confidence | curation_rule | 64 | |---|---|---|---|---|---|---|---|---|---|---| 65 | | OMOP:44499396 | OMOP:4028717 | skos:broadMatch | OHDSI:Odysseus | | | semapv:ManualMappingCuration | | ORCID:0000-0003-4147-1485 | 0.9 | OHDSI_CURATION_RULE:19 | 66 | 67 | What do we see here? 68 | 69 | - For manual matches, it is often unclear by what criteria a match was established. Documenting the curation rules can help increase consistency for manual curation, and transparency for downstream users. 70 | - `OHDSI_CURATION_RULE:19` is a rule defined by your own curation rulebook.
This can be _anything_. For example `OHDSI_CURATION_RULE:19` could correspond to the following rule: 71 | ``` 72 | OHDSI_CURATION_RULE:19 = If the subject concept does not have an exact match in the object source vocabulary, we select the nearest broad ("up-hill") concept applicable. Conceptually, if both terms existed in the same terminology, the subject concept can be defined as a subconcept of the object concept. The determination for both criteria (nearest broad, conceptually subconcept) is performed through medical expert judgement. 73 | ``` 74 | -------------------------------------------------------------------------------- /src/docs/usecases.md: -------------------------------------------------------------------------------- 1 | # Use cases and case studies 2 | 3 | ## Mondo disease mappings 4 | 5 | - [Mondo mappings](https://github.com/monarch-initiative/mondo/tree/master/src/ontology/mappings) 6 | 7 | ## The National Microbiome Data Collaborative (NMDC, https://microbiomedata.org/): 8 | 9 | - [Various data model mappings](https://github.com/microbiomedata/nmdc-schema/tree/main/sssom), e.g.
MIXS, GOLD, etc 10 | 11 | ## CCDH (mapping clinical data models, ontologies and value sets) 12 | 13 | - https://harmonization.datacommons.cancer.gov/ccdh-resources 14 | - SNOMED-NCIT [example](https://docs.google.com/spreadsheets/d/18luA05E9wLukOFamsRV3FWVnoCr57o8qAHe-aGYrPr8/edit#gid=509055704) 15 | 16 | ## Cross-species mapping efforts 17 | 18 | - [mouse-human mapping commons](https://github.com/mapping-commons/mh_mapping_initiative) 19 | 20 | ## Microbial traits 21 | 22 | - https://github.com/mapping-commons/microbial-trait-mappings 23 | 24 | ## Biomappings 25 | 26 | - https://github.com/biomappings/biomappings/tree/master/docs/_data/sssom 27 | - Decentralized curation of mappings, especially ones that aren't incident to ontology terms 28 | 29 | ## Clinical mappings 30 | 31 | [Gdocs](https://docs.google.com/document/d/1p7MVn0UGro6SMgnCfi70BOYgrDRoNkEjpXXAl8_hYXw/edit) for discussion. 32 | -------------------------------------------------------------------------------- /src/docs/workshops.md: -------------------------------------------------------------------------------- 1 | # Workshops 2 | 3 | ## Mapping Commons Workshop Series 4 | 5 | Wikidata: https://www.wikidata.org/wiki/Q108394475 6 | 7 | - [1st Mapping Commons Workshop on Simple Standard for Sharing Ontology Mappings](events/mc2021.md) 8 | - [2nd Mapping Commons Workshop on Simple Standard for Sharing Ontology Mappings](events/mc2023.md) 9 | -------------------------------------------------------------------------------- /src/sssom_schema/__init__.py: -------------------------------------------------------------------------------- 1 | from .datamodel.sssom_schema import * -------------------------------------------------------------------------------- /src/sssom_schema/context/sssom_schema.context.jsonld: -------------------------------------------------------------------------------- 1 | { 2 | "comments": { 3 | "description": "Auto generated by LinkML jsonld context generator", 4 | "generation_date": 
"2024-08-09T22:25:39", 5 | "source": "sssom_schema.yaml" 6 | }, 7 | "@context": { 8 | "dcterms": "http://purl.org/dc/terms/", 9 | "linkml": "https://w3id.org/linkml/", 10 | "oboInOwl": "http://www.geneontology.org/formats/oboInOwl#", 11 | "owl": "http://www.w3.org/2002/07/owl#", 12 | "pav": "http://purl.org/pav/", 13 | "prov": "http://www.w3.org/ns/prov#", 14 | "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", 15 | "rdfs": "http://www.w3.org/2000/01/rdf-schema#", 16 | "semapv": "https://w3id.org/semapv/vocab/", 17 | "skos": "http://www.w3.org/2004/02/skos/core#", 18 | "sssom": "https://w3id.org/sssom/", 19 | "xsd": "http://www.w3.org/2001/XMLSchema#", 20 | "@vocab": "https://w3id.org/sssom/", 21 | "author_id": { 22 | "@type": "rdfs:Resource", 23 | "@id": "pav:authoredBy" 24 | }, 25 | "author_label": { 26 | "@id": "author_label" 27 | }, 28 | "comment": { 29 | "@id": "rdfs:comment" 30 | }, 31 | "confidence": { 32 | "@type": "xsd:double", 33 | "@id": "confidence" 34 | }, 35 | "creator_id": { 36 | "@type": "rdfs:Resource", 37 | "@id": "dcterms:creator" 38 | }, 39 | "creator_label": { 40 | "@id": "creator_label" 41 | }, 42 | "curation_rule": { 43 | "@type": "rdfs:Resource", 44 | "@id": "curation_rule" 45 | }, 46 | "curation_rule_text": { 47 | "@id": "curation_rule_text" 48 | }, 49 | "curie_map": { 50 | "@type": "@id", 51 | "@id": "curie_map" 52 | }, 53 | "documentation": { 54 | "@type": "@id", 55 | "@id": "documentation" 56 | }, 57 | "extension_definitions": { 58 | "@type": "@id", 59 | "@id": "extension_definitions" 60 | }, 61 | "property": { 62 | "@type": "@id", 63 | "@id": "property" 64 | }, 65 | "slot_name": { 66 | "@id": "slot_name" 67 | }, 68 | "type_hint": { 69 | "@type": "@id", 70 | "@id": "type_hint" 71 | }, 72 | "homepage": { 73 | "@type": "@id", 74 | "@id": "homepage" 75 | }, 76 | "imports": { 77 | "@type": "@id", 78 | "@id": "imports" 79 | }, 80 | "issue_tracker": { 81 | "@type": "@id", 82 | "@id": "issue_tracker" 83 | }, 84 | "issue_tracker_item": { 85 | 
"@type": "rdfs:Resource", 86 | "@id": "issue_tracker_item" 87 | }, 88 | "last_updated": { 89 | "@type": "xsd:date", 90 | "@id": "last_updated" 91 | }, 92 | "license": { 93 | "@type": "@id", 94 | "@id": "dcterms:license" 95 | }, 96 | "local_name": { 97 | "@id": "local_name" 98 | }, 99 | "mapping_cardinality": { 100 | "@context": { 101 | "@vocab": "@null", 102 | "text": "skos:notation", 103 | "description": "skos:prefLabel", 104 | "meaning": "@id" 105 | }, 106 | "@id": "mapping_cardinality" 107 | }, 108 | "mapping_date": { 109 | "@type": "xsd:date", 110 | "@id": "pav:authoredOn" 111 | }, 112 | "mapping_justification": { 113 | "@type": "rdfs:Resource", 114 | "@id": "mapping_justification" 115 | }, 116 | "mapping_provider": { 117 | "@type": "@id", 118 | "@id": "mapping_provider" 119 | }, 120 | "mapping_registry_description": { 121 | "@id": "mapping_registry_description" 122 | }, 123 | "mapping_registry_id": { 124 | "@type": "rdfs:Resource", 125 | "@id": "mapping_registry_id" 126 | }, 127 | "mapping_registry_title": { 128 | "@id": "mapping_registry_title" 129 | }, 130 | "mapping_set_description": { 131 | "@id": "dcterms:description" 132 | }, 133 | "mapping_set_group": { 134 | "@id": "mapping_set_group" 135 | }, 136 | "mapping_set_id": { 137 | "@type": "@id", 138 | "@id": "mapping_set_id" 139 | }, 140 | "mapping_set_references": { 141 | "@type": "@id", 142 | "@id": "mapping_set_references" 143 | }, 144 | "mapping_set_source": { 145 | "@type": "@id", 146 | "@id": "prov:wasDerivedFrom" 147 | }, 148 | "mapping_set_title": { 149 | "@id": "dcterms:title" 150 | }, 151 | "mapping_set_version": { 152 | "@id": "owl:versionInfo" 153 | }, 154 | "mapping_source": { 155 | "@type": "rdfs:Resource", 156 | "@id": "mapping_source" 157 | }, 158 | "mapping_tool": { 159 | "@id": "mapping_tool" 160 | }, 161 | "mapping_tool_version": { 162 | "@id": "mapping_tool_version" 163 | }, 164 | "mappings": { 165 | "@type": "@id", 166 | "@id": "mappings" 167 | }, 168 | "match_string": { 169 | "@id": 
"match_string" 170 | }, 171 | "mirror_from": { 172 | "@type": "@id", 173 | "@id": "mirror_from" 174 | }, 175 | "object_category": { 176 | "@id": "object_category" 177 | }, 178 | "object_id": { 179 | "@type": "rdfs:Resource", 180 | "@id": "owl:annotatedTarget" 181 | }, 182 | "object_label": { 183 | "@id": "object_label" 184 | }, 185 | "object_match_field": { 186 | "@type": "rdfs:Resource", 187 | "@id": "object_match_field" 188 | }, 189 | "object_preprocessing": { 190 | "@type": "rdfs:Resource", 191 | "@id": "object_preprocessing" 192 | }, 193 | "object_source": { 194 | "@type": "rdfs:Resource", 195 | "@id": "object_source" 196 | }, 197 | "object_source_version": { 198 | "@id": "object_source_version" 199 | }, 200 | "object_type": { 201 | "@context": { 202 | "@vocab": "@null", 203 | "text": "skos:notation", 204 | "description": "skos:prefLabel", 205 | "meaning": "@id" 206 | }, 207 | "@id": "object_type" 208 | }, 209 | "other": { 210 | "@id": "other" 211 | }, 212 | "predicate_id": { 213 | "@type": "rdfs:Resource", 214 | "@id": "owl:annotatedProperty" 215 | }, 216 | "predicate_label": { 217 | "@id": "predicate_label" 218 | }, 219 | "predicate_modifier": { 220 | "@context": { 221 | "@vocab": "@null", 222 | "text": "skos:notation", 223 | "description": "skos:prefLabel", 224 | "meaning": "@id" 225 | }, 226 | "@id": "predicate_modifier" 227 | }, 228 | "predicate_type": { 229 | "@context": { 230 | "@vocab": "@null", 231 | "text": "skos:notation", 232 | "description": "skos:prefLabel", 233 | "meaning": "@id" 234 | }, 235 | "@id": "predicate_type" 236 | }, 237 | "prefix_name": { 238 | "@id": "prefix_name" 239 | }, 240 | "prefix_url": { 241 | "@type": "@id", 242 | "@id": "prefix_url" 243 | }, 244 | "propagated": { 245 | "@type": "xsd:boolean", 246 | "@id": "propagated" 247 | }, 248 | "publication_date": { 249 | "@type": "xsd:date", 250 | "@id": "dcterms:created" 251 | }, 252 | "registry_confidence": { 253 | "@type": "xsd:double", 254 | "@id": "registry_confidence" 255 | }, 256 
| "reviewer_id": { 257 | "@type": "rdfs:Resource", 258 | "@id": "reviewer_id" 259 | }, 260 | "reviewer_label": { 261 | "@id": "reviewer_label" 262 | }, 263 | "see_also": { 264 | "@id": "rdfs:seeAlso" 265 | }, 266 | "similarity_measure": { 267 | "@id": "similarity_measure" 268 | }, 269 | "similarity_score": { 270 | "@type": "xsd:double", 271 | "@id": "similarity_score" 272 | }, 273 | "subject_category": { 274 | "@id": "subject_category" 275 | }, 276 | "subject_id": { 277 | "@type": "rdfs:Resource", 278 | "@id": "owl:annotatedSource" 279 | }, 280 | "subject_label": { 281 | "@id": "subject_label" 282 | }, 283 | "subject_match_field": { 284 | "@type": "rdfs:Resource", 285 | "@id": "subject_match_field" 286 | }, 287 | "subject_preprocessing": { 288 | "@type": "rdfs:Resource", 289 | "@id": "subject_preprocessing" 290 | }, 291 | "subject_source": { 292 | "@type": "rdfs:Resource", 293 | "@id": "subject_source" 294 | }, 295 | "subject_source_version": { 296 | "@id": "subject_source_version" 297 | }, 298 | "subject_type": { 299 | "@context": { 300 | "@vocab": "@null", 301 | "text": "skos:notation", 302 | "description": "skos:prefLabel", 303 | "meaning": "@id" 304 | }, 305 | "@id": "subject_type" 306 | }, 307 | "ExtensionDefinition": { 308 | "@id": "ExtensionDefinition" 309 | }, 310 | "Mapping": { 311 | "@id": "owl:Axiom" 312 | }, 313 | "MappingRegistry": { 314 | "@id": "MappingRegistry" 315 | }, 316 | "MappingSet": { 317 | "@id": "MappingSet" 318 | }, 319 | "MappingSetReference": { 320 | "@id": "MappingSetReference" 321 | }, 322 | "NoTermFound": { 323 | "@id": "NoTermFound" 324 | }, 325 | "Prefix": { 326 | "@id": "Prefix" 327 | }, 328 | "Propagatable": { 329 | "@id": "Propagatable" 330 | } 331 | } 332 | } 333 | -------------------------------------------------------------------------------- /src/sssom_schema/datamodel/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mapping-commons/sssom/1fb4b8b98358839f201ff6c776b5d121ce6ddec9/src/sssom_schema/datamodel/__init__.py -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mapping-commons/sssom/1fb4b8b98358839f201ff6c776b5d121ce6ddec9/tests/__init__.py -------------------------------------------------------------------------------- /tests/input/CONFIG.yaml: -------------------------------------------------------------------------------- 1 | model_name: template-config-model 2 | root_schema: config_model 3 | 4 | model_organization: linkml 5 | model_author: Harold Solbrig 6 | model_author_email: solbrig@jhu.edu 7 | model_synopsis: Configuration parameters for LinkML model generation 8 | model_description: |- 9 | The parameters used to create and generate a new model derived from the LinkML Model Template 10 | 11 | # Trove Classifiers (https://pypi.org/classifiers/) -- copied to setup.cfg only if not already present 12 | classifiers: 13 | - "Development Status :: 4 - Beta" 14 | - "Environment :: Console" 15 | - "Intended Audience :: Developers" 16 | - "Intended Audience :: Science/Research" 17 | - "Intended Audience :: Information Technology" 18 | - "License :: CC0 1.0 Universal (CC0 1.0) Public Domain Dedication" 19 | - "Programming Language :: Python :: 3 :: Only" 20 | - "Programming Language :: Python :: 3.7" 21 | - "Programming Language :: Python :: 3.8" 22 | - "Programming Language :: Python :: 3.9" 23 | 24 | # Keywords -- copied to setup.cfg only if not already present 25 | keywords: [linkml, LOD, Modeling, Linked open data, model] 26 | 27 | # Elements to be generated 28 | # See: https://github.com/linkml/linkml/generators/README.md for what the outputs look like 29 | # Possible values: graphql -- graphql schema for the model 30 | # json -- LinkML model definition in JSON syntax 31 | # 
jsonld_context -- JSON-LD context definitions for model 32 | # json_schema -- Model schema in json schema 33 | # owl -- OWL representation of model schema 34 | # rdf -- LinkML Model definition in RDF 35 | # shex -- LinkML model definition in ShEx 36 | generate: [jsonld_context, json_schema] 37 | 38 | -------------------------------------------------------------------------------- /tests/input/README.md: -------------------------------------------------------------------------------- 1 | # Tests input directory 2 | This directory contains samples of your schema. Samples can be written in yaml, json, rdf or any other language that 3 | the [linkml-runtime](https://linkml.github.io/linkml-runtime) importers and exporters support. 4 | 5 | `test_examples.py` will iterate over this directory, loading each test and: 6 | 1) For each runtime generator (yaml, json, rdf, ...) will emit the output in that language 7 | 2) Will compare the output to its expected value in the `output` directory and will alert you if something 8 | has changed 9 | 3) If something HAS changed, will update the output accordingly. 10 | 11 | [ ] Add a manifest setup https://linkml.github.io/linkml-template-config-model, so we can specify whether 12 | we expect the tests to pass or fail and, if they fail, why.
# /tests/test_input_against_model.py
import os
import unittest

from linkml_runtime.loaders import yaml_loader, json_loader, rdf_loader

# NOTE(review): `sssom.sssom.Person` looks like a leftover from the LinkML
# project template; confirm that this module and class actually exist.
from sssom.sssom import Person

CWD = os.path.abspath(os.path.dirname(__file__))
INPUT_DIR = os.path.join(CWD, 'input')


class InputFileTestCase(unittest.TestCase):
    """ Test the input files against the model"""
    def test_input_files(self):
        """ Iterate over the input directory loading any test files

        Every file below ``INPUT_DIR`` is counted as read. Files ending in
        ``.yaml``, ``.json`` or ``.ttl`` are loaded as ``Person`` with the
        matching loader; ``.md`` files are ignored; anything else is counted
        as an unrecognized type. A summary is printed and the test fails if
        any load raised, listing each failing file and its exception.
        """
        def gen_detail(total: int, passed: int, typ: str) -> str:
            return f"{total} {typ} files tested - {total-passed} failures"

        # suffix -> (label used in the summary, loader module); checked in
        # this order, mirroring the original if/elif chain.
        loaders = {
            '.yaml': ('YAML', yaml_loader),
            '.json': ('JSON', json_loader),
            '.ttl': ('TTL', rdf_loader),
        }
        tested = {label: 0 for label, _ in loaders.values()}
        passed = {label: 0 for label, _ in loaders.values()}
        failures = []  # one "path: ExceptionType: message" entry per failed load
        nunk = 0
        nread = 0

        for dpath, _, files in os.walk(INPUT_DIR):
            for fname in files:
                full_fname = os.path.join(dpath, fname)
                nread += 1
                for suffix, (label, loader) in loaders.items():
                    if not fname.endswith(suffix):
                        continue
                    tested[label] += 1
                    try:
                        loader.load(full_fname, Person)
                    except Exception as exc:
                        # Deliberately broad: any loader error counts as a test
                        # failure, but keep the detail so it can be reported.
                        failures.append(
                            f"{full_fname}: {type(exc).__name__}: {exc}")
                    else:
                        passed[label] += 1
                    break
                else:
                    # No loader matched: markdown is documentation, the rest
                    # is an unknown file type.
                    if not fname.endswith('.md'):
                        nunk += 1

        nfailures = len(failures)
        print(f"{nread} files tested")
        print(f"\t{nread - nfailures} tests passed ({nfailures} failed)")
        print("\tDetails:")
        for label in tested:
            print(f"\t\t{gen_detail(tested[label], passed[label], label)}")
        if nunk:
            print(f"{nunk} files of unrecognized type")
        self.assertEqual(
            0, nfailures,
            "input files failed to load:\n" + "\n".join(failures))


if __name__ == '__main__':
    unittest.main()
-------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py37, py38, py39 3 | setenv = PIPENV_SKIP_LOCK=1 4 | PIPENV_DEV=1 5 | PIPENV_IGNORE_VIRTUALENVS=1 6 | PIPENV_VERBOSITY=-1 7 | 8 | [testenv] 9 | whitelist_externals = python 10 | deps=unittest2 11 | tox-pipenv 12 | commands= pipenv install --dev 13 | pipenv run python -m unittest 14 | -------------------------------------------------------------------------------- /utils/get-value.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | # get the value of a key in the about.yaml file 3 | # https://stackoverflow.com/questions/1221833/pipe-output-and-capture-exit-status-in-bash 4 | grep $1 about.yaml | sed "s/$1:[[:space:]]//" ; test ${PIPESTATUS[0]} -eq 0 5 | --------------------------------------------------------------------------------