.
5 |
6 | personinfo:Event a sh:NodeShape ;
7 | sh:closed true ;
8 | sh:description "grouping class for events" ;
9 | sh:ignoredProperties ( rdf:type ) ;
10 | sh:targetClass personinfo:Event .
11 |
12 | personinfo:ForProfit a sh:NodeShape ;
13 | sh:closed true ;
14 | sh:ignoredProperties ( rdf:type ) ;
15 | sh:property [ sh:maxCount 1 ;
16 | sh:minCount 1 ;
17 | sh:order 0 ;
18 | sh:path personinfo:name ] ;
19 | sh:targetClass personinfo:ForProfit .
20 |
21 | personinfo:NonProfit a sh:NodeShape ;
22 | sh:closed true ;
23 | sh:ignoredProperties ( rdf:type ) ;
24 | sh:property [ sh:maxCount 1 ;
25 | sh:minCount 1 ;
26 | sh:order 0 ;
27 | sh:path personinfo:name ] ;
28 | sh:targetClass personinfo:NonProfit .
29 |
30 | personinfo:Organization a sh:NodeShape ;
31 | sh:closed true ;
32 | sh:ignoredProperties ( rdf:type ) ;
33 | sh:property [ sh:description "full name" ;
34 | sh:maxCount 1 ;
35 | sh:minCount 1 ;
36 | sh:order 0 ;
37 | sh:path personinfo:name ] ;
38 | sh:targetClass personinfo:Organization .
39 |
40 | personinfo:Person a sh:NodeShape ;
41 | sh:closed true ;
42 | sh:description "a person,living or dead" ;
43 | sh:ignoredProperties ( rdf:type ) ;
44 | sh:property [ sh:description "age in years" ;
45 | sh:maxCount 1 ;
46 | sh:order 2 ;
47 | sh:path personinfo:age ],
48 | [ sh:class personinfo:MedicalEvent ;
49 | sh:description "medical history" ;
50 | sh:nodeKind sh:BlankNode ;
51 | sh:order 4 ;
52 | sh:path personinfo:has_medical_history ],
53 | [ sh:description "identifier for a person" ;
54 | sh:maxCount 1 ;
55 | sh:minCount 1 ;
56 | sh:order 0 ;
57 | sh:path personinfo:id ],
58 | [ sh:description "age in years" ;
59 | sh:maxCount 1 ;
60 | sh:order 3 ;
61 | sh:path personinfo:gender ],
62 | [ sh:description "full name" ;
63 | sh:maxCount 1 ;
64 | sh:minCount 1 ;
65 | sh:order 1 ;
66 | sh:path personinfo:name ] ;
67 | sh:targetClass personinfo:Person .
68 |
69 | personinfo:MedicalEvent a sh:NodeShape ;
70 | sh:closed true ;
71 | sh:description "a medical encounter" ;
72 | sh:ignoredProperties ( rdf:type ) ;
73 | sh:targetClass personinfo:MedicalEvent .
74 |
75 |
--------------------------------------------------------------------------------
/docs/datamodel/docs/schema.md:
--------------------------------------------------------------------------------
1 |
2 | # TEMP schema
3 |
4 |
5 |
6 |
7 |
8 | ### Classes
9 |
10 | * [Event](Event.md) - grouping class for events
11 | * [MedicalEvent](MedicalEvent.md) - a medical encounter
12 | * [Organization](Organization.md)
13 | * [ForProfit](ForProfit.md)
14 | * [NonProfit](NonProfit.md)
15 | * [Person](Person.md) - a person,living or dead
16 |
17 | ### Mixins
18 |
19 |
20 | ### Slots
21 |
22 | * [age](age.md)
23 | * [Person➞age](Person_age.md) - age in years
24 | * [description](description.md) - a textual description
25 | * [gender](gender.md)
26 | * [Person➞gender](Person_gender.md) - age in years
27 | * [has medical history](has_medical_history.md)
28 | * [Person➞has medical history](Person_has_medical_history.md) - medical history
29 | * [id](id.md) - any identifier
30 | * [Person➞id](Person_id.md) - identifier for a person
31 | * [name](name.md)
32 | * [Organization➞name](Organization_name.md) - full name
33 | * [Person➞name](Person_name.md) - full name
34 |
35 | ### Enums
36 |
37 |
38 | ### Subsets
39 |
40 | * [A](A.md)
41 | * [B](B.md)
42 |
43 | ### Types
44 |
45 |
46 | #### Built in
47 |
48 | * **Bool**
49 | * **Decimal**
50 | * **ElementIdentifier**
51 | * **NCName**
52 | * **NodeIdentifier**
53 | * **URI**
54 | * **URIorCURIE**
55 | * **XSDDate**
56 | * **XSDDateTime**
57 | * **XSDTime**
58 | * **float**
59 | * **int**
60 | * **str**
61 |
62 | #### Defined
63 |
64 | * [Boolean](types/Boolean.md) (**Bool**) - A binary (true or false) value
65 | * [Date](types/Date.md) (**XSDDate**) - a date (year, month and day) in an idealized calendar
66 | * [Datetime](types/Datetime.md) (**XSDDateTime**) - The combination of a date and time
67 | * [Decimal](types/Decimal.md) (**Decimal**) - A real number with arbitrary precision that conforms to the xsd:decimal specification
68 | * [Double](types/Double.md) (**float**) - A real number that conforms to the xsd:double specification
69 | * [Float](types/Float.md) (**float**) - A real number that conforms to the xsd:float specification
70 | * [Integer](types/Integer.md) (**int**) - An integer
71 | * [Ncname](types/Ncname.md) (**NCName**) - Prefix part of CURIE
72 | * [Nodeidentifier](types/Nodeidentifier.md) (**NodeIdentifier**) - A URI, CURIE or BNODE that represents a node in a model.
73 | * [Objectidentifier](types/Objectidentifier.md) (**ElementIdentifier**) - A URI or CURIE that represents an object in the model.
74 | * [String](types/String.md) (**str**) - A character string
75 | * [Time](types/Time.md) (**XSDTime**) - A time object represents a (local) time of day, independent of any particular day
76 | * [Uri](types/Uri.md) (**URI**) - a complete URI
77 | * [Uriorcurie](types/Uriorcurie.md) (**URIorCURIE**) - a URI or a CURIE
78 |
--------------------------------------------------------------------------------
/docs/faq.md:
--------------------------------------------------------------------------------
1 | # FAQ
2 |
3 | ## How can I get more help?
4 |
5 | The best way to get help is to ask [on the GitHub issue tracker](https://github.com/linkml/schemasheets/issues).
6 |
7 | ## Is there a specification?
8 |
9 | Schemasheets has its own specification:
10 |
11 | - [https://linkml.io/schemasheets/specification/](https://linkml.io/schemasheets/specification/)
12 |
13 | This is separate from the main LinkML specification
14 |
15 | ## Where do I ask questions about LinkML?
16 |
17 | See the [LinkML FAQ](https://linkml.io/linkml/faq/index.html)
18 |
19 | ## Why would I want to use schemasheets?
20 |
21 | Schemasheets are designed for easy, efficient and rapid collection of metadata elements and column headers for wide-table data
22 |
23 | - Lists of column headers/metadata elements and their associated information are easily managed
24 | - Enumerations can be explicitly provided - and mapped to ontologies
25 | - A flexible approach allows for schemas to be broken out over different sheets in different ways
26 |
27 | ## When should I *not* use schemasheets?
28 |
29 | Schemasheets works best for wide-table data, or "tidy" data, in which any individual observation or data point can have many variables or metadata elements associated with it.
30 |
31 | If your data follows higher "normal forms" or is narrow, then you may be better off authoring directly in LinkML YAML.
32 |
33 | Schemasheets also works best when you want to involve non-technical people in modeling decisions and in definitions of metadata elements. Most domain experts are comfortable looking at lists of things in spreadsheets.
34 |
35 | If your modeling team is quite technical, we recommend authoring your schema directly in LinkML YAML
36 |
37 | ## Can I use schemasheets to make JSON-Schema?
38 |
39 | Why, yes you can!
40 |
41 | You can chain together `sheets2linkml` and `gen-json-schema` - or use `sheets2project`
42 |
43 | Note that of course if your JSON is highly nested, then it may not make sense to manage the schema in a spreadsheet-like form.
44 | Schemasheets works best for "wide-table" data
45 |
46 | ## Can I use schemasheets to make SHACL Shape Schemas?
47 |
48 | Indeed, you certainly can!
49 |
50 | You can chain together `sheets2linkml` and `gen-shacl` - or use `sheets2project`
51 |
52 | Note that of course if your RDF is highly relational, then it may not make sense to manage the schema in a spreadsheet-like form.
53 | Schemasheets works best for "wide-table" data
54 |
55 | ## Can I use schemasheets to make SQL DDL?
56 |
57 | I'm glad you asked, this is indeed possible
58 |
59 | You can chain together `sheets2linkml` and `gen-sqlddl` - or use `sheets2project`
60 |
61 | ## Can I make a nice looking website for my schema?
62 |
63 | Definitely!
64 |
65 | You can chain together `sheets2linkml` and `gen-doc` - or use `sheets2project`
66 |
67 | This will make a static site ready for publishing on GitHub!
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
--------------------------------------------------------------------------------
/.github/workflows/main.yaml:
--------------------------------------------------------------------------------
1 | # Built from:
2 | # https://docs.github.com/en/actions/guides/building-and-testing-python
3 | # https://github.com/snok/install-poetry#workflows-and-tips
4 |
name: Build and test schemasheets

on: [ push, pull_request ]

jobs:
  test:

    runs-on: ubuntu-latest

    steps:

      #----------------------------------------------
      #       check-out repo and set-up python
      #----------------------------------------------
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Set up Python 3
        # The id lets later steps read the resolved interpreter version.
        id: setup-python
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML keeps the version a string; an unquoted value is
          # parsed as a float (e.g. 3.10 would silently become 3.1).
          python-version: "3.13"

      #----------------------------------------------
      #          install & configure poetry
      #----------------------------------------------
      - name: Install Poetry
        uses: snok/install-poetry@v1.4
        with:
          virtualenvs-create: true
          virtualenvs-in-project: true

      #----------------------------------------------
      #       load cached venv if cache exists
      #----------------------------------------------
      - name: Load cached venv
        id: cached-poetry-dependencies
        uses: actions/cache@v4
        with:
          path: .venv
          # The Python version is part of the key so that a venv built for one
          # interpreter is never restored for another.
          key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}

      #----------------------------------------------
      #  install dependencies if cache does not exist
      #----------------------------------------------
      - name: Install dependencies
        if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
        run: poetry install --no-interaction --no-root

      #----------------------------------------------
      #    install your root project, if required
      #----------------------------------------------
      - name: Install library
        run: poetry install --no-interaction

      #----------------------------------------------
      #              run test suite
      #----------------------------------------------
      - name: Run tests
        run: poetry run pytest --cov-report=term --cov-report=html:htmlcov --cov=schemasheets/

      #----------------------------------------------
      #           upload coverage results
      #----------------------------------------------
      - name: Upload pytest test results
        uses: actions/upload-artifact@v4
        with:
          name: pytest-results
          path: htmlcov
        # Use always() to always run this step to publish test results when there are test failures
        if: ${{ always() }}

      #----------------------------------------------
      #  test generate-populate via `make scripts-all`
      #----------------------------------------------
      - name: generate-populate via make scripts-all
        run: make scripts-all
--------------------------------------------------------------------------------
/examples/output/docs/combined.md:
--------------------------------------------------------------------------------
1 |
2 | # PersonInfo schema
3 |
4 |
5 | Information about people, based on [schema.org](http://schema.org)
6 |
7 |
8 | ### Classes
9 |
10 | * [Event](Event.md) - grouping class for events
11 | * [MedicalEvent](MedicalEvent.md) - a medical encounter
12 | * [Organization](Organization.md)
13 | * [ForProfit](ForProfit.md)
14 | * [NonProfit](NonProfit.md)
15 | * [Person](Person.md) - a person,living or dead
16 |
17 | ### Mixins
18 |
19 |
20 | ### Slots
21 |
22 | * [age](age.md)
23 | * [Person➞age](Person_age.md) - age in years
24 | * [description](description.md) - a textual description
25 | * [gender](gender.md)
26 | * [Person➞gender](Person_gender.md) - age in years
27 | * [has medical history](has_medical_history.md)
28 | * [Person➞has medical history](Person_has_medical_history.md) - medical history
29 | * [id](id.md) - any identifier
30 | * [Person➞id](Person_id.md) - identifier for a person
31 | * [name](name.md)
32 | * [Organization➞name](Organization_name.md) - full name
33 | * [Person➞name](Person_name.md) - full name
34 |
35 | ### Enums
36 |
37 | * [FamilialRelationshipType](FamilialRelationshipType.md) - familial relationships
38 | * [GenderType](GenderType.md) - gender
39 |
40 | ### Subsets
41 |
42 | * [A](A.md) - subset a
43 | * [B](B.md) - subset b
44 |
45 | ### Types
46 |
47 |
48 | #### Built in
49 |
50 | * **Bool**
51 | * **Decimal**
52 | * **ElementIdentifier**
53 | * **NCName**
54 | * **NodeIdentifier**
55 | * **URI**
56 | * **URIorCURIE**
57 | * **XSDDate**
58 | * **XSDDateTime**
59 | * **XSDTime**
60 | * **float**
61 | * **int**
62 | * **str**
63 |
64 | #### Defined
65 |
66 | * [DecimalDegree](types/DecimalDegree.md) (**float**) - A decimal degree expresses latitude or longitude as decimal fractions
67 | * [Lang](types/Lang.md) (**str**) - language tag
68 | * [Boolean](types/Boolean.md) (**Bool**) - A binary (true or false) value
69 | * [Date](types/Date.md) (**XSDDate**) - a date (year, month and day) in an idealized calendar
70 | * [Datetime](types/Datetime.md) (**XSDDateTime**) - The combination of a date and time
71 | * [Decimal](types/Decimal.md) (**Decimal**) - A real number with arbitrary precision that conforms to the xsd:decimal specification
72 | * [Double](types/Double.md) (**float**) - A real number that conforms to the xsd:double specification
73 | * [Float](types/Float.md) (**float**) - A real number that conforms to the xsd:float specification
74 | * [Integer](types/Integer.md) (**int**) - An integer
75 | * [Ncname](types/Ncname.md) (**NCName**) - Prefix part of CURIE
76 | * [Nodeidentifier](types/Nodeidentifier.md) (**NodeIdentifier**) - A URI, CURIE or BNODE that represents a node in a model.
77 | * [Objectidentifier](types/Objectidentifier.md) (**ElementIdentifier**) - A URI or CURIE that represents an object in the model.
78 | * [String](types/String.md) (**str**) - A character string
79 | * [Time](types/Time.md) (**XSDTime**) - A time object represents a (local) time of day, independent of any particular day
80 | * [Uri](types/Uri.md) (**URI**) - a complete URI
81 | * [Uriorcurie](types/Uriorcurie.md) (**URIorCURIE**) - a URI or a CURIE
82 |
--------------------------------------------------------------------------------
/tests/test_121/input/schema_def.tsv:
--------------------------------------------------------------------------------
1 | id schema name aliases broad_mappings categories close_mappings comments conforms_to contributors created_by created_on date default_prefix default_range deprecated deprecated_element_has_exact_replacement deprecated_element_has_possible_replacement description exact_mappings implements imports in_language instantiates last_updated_on dat license mappings modified_by narrow_mappings notes rank related_mappings see_also slot_names_unique source status title todos version alt_description_source alt_description_text flavor annotation local name source local name value structured_aliases literal_form structured_aliases alias_predicate structured_aliases categories in subset id_prefixes emit_prefixes default_curi_maps
2 | >id schema aliases broad_mappings categories close_mappings comments conforms_to contributors created_by ignore default_prefix default_range deprecated deprecated_element_has_exact_replacement deprecated_element_has_possible_replacement description exact_mappings implements ignore in_language ignore ignore license mappings modified_by narrow_mappings notes rank related_mappings see_also slot_names_unique source status title todos version alt_descriptions alt_descriptions annotations local_names local_names structured_aliases structured_aliases structured_aliases in_subset id_prefixes emit_prefixes default_curi_maps
3 | > internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|'
4 | > inner_key: source inner_key: description inner_key: flavor inner_key: source inner_key: value inner_key: literal_form inner_key: predicate inner_key: categories ""
5 | http://example.com/some_schema some_schema text1|text2 some_schema:1|some_schema:2 some_schema:1|some_schema:2 some_schema:1|some_schema:2 the overall usefulness of default_curi_maps is debatable|there may be some improper modeling in here like illegal ranges. what software will detect that?|what about numeric, date and booleans that get converted to strings?|what are implements and instantiates good for?|how well does including an imports statement in a schemsheets TSV work?|todo what about multivalued slots with multiple inner keys?|what does the schema repair method do? LinkML some_schema:1|some_schema:2 some_schema:1 some_schema float we all feel deprecated some times some_schema:1 some_schema:1 A schema that tests as many elements as possible. For use in testing YAML <-> sheets some_schema:1|some_schema:2 some_schema:1|some_schema:2 some_schema:1|some_schema:2 English some_schema:1|some_schema:2 MIT some_schema:1|some_schema:2 some_schema:1 some_schema:1|some_schema:2 text1|text2 3 some_schema:1|some_schema:2 some_schema:1|some_schema:2 TRUE some_schema:1 some_schema:1 See description SETTINGS! v0.0.1 wiktionary "An outline or image universally applicable to a general conception, under which it is likely to be presented to the mind" raspberry logic format schema_definition EXACT_SYNONYM some_schema:1|some_schema:2 main_subset|secret_subset data_prefix_1|data_prefix_2 data_prefix_1|data_prefix_2|non_data_prefix semweb_context|idot_context
6 |
--------------------------------------------------------------------------------
/docs/intro/export.md:
--------------------------------------------------------------------------------
1 | # Exporting a schema to schemasheets
2 |
3 | ## Use Case
4 |
5 | Sometimes you might want to export from an existing LinkML schema to schemasheets -
6 | for example to migrate the source of some or part of a schema to sheet-based editing.
7 |
8 | The `sheets2linkml` command will convert schemasheet(s) to a LinkML schema
9 |
10 | The reverse operation `linkml2sheets` will convert a LinkML schema to schemasheets
11 |
12 | ## Status
13 |
14 | __THIS COMMAND IS ONLY PARTIALLY IMPLEMENTED__ -- not all parts of the specification are considered.
15 | However, you may still find this useful for "bootstrapping" schema sheets
16 |
17 | ## Usage
18 |
19 | Type
20 |
21 | ```bash
22 | linkml2sheets --help
23 | ```
24 |
25 | to get complete help
26 |
27 | Broadly there are two usage scenarios:
28 |
29 | - when you have a single sheet
30 | - when your schema is mapped to multiple sheets (e.g. enums and slots in different sheets)
31 |
32 | In both cases you need two inputs
33 |
34 | 1. A LinkML schema, specified in YAML
35 | 2. One or more schemasheets that serve as the specification
36 | - these do not need to have any data
37 | - they do need the columns used and column descriptors
38 |
39 | ### Single-sheet usage
40 |
41 | Here you pass a single TSV specification on the command line
42 |
43 | You can use the `--output` (`-o`) option to write output to a single sheet file.
44 | Or omit this to write on stdout.
45 |
46 | ### Multi-sheet usage
47 |
48 | Here you pass multiple TSV specifications on the command line
49 |
50 | You must use the `--directory` (`-d`) option to specify which directory
51 | the files are written to. The output filenames will be the same as the input filenames.
52 |
53 | So for example, if you had a folder:
54 |
55 | ```
56 | sheets/
57 | enums.tsv
58 | slots.tsv
59 | ```
60 |
61 | where:
62 |
63 | - each tsv contains minimally the column specifications,
64 | - you pass in `sheets/*tsv` as input
65 | - you pass `--directory output`
66 |
67 | Then you will generate a folder:
68 |
69 | ```
70 | output/
71 | enums.tsv
72 | slots.tsv
73 | ```
74 |
75 | The headers will be the same as in the input TSVs,
76 | but the output will include "data" rows, where each row is a matching
77 | schema element.
78 |
79 | The input and output directories can be identical, but
80 | you will need to pass in `--overwrite` to explicitly overwrite;
81 | this guards against accidental overwrites.
82 |
83 | ## Converting between two different schemasheet specs
84 |
85 | Schemasheets allows *custom* sheet formats that map to the LinkML standard.
86 |
87 | You can use the combination of `sheets2linkml` and `linkml2sheets` to convert between two sheet specifications.
88 |
89 | For example, let's say for schema1.tsv, you use a spreadsheet with the following headers:
90 |
91 | - record: `> class`
92 | - field: `> slot`
93 | - cardinality: `> cardinality`
94 | - info: `> description`
95 |
96 | and for schema2.tsv you have:
97 |
98 | - table: `> class`
99 | - attribute: `> slot`
100 | - required: `> required`
101 | - multivalued: `> multivalued`
102 | - description: `> description`
103 |
104 | (here each list element is a column, and the part after the `>` is the 2nd row)
105 |
106 | If you do:
107 |
108 | ```bash
109 | sheets2linkml schema1.tsv > schema1.yaml
110 | linkml2sheets -s schema1.yaml schema2.tsv > schema2_full.tsv
111 | ```
112 |
113 | then this will effectively map schema1.tsv onto the format for schema2.tsv.
114 | And you can swap the arguments to go in the reverse direction.
115 |
--------------------------------------------------------------------------------
/schemasheets/sheets_to_project.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 |
4 | import click
5 | import yaml
6 | from typing import List, Union, Any, Dict, Tuple, Generator
7 |
8 | from linkml.generators.projectgen import ProjectConfiguration, ProjectGenerator
9 | from linkml_runtime.dumpers import yaml_dumper
10 |
11 | from schemasheets.schemamaker import SchemaMaker
12 |
13 |
@click.command()
@click.option("--dir", "-d",
              help="directory in which to place generated files. E.g. linkml_model, biolink_model")
@click.option("--repair/--no-repair",
              default=True,
              show_default=True,
              help="Automatically repair missing schema elements")
@click.option("-n", "--name",
              default="schema",
              show_default=True,
              help="name of the schema")
@click.option("--generator-arguments", "-A",
              help="yaml configuration for generators")
@click.option("--config-file", "-C",
              type=click.File('rb'),
              help="path to yaml configuration")
@click.option("--exclude", "-X",
              multiple=True,
              help="list of artefacts to be excluded")  # TODO: make this an enum
@click.option("--include", "-I",
              multiple=True,
              help="list of artefacts to be included. If not set, defaults to all")  # TODO: make this an enum
@click.option("--unique-slots/--no-unique-slots",
              default=False,
              show_default=True,
              help="All slots are treated as unique and top level and do not belong to the specified class")
@click.option("-v", "--verbose", count=True)
@click.argument('tsv_files', nargs=-1)
def multigen(tsv_files, dir, verbose: int, repair: bool, name,
             unique_slots: bool,
             exclude: List[str], include: List[str], config_file, generator_arguments: str, **kwargs):
    """
    Generate an entire set of schema files from Schemasheets

    Generate all downstream artefacts using default configuration:

        sheets2project --dir my_schema_output my_schema/*tsv

    """
    # Map the -v count onto a logging level: none -> WARNING, -v -> INFO, -vv or more -> DEBUG.
    if verbose >= 2:
        logging.basicConfig(level=logging.DEBUG)
    elif verbose == 1:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.WARNING)

    project_config = ProjectConfiguration()
    if config_file is not None:
        # An empty config file parses to None; treat that as "no overrides"
        # instead of failing on None.items().
        config_overrides = yaml.safe_load(config_file) or {}
        for k, v in config_overrides.items():
            setattr(project_config, k, v)
    # Command-line include/exclude lists take precedence over the config file.
    if exclude:
        project_config.excludes = list(exclude)
    if include:
        project_config.includes = list(include)
    if generator_arguments is not None:
        try:
            project_config.generator_args = yaml.safe_load(generator_arguments)
        except Exception as e:
            # Chain the original error so the underlying YAML problem stays visible.
            raise Exception('Argument must be a valid YAML blob') from e
        logging.info(f'generator args: {project_config.generator_args}')

    if dir is None:
        dir = '.'
    # The schema YAML is written into this directory below, so make sure it exists first.
    os.makedirs(dir, exist_ok=True)
    project_config.directory = dir

    # Build the schema from the sheets, optionally repairing it.
    sm = SchemaMaker()
    if name:
        sm.default_name = name
    sm.unique_slots = unique_slots
    schema = sm.create_schema(list(tsv_files))
    if repair:
        schema = sm.repair_schema(schema)

    # Write the schema as YAML, then hand that file to the project generator.
    out_file = os.path.join(dir, f'{name}.yaml')
    yaml_dumper.dump(schema, to_file=out_file)
    gen = ProjectGenerator()
    gen.generate(out_file, project_config)


if __name__ == '__main__':
    multigen()
92 |
--------------------------------------------------------------------------------
/docs/datamodel/index.md:
--------------------------------------------------------------------------------
1 |
2 | # Schemasheets-Mappings schema
3 |
4 |
5 | This is the datamodel for Schemasheets Configurations.
6 |
7 | Note that for most purposes you will likely not need to consult this.
8 |
9 | The key class is [ColumnSettings](ColumnSettings)
10 |
11 | Two controlled vocabularies are specified here:
12 |
13 | - [Cardinality](Cardinality) - terms and abbreviations that can be used for cardinality
14 | - [Shortcuts](Shortcuts) - special column configurations
15 |
16 |
17 | ### Classes
18 |
19 | * [ColumnSettings](ColumnSettings.md) - configuration for an individual column in a schema sheet.
20 | * [ValueMap](ValueMap.md) - A key-value dictionary
21 |
22 | ### Mixins
23 |
24 |
25 | ### Slots
26 |
27 | * [➞applies_to_class](columnSettings__applies_to_class.md) - if a value C is specified, then this column in the relevant row is interpreted as
28 | * [➞applies_to_slot](columnSettings__applies_to_slot.md)
29 | * [➞curie_prefix](columnSettings__curie_prefix.md) - CURIE prefix prepended to column value. This may be used for column that describe mapping or class/slot uri properties of the element. For example, with column settings `exact_mapping: {curie_prefix: dcterms}`, an element row with column value `contributor`, the value will be transformed to `dcterms:contributor`
30 | * [➞prefix](columnSettings__prefix.md) - string to be prefixed onto the column value
31 | * [➞suffix](columnSettings__suffix.md) - string to be suffixed onto the column value
32 | * [➞tag](columnSettings__tag.md)
33 | * [➞template](columnSettings__template.md) - jinja templated format string
34 | * [➞vmap](columnSettings__vmap.md) - Specifies a mapping for column values
35 | * [map_key](map_key.md) - key in the dictionary
36 | * [map_value](map_value.md) - value in the dictionary
37 |
38 | ### Enums
39 |
40 | * [Cardinality](Cardinality.md) - vocabulary for describing cardinality and applicability of slots or fields.
41 | * [Shortcuts](Shortcuts.md) - A vocabulary of permissible values as column descriptors that do not have an exact equivalent in the LinkML datamodel,
42 |
43 | ### Subsets
44 |
45 |
46 | ### Types
47 |
48 |
49 | #### Built in
50 |
51 | * **Bool**
52 | * **Decimal**
53 | * **ElementIdentifier**
54 | * **NCName**
55 | * **NodeIdentifier**
56 | * **URI**
57 | * **URIorCURIE**
58 | * **XSDDate**
59 | * **XSDDateTime**
60 | * **XSDTime**
61 | * **float**
62 | * **int**
63 | * **str**
64 |
65 | #### Defined
66 |
67 | * [ElementReference](types/ElementReference.md) (**str**) - A pointer to an element in a datamodel
68 | * [Boolean](types/Boolean.md) (**Bool**) - A binary (true or false) value
69 | * [Date](types/Date.md) (**XSDDate**) - a date (year, month and day) in an idealized calendar
70 | * [Datetime](types/Datetime.md) (**XSDDateTime**) - The combination of a date and time
71 | * [Decimal](types/Decimal.md) (**Decimal**) - A real number with arbitrary precision that conforms to the xsd:decimal specification
72 | * [Double](types/Double.md) (**float**) - A real number that conforms to the xsd:double specification
73 | * [Float](types/Float.md) (**float**) - A real number that conforms to the xsd:float specification
74 | * [Integer](types/Integer.md) (**int**) - An integer
75 | * [Ncname](types/Ncname.md) (**NCName**) - Prefix part of CURIE
76 | * [Nodeidentifier](types/Nodeidentifier.md) (**NodeIdentifier**) - A URI, CURIE or BNODE that represents a node in a model.
77 | * [Objectidentifier](types/Objectidentifier.md) (**ElementIdentifier**) - A URI or CURIE that represents an object in the model.
78 | * [String](types/String.md) (**str**) - A character string
79 | * [Time](types/Time.md) (**XSDTime**) - A time object represents a (local) time of day, independent of any particular day
80 | * [Uri](types/Uri.md) (**URI**) - a complete URI
81 | * [Uriorcurie](types/Uriorcurie.md) (**URIorCURIE**) - a URI or a CURIE
82 |
--------------------------------------------------------------------------------
/examples/output/jsonschema/combined.schema.json:
--------------------------------------------------------------------------------
1 | {
2 | "$defs": {
3 | "Event": {
4 | "additionalProperties": false,
5 | "description": "grouping class for events",
6 | "properties": {},
7 | "required": [],
8 | "title": "Event",
9 | "type": "object"
10 | },
11 | "FamilialRelationshipType": {
12 | "description": "familial relationships",
13 | "enum": [
14 | "SIBLING_OF",
15 | "PARENT_OF",
16 | "CHILD_OF"
17 | ],
18 | "title": "FamilialRelationshipType",
19 | "type": "string"
20 | },
21 | "ForProfit": {
22 | "additionalProperties": false,
23 | "description": "",
24 | "properties": {
25 | "name": {
26 | "description": "full name",
27 | "type": "string"
28 | }
29 | },
30 | "required": [
31 | "name"
32 | ],
33 | "title": "ForProfit",
34 | "type": "object"
35 | },
36 | "GenderType": {
37 | "description": "gender",
38 | "enum": [
39 | "nonbinary man",
40 | "nonbinary woma"
41 | ],
42 | "title": "GenderType",
43 | "type": "string"
44 | },
45 | "MedicalEvent": {
46 | "additionalProperties": false,
47 | "description": "a medical encounter",
48 | "properties": {},
49 | "required": [],
50 | "title": "MedicalEvent",
51 | "type": "object"
52 | },
53 | "NonProfit": {
54 | "additionalProperties": false,
55 | "description": "",
56 | "properties": {
57 | "name": {
58 | "description": "full name",
59 | "type": "string"
60 | }
61 | },
62 | "required": [
63 | "name"
64 | ],
65 | "title": "NonProfit",
66 | "type": "object"
67 | },
68 | "Organization": {
69 | "additionalProperties": false,
70 | "description": "",
71 | "properties": {
72 | "name": {
73 | "description": "full name",
74 | "type": "string"
75 | }
76 | },
77 | "required": [
78 | "name"
79 | ],
80 | "title": "Organization",
81 | "type": "object"
82 | },
83 | "Person": {
84 | "additionalProperties": false,
85 | "description": "a person,living or dead",
86 | "properties": {
87 | "age": {
88 | "description": "age in years",
89 | "type": "number"
90 | },
91 | "gender": {
92 | "description": "age in years",
93 | "type": "number"
94 | },
95 | "has_medical_history": {
96 | "description": "medical history",
97 | "items": {
98 | "$ref": "#/$defs/MedicalEvent"
99 | },
100 | "type": "array"
101 | },
102 | "id": {
103 | "description": "identifier for a person",
104 | "type": "string"
105 | },
106 | "name": {
107 | "description": "full name",
108 | "type": "string"
109 | }
110 | },
111 | "required": [
112 | "id",
113 | "name"
114 | ],
115 | "title": "Person",
116 | "type": "object"
117 | }
118 | },
119 | "$id": "https://w3id.org/linkml/examples/personinfo",
120 | "$schema": "http://json-schema.org/draft-07/schema#",
121 | "additionalProperties": true,
122 | "properties": {},
123 | "required": [],
124 | "title": "PersonInfo",
125 | "type": "object"
126 | }
127 |
--------------------------------------------------------------------------------
/tests/input/fair-cookbook-dd.tsv:
--------------------------------------------------------------------------------
1 | File Name Variable Name Variable Label Variable Ontology ID or RDFtype Variable ID Source Variable Statistical Type Variable Data Type Variable Size Max Allowed Value Min Allowed Value Regex Allowed Value Shorthands Allowed Value Descriptions Computed Value Unique (alone) Unique (Combined with) Required Collection Form Name Comments
2 | > class slot title slot_uri source ignore range ignore maximum_value minimum_value ignore ignore ignore ignore identifier ignore required ignore comments
3 | 1_Subjects.txt SUBJECT_ID Subject number https://schema.org/identifier https://schema.org categorical variable integer Y Y FORM 1
4 | 1_Subjects.txt SPECIES Species name https://schema.org/name https://schema.org categorical variable string FORM 1
5 | 1_Subjects.txt STRAIN Strain TODO substitute broken link https://bioschemas.org/profiles/Taxon/0.6-RELEASE/identifier https://schemas.org/ categorical variable string http://purl.obolibrary.org/obo/NCBITaxon_40674 FORM 1
6 | 1_Subjects.txt AGE Age at study initiation https://bioschemas.org/types/BioSample/0.1-RELEASE-2019_06_19 https://bioschemas.org/ continuous variable integer Y FORM 1
7 | 1_Subjects.txt AGE_UNIT Age unit http://purl.obolibrary.org/obo/UO_0000003 http://purl.obolibrary.org/obo/uo categorial variable string Y FORM 1
8 | 1_Subjects.txt SEX Sex https://schema.org/gender https://schema.org categorical variable enum M;F M=male;F=female FORM 1
9 | 1_Subjects.txt SOMEDATE Date of acquiring subject https://schema.org/dateCreated https://schema.org ordinal variable date YYYY-MM-DD FORM 1
10 | 1_Subjects.txt HEMOGLOBIN Hematology: Hemoglobin http://www.ebi.ac.uk/efo/EFO_0004509 http://www.ebi.ac.uk/efo continuous variable float 2.5 15 4 FORM 1 Field size denotes “places, decimal places”
11 | 1_Subjects.txt HEMOGLOBIN_UNIT Hemoglobin unit http://purl.obolibrary.org/obo/UO_0000003 http://www.ebi.ac.uk/efo categorical variable string FORM 1 Field size denotes “places, decimal places”
12 | 1_Subjects.txt HEIGHT Body size https://schema.org/height https://schema.org continuous variable float 2.5 0.5
13 | 1_Subjects.txt HEIGHT_UNIT Body size unit http://purl.obolibrary.org/obo/UO_0000003 https://schema.org categorical variable string
14 | 1_Subjects.txt WEIGHT Body weight https://schema.org/weight https://schema.org continuous variable float 300 25
15 | 1_Subjects.txt WEIGHT_UNIT Body weight unit http://purl.obolibrary.org/obo/UO_0000003 https://schema.org categorical variable string
16 | 1_Subjects.txt BMI Body mass index http://www.ebi.ac.uk/efo/EFO_0004340 http://www.ebi.ac.uk/efo continuous variable float 100 10 WEIGHT/(HEIGHT*HEIGHT)
17 | 1_Subjects.txt LAB Laboratory https://schema.org/identifier https://schema.org categorical variable integer 1;2;3 1=LabA;2=UniversityB;3=CompanyC FORM 1
18 | 2_Samples.txt SAMPLE_ID Sample ID https://schema.org/identifier https://schema.org categorical variable string Y Y FORM 2
19 | 2_Samples.txt SAMPLE_SITE Sample collection site https://bioschemas.org/types/BioSample/0.1-RELEASE-2019_06_19 https://bioschemas.org/ categorical variable string Y FORM 2
20 | 2_Samples.txt ANALYTE_TYPE Type of analysis http://edamontology.org/operation_2945 http://edamontology.org categorical variable string http://edamontology.org/operation_2945 Y FORM 2
21 | 2_Samples.txt GENOTYPING_CENTER GENOTYPING_CENTER https://schema.org/identifier https://schema.org categorical variable string FORM 2
22 | 2_Samples.txt SEQUENCING_CENTER SEQUENCING_CENTER https://schema.org/identifier https://schema.org categorical variable string FORM 2
23 | 3_SampleMapping.txt SUBJECT_ID Subject number https://schema.org/identifier https://schema.org ordinal variable integer SAMPLE_ID Y FORM 3
24 | 3_SampleMapping.txt SAMPLE_ID Sample ID https://schema.org/identifier https://schema.org
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | RUN = poetry run
2 | PROJ = schemasheets
3 | VERSION = $(shell git tag --sort=version:refname | tail -1)
4 |
5 | all: all_py test
6 |
7 | test:
8 | $(RUN) pytest
9 |
10 | all_py: schemasheets/conf/configschema.py
11 | $(PROJ)/conf/configschema.py: $(PROJ)/conf/configschema.yaml
12 | $(RUN) gen-python $< > $@.tmp && mv $@.tmp $@
13 |
14 | cogs-%:
15 | $(RUN) cogs $*
16 |
17 | sync-examples: # explicit loop instead of {a,b} brace expansion, which /bin/sh (make's default shell) need not support
18 | for f in personinfo enums prefixes schema subsets types; do cp tests/input/$$f.tsv examples/input || exit 1; done && \
19 | cp -pr tests/output/personinfo/* examples/output
20 |
21 | datamodel-docs: schemasheets/conf/configschema.yaml # generator is run through $(RUN), like the other gen-* recipes in this Makefile
22 | $(RUN) gen-markdown $< -d docs/datamodel/
23 |
24 | tests/input/rda-crosswalk.tsv:
25 | curl -L -s 'https://docs.google.com/spreadsheets/d/1mu9iWZxX4DvtklLIQgEloM8oZfzZdzfJ/export?format=tsv&gid=1108662376' > $@
26 |
27 |
28 | serve:
29 | $(RUN) mkdocs serve
30 |
31 | gh-deploy:
32 | $(RUN) mkdocs gh-deploy
33 |
34 | examples/output/single_examples.yaml: examples/input/schema.tsv examples/input/prefixes.tsv examples/input/single_examples.tsv
35 | $(RUN) sheets2linkml --output $@ $^
36 | $(RUN) python schemasheets/schemaview_vs_examples.py
37 |
38 |
39 | examples/output/multiple_examples_per_slot.yaml: examples/input/schema.tsv examples/input/prefixes.tsv examples/input/multiple_examples_per_slot.tsv
40 | $(RUN) sheets2linkml --output $@ $^
41 |
42 | .PHONY: clean all test gh-deploy serve datamodel-docs sync-examples cogs-% all_py range_override_reasoning configured_owl_via_project docker-build-no-cache docker-build docker-build-use-cache-dev docker-clean docker-publish-no-build docker-publish-dev-no-build docker-publish docker-run
43 |
44 | clean:
45 | rm -rf examples/output/*examples*yaml*
46 |
47 | bin/robot.jar:
48 | curl -s https://api.github.com/repos/ontodev/robot/releases/latest | grep 'browser_download_url.*\.jar"' | cut -d : -f 2,3 | tr -d \" | wget -O $@ -i -
49 |
50 | examples/output/range_override_examples.yaml: examples/input/schema.tsv examples/input/prefixes.tsv examples/input/range_override.tsv
51 | $(RUN) sheets2linkml --output $@ $^
52 |
53 |
54 | examples/output/range_override_examples.ttl: examples/output/range_override_examples.yaml
55 | $(RUN) gen-owl --output $@ --no-type-objects --no-metaclasses $<
56 | # ERROR:root:Multiple slots with URI: https://w3id.org/linkml/examples/personinfo/slot_for_range_override:
57 | # ['slot_for_range_override', 'class_for_range_override_slot_for_range_override']; consider giving each a unique slot_uri
58 |
59 | examples/output/range_override_examples_reasoned.ttl: examples/output/range_override_examples.ttl bin/robot.jar
60 | # error doesn't appear in the generated examples/output/range_override_examples.ttl
61 | - grep -i error $<
62 | java -jar bin/robot.jar reason --reasoner ELK --input $< --output $@
63 | @echo But Makefile keeps going!?
64 | - grep -i error $@
65 |
66 | configured_owl_via_project: examples/output/range_override_examples.yaml
67 | $(RUN) gen-project \
68 | --include owl \
69 | --generator-arguments 'owl: {type-objects: false}' \
70 | --dir examples/output $<
71 |
72 | ################################################
73 | #### Commands for building the Docker image ####
74 | ################################################
75 |
76 | IM=linkml/schemasheets
77 |
78 | docker-build-no-cache:
79 | @docker build --no-cache -t $(IM):$(VERSION) . \
80 | && docker tag $(IM):$(VERSION) $(IM):latest
81 |
82 | docker-build:
83 | @docker build -t $(IM):$(VERSION) . \
84 | && docker tag $(IM):$(VERSION) $(IM):latest
85 |
86 | docker-build-use-cache-dev:
87 | @docker build -t $(DEV):$(VERSION) . \
88 | && docker tag $(DEV):$(VERSION) $(DEV):latest
89 |
90 | docker-clean:
91 | docker kill $(IM) || echo not running ;
92 | docker rm $(IM) || echo not made
93 |
94 | docker-publish-no-build:
95 | @docker push $(IM):$(VERSION) \
96 | && docker push $(IM):latest
97 |
98 | docker-publish-dev-no-build:
99 | @docker push $(DEV):$(VERSION) \
100 | && docker push $(DEV):latest
101 |
102 | docker-publish: docker-build
103 | @docker push $(IM):$(VERSION) \
104 | && docker push $(IM):latest
105 |
106 | docker-run:
107 | @docker run -v $(PWD):/work -w /work -ti $(IM):$(VERSION)
108 |
109 | include scripts.makefile
110 |
--------------------------------------------------------------------------------
/tests/test_headers.py:
--------------------------------------------------------------------------------
1 | from schemasheets.schemasheet_datamodel import SchemaSheet
2 |
3 | RECORD = "Record"
4 | FIELD = "Field"
5 | METATYPE = "MetaType"
6 | INFO = "Info"
7 | CV = "CV"
8 | PV = "PV"
9 | SDO_MAPPINGS = "schema.org"
10 | WD_MAPPINGS = "wikidata"
11 | DATATYPE = "Datatype"
12 |
13 | CASES = [  # (case_id, list of column-configuration rows as dicts keyed by column header); consumed by test_parse_header
14 | (1,  # class + description, with mapping-column settings given inline (schema.org) and on a second ">" row (wikidata)
15 | [
16 | {
17 | RECORD: "> class",
18 | INFO: " description",
19 | SDO_MAPPINGS: "exact_mappings: {curie_prefix: sdo}",
20 | WD_MAPPINGS: "exact_mappings"
21 | },
22 | {
23 | RECORD: ">",
24 | WD_MAPPINGS: "curie_prefix: wd"
25 | },
26 | ]
27 | ),
28 | (2,  # class + slot + description columns
29 | [
30 | {RECORD: "> class", FIELD: " slot", INFO: " description"},
31 | ]
32 | ),
33 | (3,  # metatype + description columns
34 | [
35 | {METATYPE: "> metatype", INFO: " description"},
36 | ]
37 | ),
38 | (4,  # enum + permissible_value + description columns
39 | [
40 | {CV: "> enum", PV: "permissible_value", INFO: " description"},
41 | ]
42 | ),
43 | (5,  # type + description columns
44 | [
45 | {DATATYPE: "> type", INFO: " description"},
46 | ]
47 | ),
48 | # unnecessary/incompatible with the latest meta-model
49 | # (6,
50 | # [
51 | # {DATATYPE: "> metaslot.type", INFO: " description"},
52 | # ]
53 | # ),
54 | ]
55 |
56 | def test_parse_header():  # Build a SchemaSheet from each fixture in CASES and check the parsed column configuration.
57 | print()
58 | for case_id, case in CASES:  # case_id selects which assertion groups below apply
59 | ss = SchemaSheet.from_dictreader(case)
60 | tc = ss.table_config
61 | info_cc = tc.columns[INFO]  # every active fixture declares the Info column with the descriptor " description"
62 | assert info_cc.name == INFO
63 | assert info_cc.maps_to == "description"  # the fixture's leading space is expected to be absent from maps_to
64 | assert info_cc.metaslot is not None
65 | assert info_cc.metaslot.name == "description"
66 | if case_id == 1 or case_id == 2:  # fixtures whose first column is configured as "> class"
67 | assert tc.metatype_column is None
68 | record_cc = tc.columns[RECORD]
69 | assert record_cc.name == RECORD
70 | assert record_cc.maps_to == "class"
71 | assert record_cc.metaslot is None
72 | if case_id == 2:  # fixture that additionally has a slot column
73 | field_cc = tc.columns[FIELD]
74 | assert field_cc.name == FIELD
75 | assert field_cc.maps_to == "slot"
76 | assert field_cc.metaslot is None
77 | if case_id == 1:  # fixture with curie_prefix settings on the two mapping columns
78 | sdo_cc = tc.columns[SDO_MAPPINGS]
79 | assert sdo_cc.name == SDO_MAPPINGS
80 | assert sdo_cc.maps_to == "exact_mappings"
81 | assert sdo_cc.metaslot is not None  # the name check that follows accepts "exact mappings" or "exact_mappings"
82 | assert sdo_cc.metaslot.name == "exact mappings" or\
83 | sdo_cc.metaslot.name == "exact_mappings"
84 | assert sdo_cc.settings.curie_prefix == "sdo"  # setting given inline in the descriptor cell
85 | wd_cc = tc.columns[WD_MAPPINGS]
86 | assert wd_cc.name == WD_MAPPINGS
87 | assert wd_cc.maps_to == "exact_mappings"
88 | assert wd_cc.metaslot is not None  # the name check that follows accepts "exact mappings" or "exact_mappings"
89 | assert wd_cc.metaslot.name == "exact mappings" or \
90 | wd_cc.metaslot.name == "exact_mappings"
91 | assert wd_cc.settings.curie_prefix == "wd"  # setting given on the fixture's second ">" row
92 | if case_id == 3:  # fixture using a "> metatype" column
93 | assert tc.metatype_column == METATYPE
94 | record_cc = tc.columns[METATYPE]
95 | assert record_cc.name == METATYPE
96 | assert record_cc.maps_to == "metatype"
97 | assert record_cc.metaslot is None
98 | if case_id == 4:  # fixture with enum and permissible_value columns
99 | cv_cc = tc.columns[CV]
100 | assert cv_cc.name == CV
101 | assert cv_cc.maps_to == "enum"
102 | assert cv_cc.metaslot is None
103 | pv_cc = tc.columns[PV]
104 | assert pv_cc.name == PV
105 | assert pv_cc.maps_to == "permissible_value"
106 | assert pv_cc.metaslot is None
107 | if case_id == 5:  # fixture with a "> type" column
108 | dt_cc = tc.columns[DATATYPE]
109 | #print(dt_cc)
110 | assert dt_cc.name == DATATYPE
111 | assert dt_cc.maps_to == "type"
112 | assert dt_cc.metaslot is None
113 | if case_id == 6:  # not reached at present: case 6 is commented out in CASES
114 | # See https://github.com/linkml/schemasheets/issues/75
115 | dt_cc = tc.columns[DATATYPE]
116 | assert dt_cc.name == DATATYPE
117 | assert dt_cc.maps_to == "type"
118 | assert dt_cc.metaslot is not None
119 | assert dt_cc.metaslot.name == "type"
120 |
121 |
122 |
123 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Schemasheets - make datamodels using spreadsheets
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 | 
22 | 
23 |
24 | Create a [data dictionary](https://linkml.io/schemasheets/howto/data-dictionaries/) / schema for your data using simple spreadsheets - *no coding required*.
25 |
26 | ## About
27 |
28 | Schemasheets is a framework for managing your schema using
29 | spreadsheets ([Google Sheets](https://linkml.io/schemasheets/howto/google-sheets/), [Excel](https://linkml.io/schemasheets/howto/excel/)). It works by compiling down to
30 | [LinkML](https://linkml.io), which can itself be compiled to a variety
31 | of formalisms, or used for different purposes like data validation.
32 |
33 | - [installation](https://linkml.io/schemasheets/install/)
34 | - [basics](https://linkml.io/schemasheets/intro/basics/)
35 |
36 | ## Documentation
37 |
38 | See the [Schema Sheets Manual](https://linkml.io/schemasheets)
39 |
40 | ## Quick Start
41 |
42 | ```bash
43 | pip install schemasheets
44 | ```
45 |
46 | You should then be able to run the following commands:
47 |
48 | - sheets2linkml - Convert schemasheets to a LinkML schema
49 | - linkml2sheets - Convert a LinkML schema to schemasheets
50 | - sheets2project - Generate an entire set of schema files (JSON-Schema, SHACL, SQL, ...) from Schemasheets
51 |
52 | As an example, take a look at the different tabs in the google sheet with ID [1wVoaiFg47aT9YWNeRfTZ8tYHN8s8PAuDx5i2HUcDpvQ](https://docs.google.com/spreadsheets/d/1wVoaiFg47aT9YWNeRfTZ8tYHN8s8PAuDx5i2HUcDpvQ/edit#gid=55566104)
53 |
54 | The personinfo tab contains the bulk of the metadata elements:
55 |
56 | |record|field|key|multiplicity|range|desc|schema.org|
57 | |---|---|---|---|---|---|---|
58 | |`>` class|slot|identifier|cardinality|range|description|exact_mappings: {curie_prefix: sdo}|
59 | |`>`|||||||
60 | ||id|yes|1|string|any identifier|identifier|
61 | ||description|no|0..1|string|a textual description|description|
62 | |Person||n/a|n/a|n/a|a person,living or dead|Person|
63 | |Person|id|yes|1|string|identifier for a person|identifier|
64 | |Person, Organization|name|no|1|string|full name|name|
65 | |Person|age|no|0..1|decimal|age in years||
66 | |Person|gender|no|0..1|decimal|age in years||
67 | |Person|has medical history|no|0..*|MedicalEvent|medical history||
68 | |Event|||||grouping class for events||
69 | |MedicalEvent||n/a|n/a|n/a|a medical encounter||
70 | |ForProfit|||||||
71 | |NonProfit|||||||
72 |
73 | This demonstrator schema contains both *record types* (e.g. Person, MedicalEvent) as well as *fields* (e.g. id, age, gender).
74 |
75 | You can convert this like this:
76 |
77 | ```bash
78 | sheets2linkml --gsheet-id 1wVoaiFg47aT9YWNeRfTZ8tYHN8s8PAuDx5i2HUcDpvQ personinfo types prefixes -o personinfo.yaml
79 | ```
80 |
81 | This will generate a LinkML YAML file `personinfo.yaml` from 3 of the tabs in the google sheet
82 |
83 | You can also work directly with TSVs:
84 |
85 | ```
86 | wget https://raw.githubusercontent.com/linkml/schemasheets/main/tests/input/personinfo.tsv
87 | sheets2linkml personinfo.tsv -o personinfo.yaml
88 | ```
89 |
90 | We recommend using [COGS](https://linkml.io/schemasheets/howto/google-sheets/) to synchronize your google sheets with local files using a git-like mechanism
91 |
92 | ## Examples
93 |
94 | - [Person Info Schema](https://docs.google.com/spreadsheets/d/1wVoaiFg47aT9YWNeRfTZ8tYHN8s8PAuDx5i2HUcDpvQ/edit#gid=55566104)
95 | - [Movies Property Graph Schema](https://docs.google.com/spreadsheets/d/1oMrzA41tg_nisdWInnqKJrcvv30dOXuwAhznJYYPSB8/edit?gid=1499822522#gid=1499822522)
96 |
97 | ## Finding out more
98 |
99 | * [Schema Sheets Manual](https://linkml.io/schemasheets)
100 | * [Specification](https://linkml.io/schemasheets/specification/)
101 | * [Internal Datamodel](https://linkml.io/schemasheets/datamodel/)
102 | * [linkml/schemasheets](https://github.com/linkml/schemasheets) code repo
103 | * [linkml/linkml](https://github.com/linkml/linkml) main LinkML repo
104 |
105 |
--------------------------------------------------------------------------------
/docs/specification.md:
--------------------------------------------------------------------------------
1 | # Specification (in progress)
2 |
3 | The following is an outline. Please refer to the examples elsewhere in this documentation for elucidation.
4 |
5 | ## Sheet Structure
6 |
7 | - A sheet is a collection of one or more tables
8 | - A table is a named list of rows
9 | - A row is an array of values (cells)
10 | - A value is a UTF-8 string
11 |
12 | _Note_: we follow google terminology here, with a sheet being the encompassing structure, and each containing tabs
13 |
14 | Any formatting information (color, font, etc) is ignored
15 |
16 | Any individual table is organized into
17 |
18 | - Exactly one header row. This MUST be first.
19 | - Zero or more column configuration rows. These MUST come after the header row, and must start with `>`
20 | - Zero or more *element rows*
21 |
22 | ### Header line
23 |
24 | The first row in a table is a **header** row.
25 |
26 | - Each element of this row is called a column
27 | - Each column must be non-null and unique
28 | - There is always exactly one header row
29 | - The header row must be first
30 | - Each column must be unique
31 |
32 | The header row provides an index into subsequent rows
33 |
34 | In future, grouping columns may be possible.
35 |
36 | ### Column Configurations
37 |
38 | Any subsequent rows where the first value starts with a `>` character are **column configurations**
39 |
40 | - A column configuration can be split over multiple lines. Each such subsequent line must start with `>`
41 | - The `>` marker delimits column configurations, and is subsequently ignored during parsing
42 | - Each line must be a valid yaml string
43 | - note that a single token is valid yaml for that token
44 | - The first config line must include a *descriptor*
45 | - Subsequent lines are *settings* for that descriptor
46 | - A descriptor can be one of:
47 | - Any LinkML metamodel slot (e.g. description, comments, range, required, recommended, multivalued)
48 | - The keyword `cardinality`. See [Cardinality enum](https://linkml.io/schemasheets/datamodel/Cardinality/) in datamodel
49 | - An element metatype (one of: schema, prefix, class, enum, slot, type, subset, permissible_value)
50 | - A YAML object whose key is a descriptor and with values representing settings
51 | - Settings can be taken from [the schemasheets datamodel](https://linkml.io/schemasheets/datamodel/)
52 | - vmap provides a **value mapping** used to translate column values. E.g. a custom "yes" or "no" to "true" or "false"
53 | - curie_prefix auto-prefixes the value in the cell with a curie prefix
54 | - inner_key indicates that the column represents a complex/nested object, and the cell value populates that key
55 |
56 |
57 | ### Element Rows
58 |
59 | Remaining rows are **elements** of your schema
60 |
61 | - Each element gets its own row
62 | - A row must represent a unique element, which may be a:
63 | - class (record, table)
64 | - field (column)
65 | - slot usage (a field in the context of a class)
66 | - enumeration
67 | - permissible value
68 | - schema
69 | - prefix
70 | - type
71 | - subset
72 | - If a `metatype` descriptor is used:
73 | - the type of the row is indicated by the metatype value (one of: class, slot, enum, type, schema)
74 | - a name field must be present, this indicates the name of the element
75 | - If a `metatype` descriptor is not used:
76 | - some combination of class, slot, enum, permissible value, schema, type are used to determine the row type plus the name
77 | - if both class and slot are populated the row is interpreted as `slot_usage` (i.e. a field in the context of a class)
78 | - if only class is populated the row is a class element with name determined by the value of the class column
79 | - if only slot is populated the row is a slot element with name determined by the value of the slot column
80 | - if only type is populated the row is a type element with name determined by the value of the type column
81 | - if only enum is populated the row is an enum element with name determined by the value of the enum column
82 | - if both enum and permissible_value are populated the row is a permissible value element for that enum
83 | - permissible_value must not be populated without enum being populated
84 | - if only schema is populated the row is a schema element with name determined by the value of the schema column
85 | - schema column may be populated in conjunction with any of the combination above to place the element in a schema
86 | - all other combinations are forbidden
87 | - All other columns are interpreted according to the column configuration for that column
88 | - if the column configuration includes curie_prefix then the value is prefixed with that curie prefix
89 | - if the column configuration specifies a vmap this is used to map the provided values
90 | - a column that is mapped to `example` automatically maps to `example.value`
91 | - All sheets/TSVs are combined together into a single LinkML schema as YAML
92 | - This LinkML schema can be translated to other formats as per the LinkML [generators](https://linkml.io/linkml/generators/index.html)
93 |
94 |
95 |
--------------------------------------------------------------------------------
/docs/intro/basics.md:
--------------------------------------------------------------------------------
1 | # Basics
2 |
3 | Schemasheets allow you to write schemas to manage your data without writing any code. You keep the source for your schema as a spreadsheet (e.g. in google sheets), and convert to LinkML using `sheets2linkml`
4 |
5 | ## Example
6 |
7 | The following example shows a schema sheet for a schema that is focused around
8 | the concept of a Person. The rows in the sheet describe either *classes* or *slots* (fields)
9 |
10 | |record|field|key|multiplicity|range|desc|schema.org
11 | |---|---|---|---|---|---|---|
12 | |`>` class|slot|identifier|cardinality|range|description|`exact_mappings: {curie_prefix: sdo}`
13 | |-|id|yes|1|string|any identifier|identifier
14 | |-|description|no|0..1|string|a textual description|description
15 | |Person||n/a|n/a|n/a|a person,living or dead|Person
16 | |Person|id|yes|1|string|identifier for a person|identifier
17 | |Person, Organization|name|no|1|string|full name|name
18 | |Person|age|no|0..1|decimal|age in years|-
19 | |Person|gender|no|0..1|decimal|age in years|-
20 | |Person|has medical history|no|0..*|MedicalEvent|medical history|-
21 | |MedicalEvent||n/a|n/a|n/a|-|-
22 |
23 | You can see this example as a google sheet, or as a file in GitHub:
24 |
25 | * [personinfo google sheet](https://docs.google.com/spreadsheets/d/1wVoaiFg47aT9YWNeRfTZ8tYHN8s8PAuDx5i2HUcDpvQ/edit#gid=55566104)
26 | * [tests/input/personinfo.tsv](https://github.com/linkml/schemasheets/blob/main/tests/input/personinfo.tsv) equivalent file in github
27 |
28 | To convert this, assuming `personinfo.tsv` is in your current directory:
29 |
30 | ```bash
31 | sheets2linkml personinfo.tsv
32 | ```
33 |
34 | The sheet is structured as follows:
35 |
36 | ## Header Line
37 |
38 | The first line is a header line. You get to decide the column headers
39 |
40 | |record|field|key|multiplicity|range|desc|schema.org
41 | |---|---|---|---|---|---|---|
42 |
43 | ## Column Descriptors
44 |
45 | - Subsequent lines starting with `>` are *column descriptors*
46 | - these provide a way to interpret the columns
47 | - descriptors can be drawn from the [linkml](https://linkml.io) vocabulary
48 |
49 | In the example above, there is a single row of descriptors:
50 |
51 | |record|field|key|multiplicity|range|desc|schema.org
52 | |---|---|---|---|---|---|---|
53 | |`>` class|slot|identifier|cardinality|range|description|`exact_mappings: {curie_prefix: sdo}`
54 |
55 | Here the column named `record` maps to the LinkML element class, `field` to slot, and so on.
56 |
57 | The final column is an example of a *complex mapping* which we will get to later.
58 |
59 | ## Schema Elements
60 |
61 | Remaining rows are *elements* of your schema
62 |
63 | - Each element gets its own row
64 | - A row can represent a class (record, table), field (column), enumeration, or other element types
65 |
66 | Looking at the first three element rows, we can see the first two represent fields (slots), in particular a field called `id` which is an identifier, and a field called `description`
67 |
68 | |record|field|key|multiplicity|range|desc|schema.org
69 | |---|---|---|---|---|---|---|
70 | |`>` class|slot|identifier|cardinality|range|description|`exact_mappings: {curie_prefix: sdo}`
71 | |-|id|yes|1|string|any identifier|identifier
72 | |-|description|no|0..1|string|a textual description|description
73 | |Person||n/a|n/a|n/a|a person,living or dead|Person
74 |
75 | The third element row represents a record (class) type - here, a Person
76 |
77 | ## Core Concepts
78 |
79 | The most basic schema concepts are *classes* and *slots*
80 |
81 | - classes represent record types, similar to tables in a database or sheets in a spreadsheet
82 | - slots represent fields, similar to columns in a database or spreadsheet
83 |
84 | These can be used in combination:
85 |
86 | - If a *class* is provided, but a *slot* is not, then the row represents a class.
87 | - If a *slot* is provided, but a *class* is not, then the row represents a slot (field)
88 | - If both *class* and *slot* are provided, then the row represents the *usage* of a slot in the context of a class
89 |
90 | Looking at the first 4 element (non-`>`) rows:
91 |
92 | |record|field|key|multiplicity|range|desc|schema.org
93 | |---|---|---|---|---|---|---|
94 | |`>` class|slot|identifier|cardinality|range|description|`exact_mappings: {curie_prefix: sdo}`
95 | |-|id|yes|1|string|any identifier|identifier
96 | |-|description|no|0..1|string|a textual description|description
97 | |Person||n/a|n/a|n/a|a person,living or dead|Person
98 | |Person|id|yes|1|string|identifier for a person|identifier
99 |
100 | The first two are slots, the third is a class, and the fourth represents the slot `id` when used in the context of the Person class.
101 |
102 | To understand more about the concept of contextual slots, see [Slot Usage Docs](https://linkml.io/linkml/schemas/slots.html#slot-usage) in the main LinkML guide.
103 |
104 | Other element types are:
105 |
106 | - Enums, for enumerations
107 | - Prefixes, for representing prefix maps
108 | - Schemas, for schemas
109 | - Types, for primitive datatypes
110 |
111 | See [LinkML schemas](https://linkml.io/linkml/schemas/index.html) for more information
112 |
--------------------------------------------------------------------------------
/examples/output/owl/combined.owl.ttl:
--------------------------------------------------------------------------------
1 | @prefix IAO: .
2 | @prefix dcterms: .
3 | @prefix linkml: .
4 | @prefix owl: .
5 | @prefix personinfo: .
6 | @prefix rdfs: .
7 | @prefix sdo: .
8 | @prefix skos: .
9 | @prefix wikidata: .
10 | @prefix xsd: .
11 |
12 | linkml:SubsetDefinition a owl:Class ;
13 | rdfs:label "subset_definition" ;
14 | skos:definition "the name and description of a subset" .
15 |
16 | a owl:Ontology ;
17 | rdfs:label "PersonInfo" ;
18 | IAO:0000700 personinfo:Event,
19 | personinfo:Organization,
20 | personinfo:Person ;
21 | dcterms:license "https://creativecommons.org/publicdomain/zero/1.0/" ;
22 | skos:definition "Information about people, based on [schema.org](http://schema.org)" ;
23 | linkml:generation_date "2022-01-05T12:15:38" ;
24 | linkml:metamodel_version "1.7.0" ;
25 | linkml:source_file "combined.yaml" ;
26 | linkml:source_file_date "2022-01-05T12:15:36" ;
27 | linkml:source_file_size 4139 .
28 |
29 | personinfo:DecimalDegree a owl:Class,
30 | linkml:TypeDefinition ;
31 | rdfs:label "DecimalDegree" ;
32 | rdfs:subClassOf [ a owl:Restriction ;
33 | owl:onDataRange xsd:decimal ;
34 | owl:onProperty linkml:topValue ;
35 | owl:qualifiedCardinality 1 ] ;
36 | skos:definition "A decimal degree expresses latitude or longitude as decimal fractions" .
37 |
38 | personinfo:ForProfit a owl:Class,
39 | linkml:ClassDefinition ;
40 | rdfs:label "ForProfit" ;
41 | rdfs:subClassOf personinfo:Organization .
42 |
43 | personinfo:Lang a owl:Class,
44 | linkml:TypeDefinition ;
45 | rdfs:label "Lang" ;
46 | rdfs:subClassOf [ a owl:Restriction ;
47 | owl:onDataRange xsd:string ;
48 | owl:onProperty linkml:topValue ;
49 | owl:qualifiedCardinality 1 ] ;
50 | skos:definition "language tag" .
51 |
52 | personinfo:NonProfit a owl:Class,
53 | linkml:ClassDefinition ;
54 | rdfs:label "NonProfit" ;
55 | rdfs:subClassOf personinfo:Organization ;
56 | skos:exactMatch wikidata:Q163740 .
57 |
58 | personinfo:description a owl:ObjectProperty,
59 | linkml:SlotDefinition ;
60 | rdfs:label "description" ;
61 | rdfs:range linkml:String ;
62 | skos:definition "a textual description" .
63 |
64 | personinfo:MedicalEvent a owl:Class,
65 | linkml:ClassDefinition ;
66 | rdfs:label "MedicalEvent" ;
67 | rdfs:subClassOf personinfo:Event ;
68 | skos:definition "a medical encounter" .
69 |
70 | personinfo:Person a owl:Class,
71 | linkml:ClassDefinition ;
72 | rdfs:label "Person" ;
73 | rdfs:subClassOf [ a owl:Restriction ;
74 | owl:allValuesFrom personinfo:MedicalEvent ;
75 | owl:onProperty personinfo:has_medical_history ],
76 | [ a owl:Restriction ;
77 | owl:onClass linkml:String ;
78 | owl:onProperty personinfo:id ;
79 | owl:qualifiedCardinality 1 ],
80 | [ a owl:Restriction ;
81 | owl:maxQualifiedCardinality 1 ;
82 | owl:onClass linkml:Decimal ;
83 | owl:onProperty personinfo:age ],
84 | [ a owl:Restriction ;
85 | owl:maxQualifiedCardinality 1 ;
86 | owl:onClass linkml:Decimal ;
87 | owl:onProperty personinfo:gender ],
88 | [ a owl:Restriction ;
89 | owl:onClass linkml:String ;
90 | owl:onProperty personinfo:name ;
91 | owl:qualifiedCardinality 1 ] ;
92 | skos:definition "a person,living or dead" ;
93 | skos:exactMatch sdo:Person,
94 | wikidata:Q215627 .
95 |
96 | personinfo:age a owl:ObjectProperty,
97 | linkml:SlotDefinition .
98 |
99 | personinfo:gender a owl:ObjectProperty,
100 | linkml:SlotDefinition .
101 |
102 | personinfo:has_medical_history a owl:ObjectProperty,
103 | linkml:SlotDefinition .
104 |
105 | personinfo:id a owl:ObjectProperty,
106 | linkml:SlotDefinition .
107 |
108 | linkml:TypeDefinition a owl:Class ;
109 | rdfs:label "type_definition" ;
110 | skos:definition "A data type definition." .
111 |
112 | personinfo:Event a owl:Class,
113 | linkml:ClassDefinition ;
114 | rdfs:label "Event" ;
115 | skos:definition "grouping class for events" ;
116 | skos:exactMatch wikidata:Q1656682 .
117 |
118 | personinfo:name a owl:ObjectProperty,
119 | linkml:SlotDefinition .
120 |
121 | linkml:topValue a owl:DatatypeProperty ;
122 | rdfs:label "value" .
123 |
124 | personinfo:Organization a owl:Class,
125 | linkml:ClassDefinition ;
126 | rdfs:label "Organization" ;
127 | rdfs:subClassOf [ a owl:Restriction ;
128 | owl:onClass linkml:String ;
129 | owl:onProperty personinfo:name ;
130 | owl:qualifiedCardinality 1 ] .
131 |
132 | linkml:ClassDefinition a owl:Class ;
133 | rdfs:label "class_definition" ;
134 | skos:definition "the definition of a class or interface" .
135 |
136 | linkml:SlotDefinition a owl:Class ;
137 | rdfs:label "slot_definition" ;
138 | skos:definition "the definition of a property or a slot" .
139 |
140 |
141 |
--------------------------------------------------------------------------------
/docs/howto/data-dictionaries.md:
--------------------------------------------------------------------------------
1 | TODO: this is not yet complete, do not link from index until complete
2 |
3 | # Simple data dictionaries
4 |
5 | A data dictionary is a file (or collection of files) which unambiguously declares, defines and annotates all the variables collected in a project and associated to a dataset (_definition_: [FAIR cookbook](https://faircookbook.elixir-europe.org/content/recipes/interoperability/creating-data-dictionary.html)).
6 |
7 | Schemasheets is an ideal framework for managing a data dictionary.
8 |
9 | ## Example Data Dictionary
10 |
11 | The [FAIR Cookbook](https://faircookbook.elixir-europe.org) provides an example of a data dictionary for tracking various aspects of
12 | a research subject or model organism, including:
13 |
14 | - subject_id
15 | - species
16 | - strain (for model organisms)
17 | - age + age unit
18 | - etc
19 |
20 | See [Example](https://faircookbook.elixir-europe.org/content/recipes/interoperability/creating-data-dictionary.html#an-example-of-data-dictionary).
21 |
22 | Let's start by copying this directly into a google sheet.
23 |
24 | You can see this on the v1 tab of [this sheet](https://docs.google.com/spreadsheets/d/1bUMX6P8JkgbHwZHR7RU5XbBsbhrKwaBftk7XuDz6xJc/edit#gid=0)
25 |
26 | File Name | Variable Name | Variable Label | Variable Ontology ID or RDFtype | Variable ID Source | Variable Statistical Type | Variable Data Type | Variable Size | Max Allowed Value | Min Allowed Value | Regex | Allowed Value Shorthands | Allowed Value Descriptions | Computed Value | Unique (alone) | Unique (Combined with) | Required | Collection Form Name | Comments
27 | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | --
28 | 1_Subjects.txt | SUBJECT_ID | Subject number | https://schema.org/identifier | https://schema.org | categorical variable | integer | | | | | | | | Y | | Y | FORM 1 |
29 | 1_Subjects.txt | SPECIES | Species name | https://schema.org/name | https://schema.org | categorical variable | string | | | | | | | | | | | FORM 1 |
30 | 1_Subjects.txt | STRAIN | Strain | TODO substitute broken link https://bioschemas.org/profiles/Taxon/0.6-RELEASE/identifier | https://schemas.org/ | categorical variable | string | | | | | | http://purl.obolibrary.org/obo/NCBITaxon_40674 | | | | | FORM 1 |
31 | 1_Subjects.txt | AGE | Age at study initiation | https://bioschemas.org/types/BioSample/0.1-RELEASE-2019_06_19 | https://bioschemas.org/ | continuous variable | integer | | | | | | | | | | Y | FORM 1 |
32 | 1_Subjects.txt | AGE_UNIT | Age unit | http://purl.obolibrary.org/obo/UO_0000003 | http://purl.obolibrary.org/obo/uo | categorial variable | string | | | | | | | | | | Y | FORM 1 |
33 | 1_Subjects.txt | SEX | Sex | https://schema.org/gender | https://schema.org | categorical variable | enum | | | | | M;F | M=male;F=female | | | | | FORM 1 |
34 |
35 | ## Adding a descriptor row
36 |
37 | Our first task is to add a descriptor row that describes how each column heading maps to a LinkML metamodel element.
38 |
39 | Here we will tackle this incrementally, starting with the first 3 columns, which we will map to:
40 |
41 | - [class](https://w3id.org/linkml/ClassDefinition)
42 | - [slot](https://w3id.org/linkml/SlotDefinition)
43 | - [title](https://w3id.org/linkml/title)
44 |
45 | The table now looks like this:
46 |
47 | |File Name|Variable Name|Variable Label|
48 | |---|---|---|
49 | |`>` class|slot|title|
50 | |1_Subjects.txt|SUBJECT_ID|Subject number|
51 | |1_Subjects.txt|SPECIES|Species name|
52 | |1_Subjects.txt|STRAIN|Strain|
53 | |1_Subjects.txt|AGE|Age at study initiation|
54 | |1_Subjects.txt|AGE_UNIT|Age unit|
55 | |1_Subjects.txt|SEX|Sex|
56 | |1_Subjects.txt|SOMEDATE|Date of acquiring subject|
57 | |1_Subjects.txt|HEMOGLOBIN|Hematology: Hemoglobin|
58 | |1_Subjects.txt|HEMOGLOBIN_UNIT|Hemoglobin unit|
59 | |1_Subjects.txt|HEIGHT|Body size|
60 | |1_Subjects.txt|HEIGHT_UNIT|Body size unit|
61 | |1_Subjects.txt|WEIGHT|Body weight|
62 | |1_Subjects.txt|WEIGHT_UNIT|Body weight unit|
63 | |1_Subjects.txt|BMI|Body mass index|
64 | |1_Subjects.txt|LAB|Laboratory|
65 | |2_Samples.txt|SAMPLE_ID|Sample ID|
66 | |2_Samples.txt|SAMPLE_SITE|Sample collection site|
67 | |2_Samples.txt|ANALYTE_TYPE|Type of analysis|
68 | |2_Samples.txt|GENOTYPING_CENTER|GENOTYPING_CENTER|
69 | |2_Samples.txt|SEQUENCING_CENTER|SEQUENCING_CENTER|
70 | |3_SampleMapping.txt|SUBJECT_ID|Subject number|
71 | |3_SampleMapping.txt|SAMPLE_ID|Sample ID|
72 |
73 | Our choice of how to map the first column is a bit odd, and reflects a slight mismatch between
74 | schemasheets/LinkML, which aims to describe a data model that can be used for *multiple instantiations of the same format* and a data dictionary that is oriented around describing *a single distribution*.
75 |
76 | Here we are implicitly creating classes/records like "1_Subjects.txt", which don't really conform to standard
77 | class naming conventions in LinkML. Later we will explore rewriting these with names like "Subject", "Sample", and "SampleMapping".
78 |
79 | TODO
80 |
81 | For the second column, the choice of ALL-CAPS for slot name also goes against standard naming conventions, but
82 | this doesn't really matter so much, and the title (col 3) is the string that should be used in user-facing applications
83 | like Data Harmonizer.
84 |
85 |
86 |
87 | ## Modifications
88 |
89 | - We modified the minimum and maximum values which were specified using commas instead of periods for decimal notation
90 | - The "regex" field had a value YYYY-MM-DD, but this isn't an actual regex
91 |
92 | This framework allows you to represent complex relation-style schemas
93 | using spreadsheets/TSVs. But it also allows for representation of simple "data dictionaries" or "minimal information lists".
94 | These can be thought of as "wide tables", e.g. representing individual observations or observable units such as persons or samples.
95 |
96 | TODO
97 |
98 |
99 |
100 |
--------------------------------------------------------------------------------
/tests/test_schema_exporter.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 |
4 | from linkml.utils.schema_builder import SchemaBuilder
5 | from linkml.utils.schema_fixer import SchemaFixer
6 | from linkml_runtime.dumpers import yaml_dumper
7 | from linkml_runtime.linkml_model import TypeDefinition, Annotation
8 | from linkml_runtime.utils.introspection import package_schemaview
9 | from linkml_runtime.utils.schemaview import SchemaView, SchemaDefinition, SlotDefinition, ClassDefinition, YAMLRoot
10 | from schemasheets.schema_exporter import SchemaExporter
11 | from schemasheets.schemamaker import SchemaMaker
12 | from schemasheets.schemasheet_datamodel import SchemaSheet
13 |
# Directory containing this test module; every fixture path below is built from it.
ROOT = os.path.abspath(os.path.dirname(__file__))
INPUT_DIR = os.path.join(ROOT, 'input')
OUTPUT_DIR = os.path.join(ROOT, 'output')
# Input sheet used both as the schema source and as the export specification
# in test_roundtrip_schema.
SHEET = os.path.join(INPUT_DIR, 'personinfo.tsv')
# Files written by the exporter while the tests run.
ROUNDTRIPPED_SHEET = os.path.join(OUTPUT_DIR, 'personinfo-roundtrip.tsv')
MINISHEET = os.path.join(OUTPUT_DIR, 'mini.tsv')
# Specification sheets handed to the exporter by the individual tests.
TEST_SPEC = os.path.join(INPUT_DIR, 'test-spec.tsv')
ENUM_SPEC = os.path.join(INPUT_DIR, 'enums.tsv')
TYPES_SPEC = os.path.join(INPUT_DIR, 'types.tsv')
PREFIXES_SPEC = os.path.join(INPUT_DIR, 'prefixes.tsv')
SLOT_SPEC = os.path.join(INPUT_DIR, 'slot-spec.tsv')
25 |
# Rows that test_roundtrip_schema requires to be present in exporter.rows after the
# personinfo sheet has been exported. Keys are column names of the exported sheet.
EXPECTED = [
    {
        'field': 'id',
        'key': 'true',  # https://github.com/linkml/schemasheets/issues/67
        'range': 'string',
        'desc': 'any identifier',
        'schema.org': 'identifier',
        ## TODO
        ## 'multiplicity': '1',
    },
    {
        'record': 'Person',
        'field': 'age',
        'range': 'decimal',
        'desc': 'age in years'
    },
    {
        'record': 'ForProfit',
        'parents': 'Organization'
    },
    {
        'field': 'name'
    },
    # tests curie contraction
    {
        'record': 'Person',
        'field': 'id',
        'key': 'true',
        'range': 'string',
        'desc': 'identifier for a person',
        'schema.org': 'identifier'
    },
]
59 |
60 |
def test_roundtrip_schema():
    """
    Round-trips the personinfo sheet.

    A schema is first built from SHEET (sheets2linkml); that schema is then exported
    back to a sheet (linkml2sheets) using SHEET itself as the specification. Every
    record listed in EXPECTED must be among the exported rows.
    """
    maker = SchemaMaker()
    # sheets2linkml: build the schema from the input sheet
    personinfo_schema = maker.create_schema(SHEET)
    exporter = SchemaExporter(schemamaker=maker)
    view = SchemaView(personinfo_schema)
    # linkml2sheets: the original sheet only serves as the specification here
    # (its main data rows are ignored)
    exporter.export(view, specification=SHEET, to_file=ROUNDTRIPPED_SHEET)
    for exported_row in exporter.rows:
        logging.info(exported_row)
    for expected_row in EXPECTED:
        assert expected_row in exporter.rows
78 |
79 |
def _roundtrip(schema: SchemaDefinition, specification: str, must_pass=True) -> SchemaDefinition:
    """
    Exports a schema to MINISHEET using the given specification, rebuilds a schema
    from that sheet, and compares every original element with its rebuilt counterpart.

    :param schema: schema to export
    :param specification: path of the specification sheet passed to the exporter
    :param must_pass: if true, a missing element raises ValueError and any differing
        attribute fails an assertion; if false, missing elements are skipped and
        differing attributes are only logged
    :return: the schema rebuilt from the exported sheet
    """
    maker = SchemaMaker()
    exporter = SchemaExporter(schemamaker=maker)
    original_view = SchemaView(schema)
    exporter.export(schemaview=original_view, specification=specification, to_file=MINISHEET)
    rebuilt_schema = maker.create_schema(MINISHEET)
    rebuilt_view = SchemaView(rebuilt_schema)
    for element in original_view.all_elements().values():
        counterpart = rebuilt_view.get_element(element.name)
        if counterpart is None:
            if must_pass:
                raise ValueError(f"Could not find {element}")
            continue
        # from_schema is copied over before the attribute-by-attribute comparison
        counterpart.from_schema = element.from_schema
        for attr_name, original_value in vars(element).items():
            rebuilt_value = getattr(counterpart, attr_name, None)
            if original_value != rebuilt_value:
                logging.error(f" UNEXPECTED {attr_name}: {original_value} ?= {rebuilt_value} // {type(rebuilt_value)}")
                if must_pass:
                    assert original_value == rebuilt_value
    return rebuilt_schema
111 |
112 |
def test_dynamic():
    """
    Builds a small schema programmatically with SchemaBuilder and round-trips it
    through TEST_SPEC.
    """
    builder = SchemaBuilder()
    sf = SchemaFixer()
    for bare_class_name in ('A', 'M1', 'M2'):
        builder.add_class(bare_class_name, [])
    s1 = SlotDefinition('s1', title="ts1", description='s1', range="Y")
    s2 = SlotDefinition('s2', title="ts2", description='s2', range="string")
    s3 = SlotDefinition('s3', title="ts2", description='s3', range="integer")
    s2_usage = SlotDefinition('s2', pattern="^\\S+$")
    builder.add_class(
        'X',
        ['s1', 's2'],
        slot_usage={'s2': s2_usage},
        description='d1',
        is_a="A",
        mixins=["M1"],
    )
    builder.add_class(
        'Y',
        ['s2', 's3'],
        description='d2',
        is_a="A",
        mixins=["M1", "M2"],
    )

    # s1/s2/s3 are deliberately not added to the builder: doing so gives a
    # "slot already present" error. It appears that they are implicitly added when
    # adding classes that use them, after having switched from LinkML 1.3 to 1.5.

    builder.add_defaults()
    _roundtrip(builder.schema, TEST_SPEC)
138 |
139 |
def test_inner_key():
    """
    Round-trips a class that carries annotations, exercising inner_key handling.

    See https://github.com/linkml/schemasheets/issues/59
    """
    builder = SchemaBuilder()
    sf = SchemaFixer()
    display_hint = Annotation("display_hint", "hello")
    more_words = Annotation("more_words", "profound_words")
    slot = SlotDefinition("s1")
    annotated_class = ClassDefinition(
        "X",
        slots=["s1"],
        slot_usage={slot.name: slot},
        annotations={display_hint.tag: display_hint, more_words.tag: more_words},
    )
    schema = builder.schema
    schema.classes[annotated_class.name] = annotated_class
    # read the class back from the schema before checking its container types
    stored_class = schema.classes['X']
    assert isinstance(stored_class, ClassDefinition)
    print(type(stored_class.annotations))
    assert isinstance(stored_class.annotations, dict)
    assert isinstance(stored_class.slot_usage, dict)
    annotation_spec = os.path.join(INPUT_DIR, 'test-spec-ann.tsv')
    _roundtrip(schema, annotation_spec)
163 |
164 |
def test_enums():
    """
    Round-trips a schema holding a single enum through the enum-only specification.
    """
    builder = SchemaBuilder()
    builder.add_enum('E', ['V1', 'V2'])
    builder.add_defaults()
    enum_schema = builder.schema
    # TODO: add this functionality to SchemaBuilder
    enum_schema.enums['E'].description = 'test desc'
    _roundtrip(enum_schema, ENUM_SPEC)
177 |
178 |
def test_prefixes():
    """
    Round-trips a schema through the prefix-only specification and checks that the
    declared prefix and the default "linkml" prefix are present in the result.
    """
    builder = SchemaBuilder()
    builder.add_prefix("ex", "https://example.org/")
    builder.add_defaults()
    recapitulated = _roundtrip(builder.schema, PREFIXES_SPEC)
    recapitulated_prefixes = recapitulated.prefixes
    assert "ex" in recapitulated_prefixes
    assert recapitulated_prefixes["ex"].prefix_reference == "https://example.org/"
    assert "linkml" in recapitulated_prefixes
191 |
192 |
def test_types():
    """
    Round-trips a schema holding a single custom type through the type-only
    specification.
    """
    type_schema = SchemaBuilder().schema
    # TODO: add this functionality to SchemaBuilder
    my_string = TypeDefinition('MyString', description='my string', typeof='string')
    type_schema.types[my_string.name] = my_string
    _roundtrip(type_schema, TYPES_SPEC)
203 |
204 |
def test_parse_specification_from_tsv():
    """
    Parses the specification rows of TEST_SPEC and checks the internal separator
    configured for the "mixins" column.
    """
    sheet = SchemaSheet.from_csv(TEST_SPEC)
    mixins_column = sheet.table_config.columns["mixins"]
    assert "|" == mixins_column.settings.internal_separator
213 |
214 |
def test_export_metamodel():
    """
    Round-trips the LinkML metamodel through TEST_SPEC without requiring an exact
    match, then logs the resulting schema as YAML.
    """
    metamodel_view = package_schemaview('linkml_runtime.linkml_model.meta')
    roundtripped = _roundtrip(metamodel_view.schema, TEST_SPEC, must_pass=False)
    logging.info(yaml_dumper.dumps(roundtripped))
220 |
221 |
def test_export_metamodel_slots():
    """
    Exports the LinkML metamodel using SLOT_SPEC and checks the rows produced for
    the "all_of" and "status" slots.
    """
    maker = SchemaMaker()
    metamodel_view = package_schemaview('linkml_runtime.linkml_model.meta')
    metamodel_schema = metamodel_view.schema
    exporter = SchemaExporter(schemamaker=maker)
    exporter.export(SchemaView(metamodel_schema), specification=SLOT_SPEC, to_file=MINISHEET)

    def rows_for_slot(slot_name):
        # all exported rows whose 'slot' column equals slot_name
        return [row for row in exporter.rows if row['slot'] == slot_name]

    assert 1 == len(rows_for_slot('all_of'))
    [status_row] = rows_for_slot('status')
    # NOTE: this test may be too rigid, if the metamodel documentation changes then the results
    # of this will change
    assert 'bibo:draft' == status_row['examples']
236 |
--------------------------------------------------------------------------------
/docs/datamodel/Cardinality.md:
--------------------------------------------------------------------------------
1 |
2 | # Enum: Cardinality
3 |
4 |
5 | vocabulary for describing cardinality and applicability of slots or fields.
6 |
7 | Most elements in the dictionary have been mapped to a vocabulary called "carvoc",
8 | which has yet to be released.
9 |
10 | URI: [schemasheets:Cardinality](https://w3id.org/linkml/configschema/Cardinality)
11 |
12 |
13 | ## Other properties
14 |
15 | | | | |
16 | | --- | --- | --- |
17 | | **Aliases:** | | multiplicity |
18 | | | | applicability |
19 |
20 | ## Permissible Values
21 |
22 | | Text | Description | Meaning | Other Information |
23 | | :--- | :---: | :---: | ---: |
24 | | mandatory | At least one value MUST be provided | carvoc:Mandatory | {'annotations': {'maps_to': Annotation(tag='maps_to', value='required: true', extensions={}, annotations={}), 'min': Annotation(tag='min', value='1', extensions={}, annotations={}), 'opposite': Annotation(tag='opposite', value='optional', extensions={}, annotations={}), 'interpretation': Annotation(tag='interpretation', value='MUST', extensions={}, annotations={}), 'aliases': Annotation(tag='aliases', value='required', extensions={}, annotations={}), 'mixs_notation': Annotation(tag='mixs_notation', value='M', extensions={}, annotations={}), 'code': Annotation(tag='code', value='M', extensions={}, annotations={})}} |
25 | | optional | A value MAY be provided | carvoc:Optional | {'annotations': {'maps_to': Annotation(tag='maps_to', value='required: false', extensions={}, annotations={}), 'min': Annotation(tag='min', value='0', extensions={}, annotations={}), 'opposite': Annotation(tag='opposite', value='mandatory', extensions={}, annotations={}), 'interpretation': Annotation(tag='interpretation', value='MAY', extensions={}, annotations={}), 'aliases': Annotation(tag='aliases', value='permissible', extensions={}, annotations={}), 'mixs_notation': Annotation(tag='mixs_notation', value='X', extensions={}, annotations={}), 'code': Annotation(tag='code', value='O', extensions={}, annotations={})}} |
26 | | recommended | A value SHOULD be provided | carvoc:Recommended | {'annotations': {'maps_to': Annotation(tag='maps_to', value='{required: false, recommended: true}', extensions={}, annotations={}), 'min': Annotation(tag='min', value='0', extensions={}, annotations={}), 'opposite': Annotation(tag='opposite', value='not_recommended', extensions={}, annotations={}), 'interpretation': Annotation(tag='interpretation', value='SHOULD', extensions={}, annotations={}), 'aliases': Annotation(tag='aliases', value='strongly suggested', extensions={}, annotations={}), 'code': Annotation(tag='code', value='R', extensions={}, annotations={})}} |
27 | | not_recommended | Values are permitted, but SHOULD NOT be filled in | carvoc:NotRecommended | {'annotations': {'maps_to': Annotation(tag='maps_to', value='{required: false, recommended: false, recommended_against: true}', extensions={}, annotations={}), 'min': Annotation(tag='min', value='0', extensions={}, annotations={}), 'opposite': Annotation(tag='opposite', value='recommended', extensions={}, annotations={}), 'interpretation': Annotation(tag='interpretation', value='SHOULD NOT', extensions={}, annotations={}), 'aliases': Annotation(tag='aliases', value='recommended against', extensions={}, annotations={}), 'code': Annotation(tag='code', value='-R', extensions={}, annotations={})}} |
28 | | applicable | union of optional and mandatory | carvoc:Applicable | {'annotations': {'min': Annotation(tag='min', value='0', extensions={}, annotations={}), 'opposite': Annotation(tag='opposite', value='not_applicable', extensions={}, annotations={}), 'interpretation': Annotation(tag='interpretation', value='MAY', extensions={}, annotations={}), 'code': Annotation(tag='code', value='O/M', extensions={}, annotations={})}, 'comments': ['use of this descriptor is generally not recommended as it is usually possible to commit']} |
29 | | not_applicable | A value MUST NOT be provided | carvoc:NotApplicable | {'annotations': {'max': Annotation(tag='max', value='0', extensions={}, annotations={}), 'min': Annotation(tag='min', value='0', extensions={}, annotations={}), 'opposite': Annotation(tag='opposite', value='applicable', extensions={}, annotations={}), 'interpretation': Annotation(tag='interpretation', value='MUST NOT', extensions={}, annotations={}), 'mixs_notation': Annotation(tag='mixs_notation', value='-', extensions={}, annotations={}), 'code': Annotation(tag='code', value='-', extensions={}, annotations={})}, 'comments': ['a use case for this is for a field that has been deprecated', 'a use case for this is for annotating a field that is not applicable in a particular context']} |
30 | | zero_or_one | not required, single-valued | carvoc:ZeroToOne | {'mixins': ['optional', 'single-valued'], 'annotations': {'maps_to': Annotation(tag='maps_to', value='{required: false, multivalued: false}', extensions={}, annotations={}), 'min': Annotation(tag='min', value='0', extensions={}, annotations={}), 'max': Annotation(tag='max', value='1', extensions={}, annotations={}), 'interpretation': Annotation(tag='interpretation', value='MUST, MAY', extensions={}, annotations={}), 'uml': Annotation(tag='uml', value='0..1', extensions={}, annotations={}), 'code': Annotation(tag='code', value='0..1', extensions={}, annotations={})}} |
31 | | exactly_one | required, single-valued | carvoc:ExactlyOne | {'mixins': ['mandatory', 'single-valued'], 'annotations': {'maps_to': Annotation(tag='maps_to', value='{required: true, multivalued: false}', extensions={}, annotations={}), 'min': Annotation(tag='min', value='1', extensions={}, annotations={}), 'max': Annotation(tag='max', value='1', extensions={}, annotations={}), 'interpretation': Annotation(tag='interpretation', value='MUST', extensions={}, annotations={}), 'uml': Annotation(tag='uml', value='1..1', extensions={}, annotations={}), 'code': Annotation(tag='code', value='1..1', extensions={}, annotations={})}} |
32 | | zero_to_many | not required, multi-valued | carvoc:ZeroToMany | {'mixins': ['optional', 'multi-valued'], 'annotations': {'maps_to': Annotation(tag='maps_to', value='{required: false, multivalued: true}', extensions={}, annotations={}), 'min': Annotation(tag='min', value='0', extensions={}, annotations={}), 'max': Annotation(tag='max', value='*', extensions={}, annotations={}), 'interpretation': Annotation(tag='interpretation', value='MAY', extensions={}, annotations={}), 'uml': Annotation(tag='uml', value='0..*', extensions={}, annotations={}), 'code': Annotation(tag='code', value='0..*', extensions={}, annotations={})}} |
33 | | one_to_many | required, multi-valued | carvoc:OneToMany | {'mixins': ['mandatory', 'multi-valued'], 'annotations': {'maps_to': Annotation(tag='maps_to', value='{required: true, multivalued: true}', extensions={}, annotations={}), 'min': Annotation(tag='min', value='1', extensions={}, annotations={}), 'max': Annotation(tag='max', value='*', extensions={}, annotations={}), 'interpretation': Annotation(tag='interpretation', value='MUST, MAY', extensions={}, annotations={}), 'uml': Annotation(tag='uml', value='1..*', extensions={}, annotations={}), 'code': Annotation(tag='code', value='1..*', extensions={}, annotations={})}} |
34 | | single_valued | not multi-valued | carvoc:SingleValued | {'annotations': {'maps_to': Annotation(tag='maps_to', value='multivalued: false', extensions={}, annotations={}), 'max': Annotation(tag='max', value='1', extensions={}, annotations={}), 'opposite': Annotation(tag='opposite', value='multi-valued', extensions={}, annotations={}), 'interpretation': Annotation(tag='interpretation', value='MAY', extensions={}, annotations={}), 'mixs_notation': Annotation(tag='mixs_notation', value='1', extensions={}, annotations={}), 'code': Annotation(tag='code', value='SV', extensions={}, annotations={})}} |
35 | | multi_valued | multi-valued | carvoc:MultiValued | {'annotations': {'maps_to': Annotation(tag='maps_to', value='multivalued: true', extensions={}, annotations={}), 'max': Annotation(tag='max', value='*', extensions={}, annotations={}), 'opposite': Annotation(tag='opposite', value='single-valued', extensions={}, annotations={}), 'interpretation': Annotation(tag='interpretation', value='MAY', extensions={}, annotations={}), 'mixs_notation': Annotation(tag='mixs_notation', value='m', extensions={}, annotations={}), 'code': Annotation(tag='code', value='MV', extensions={}, annotations={})}} |
36 | | conditional | A qualifier on cardinalities that indicates the interpretation is context-dependent | carvoc:Conditional | {'annotations': {'maps_to': Annotation(tag='maps_to', value='conditional: true', extensions={}, annotations={}), 'mixs_notation': Annotation(tag='mixs_notation', value='E', extensions={}, annotations={}), 'code': Annotation(tag='code', value='E+', extensions={}, annotations={})}} |
37 | | unconditional | A qualifier on cardinalities that indicates the interpretation is context-independent | carvoc:Unconditional | {'annotations': {'maps_to': Annotation(tag='maps_to', value='conditional: false', extensions={}, annotations={}), 'code': Annotation(tag='code', value='E-', extensions={}, annotations={})}} |
38 | | conditional_mandatory | | carvoc:ConditionalMandatory | {'mixins': ['mandatory', 'conditional'], 'annotations': {'maps_to': Annotation(tag='maps_to', value='{required: true, conditional: true}', extensions={}, annotations={}), 'mixs_notation': Annotation(tag='mixs_notation', value='C', extensions={}, annotations={}), 'code': Annotation(tag='code', value='EM', extensions={}, annotations={})}} |
39 |
40 |
--------------------------------------------------------------------------------
/schemasheets/schemasheet_datamodel.py:
--------------------------------------------------------------------------------
1 | """Core data model for a SchemaSheet."""
2 | import csv
3 | from dataclasses import dataclass
4 | from typing import Union, Dict, List, Any
5 | import pkgutil
6 | from pathlib import PurePath, Path
7 | from functools import lru_cache
8 | import logging
9 | import yaml
10 | from linkml_runtime.linkml_model import SlotDefinition, ClassDefinition, SchemaDefinition, \
11 | PermissibleValue, EnumDefinition, TypeDefinition, SubsetDefinition, Prefix
12 | from linkml_runtime.linkml_model.meta import Setting
13 |
14 | from linkml_runtime.utils.schemaview import SchemaView
15 |
16 | from schemasheets.conf.configschema import ColumnSettings, Shortcuts
17 |
# Type aliases used in annotations throughout this module.
COL_NAME = str
DESCRIPTOR = str
ROW = Dict[str, Any]

# Vocabulary for types
T_SCHEMA = 'schema'
T_CLASS = 'class'
T_SLOT = 'slot'
T_ATTRIBUTE = 'attribute'
T_ENUM = 'enum'
T_PV = 'permissible_value'
T_TYPE = 'type'
T_SUBSET = 'subset'
T_PREFIX = 'prefix'
T_SETTING = 'setting'

# Maps each element-type name above to the LinkML metamodel class that represents it.
# Note that both 'slot' and 'attribute' map to SlotDefinition.
tmap = {
    T_SCHEMA: SchemaDefinition,
    T_CLASS: ClassDefinition,
    T_SLOT: SlotDefinition,
    T_ATTRIBUTE: SlotDefinition,
    T_ENUM: EnumDefinition,
    T_PV: PermissibleValue,
    T_TYPE: TypeDefinition,
    T_SUBSET: SubsetDefinition,
    T_PREFIX: Prefix,
    T_SETTING: Setting,
}
46 |
47 |
48 |
49 |
@dataclass
class ColumnConfig:
    """
    Configuration for a single column in a schema sheet

    The configuration is filled in incrementally through :meth:`add_info`: the first
    call establishes what the column maps to, later calls merge additional settings.
    """
    # column name
    name: COL_NAME
    # descriptor the column maps to; None until the first add_info call
    maps_to: DESCRIPTOR = None
    # settings accumulated for this column; created on the first add_info call
    settings: ColumnSettings = None
    # metamodel slot resolved from maps_to, when maps_to names one
    metaslot: SlotDefinition = None
    # metamodel slot resolved from settings.inner_key, when it names one
    inner_key_metaslot: SlotDefinition = None
    # set to True by TableConfig.add_info when maps_to is a key of tmap
    is_element_type: bool = None

    def merge_settings(self, settings: ColumnSettings) -> None:
        """
        merges specified settings into current settings

        Only truthy attribute values are copied; falsy values (None, False, 0, empty
        containers/strings) on the incoming object never overwrite existing settings.

        :param settings: settings to be merged
        """
        for k, v in vars(settings).items():
            if v:
                setattr(self.settings, k, v)

    def add_info(self, info: Union[Dict, DESCRIPTOR]) -> None:
        """
        Adds configuration/settings in the form of a dict object.

        Information can be incrementally added:

        - the first piece of information should be the descriptor
        - after that individual settings can be added

        On the first call (``maps_to`` is still None) ``info`` is either a bare
        descriptor, or a single-entry dict ``{descriptor: {setting: value, ...}}``.
        On later calls ``info`` is unpacked as keyword arguments for ColumnSettings.

        :param info: configuration
        :return:
        :raises ValueError: if a first-call dict does not have exactly one entry, if
            that entry's value is not a dict, or if the descriptor is neither a
            metamodel slot name, a key of tmap, nor a member of Shortcuts
        """
        logging.debug(f"Adding info to {self.name}: {info}")
        if self.maps_to is None:
            # The first descriptor row describes what the column maps to
            self.settings = ColumnSettings()
            if isinstance(info, dict):
                items = list(info.items())
                if len(items) != 1:
                    raise ValueError(f'Unexpected settings: {info}')
                else:
                    # single entry: key is the descriptor, value holds the settings
                    item = items[0]
                    self.maps_to = item[0]
                    if isinstance(item[1], dict):
                        settings = ColumnSettings(**item[1])
                    else:
                        raise ValueError(f'Expected dict after first element of {items}')
                    self.merge_settings(settings)
            else:
                # bare descriptor, no settings attached
                self.maps_to = info
            mm = get_metamodel()
            snmap = mm.slot_name_mappings()
            # purely diagnostic: report entries whose key differs from the slot's own name
            for k, v in snmap.items():
                if k != v.name:
                    logging.info(f"Mismatch between slot_name_mapping key {k} slot name {v.name}")
            # TODO: use alias
            # 'uri' is accepted as another key for the 'type_uri' metaslot
            snmap['uri'] = snmap['type_uri']
            if self.maps_to.startswith("metaslot."):
                # explicit "metaslot.<name>" form: strip the prefix and look the name up
                # NOTE(review): an unknown name here would surface as a KeyError from the
                # lookup rather than a ValueError - confirm this is intended
                maps_to = self.maps_to.replace("metaslot.", "")
                self.metaslot = snmap[maps_to]
                self.maps_to = maps_to
            elif self.maps_to in snmap and self.maps_to != 'type':
                # 'type' is excluded here so that it falls through to the tmap/Shortcuts check
                self.metaslot = snmap[self.maps_to]
            else:
                if self.maps_to not in tmap and self.maps_to not in Shortcuts:
                    raise ValueError(f'Cannot interpret: {self.maps_to}')
        else:
            # descriptor already known: treat info as additional settings
            settings = ColumnSettings(**info)
            self.merge_settings(settings)
            if settings.inner_key:
                snmap = get_metamodel().slot_name_mappings()
                if settings.inner_key in snmap:
                    self.inner_key_metaslot = snmap[settings.inner_key]
125 |
126 |
127 |
@dataclass
class TableConfig:
    """
    Configuration for an entire table / schema sheet

    Holds one :class:`ColumnConfig` per column, plus bookkeeping about which columns
    play a special role (element type columns, the metatype column, the name column).
    """
    name: str = None
    """table name"""

    columns: Dict[COL_NAME, ColumnConfig] = None
    """maps column names to config"""

    column_by_element_type: Dict[str, COL_NAME] = None
    """maps element types (schema, class, ...) to the name of the column that represents them"""

    metatype_column: COL_NAME = None
    """Column that represents the metatype designator"""

    name_column: COL_NAME = None
    """Column that represents that name of the entity"""

    def add_info(self, col: COL_NAME, info: Union[Dict, DESCRIPTOR]) -> None:
        """
        Wrapper for :ref:`ColumnConfig.add_info`

        Creates the column's config on first use, forwards ``info`` to it, and then
        refreshes the table-level bookkeeping (metatype column, name column, and the
        element-type-to-column index).

        Calling this repeatedly for the same column is supported: adding further
        settings to the existing metatype or name column does not count as declaring
        a second one.

        :param col: name of the column the information applies to
        :param info: descriptor or settings, as accepted by ColumnConfig.add_info
        :raises ValueError: if a second, different column is declared as the metatype
            column or as the name column
        """
        if self.columns is None:
            # tolerate a TableConfig created without an explicit columns dict
            self.columns = {}
        if col not in self.columns:
            self.columns[col] = ColumnConfig(col)
        column = self.columns[col]
        column.add_info(info)
        if column.maps_to == 'metatype':
            if self.metatype_column and self.metatype_column != col:
                raise ValueError(f'Multiple metatype columns not allowed: {self.metatype_column}, {col}')
            self.metatype_column = col
        if column.maps_to == 'name':
            # only a *different* column is a conflict; the same column is revisited
            # whenever extra settings rows are added for it
            if self.name_column and self.name_column != col:
                raise ValueError(f'Multiple name columns not allowed: {self.name_column}, {col}')
            self.name_column = col
        if self.column_by_element_type is None:
            self.column_by_element_type = {}
        # re-index every column, flagging those that map directly to an element type
        for c, cc in self.columns.items():
            if cc.maps_to in tmap:
                self.column_by_element_type[cc.maps_to] = c
                cc.is_element_type = True
173 |
174 |
@dataclass
class SchemaSheet:
    """
    A SchemaSheet consists of:

    - a collection of rows, each row representing a schema element
    - a TableConfiguration
    """
    table_config: TableConfig
    rows: List[ROW]
    start_line_number: int
    table_config_rows: List[ROW] = None

    @classmethod
    def from_csv(cls, path: str, delimiter='\t'):
        """
        Opens a delimited file and parses it with :meth:`from_dictreader`

        :param path: path of the file to read
        :param delimiter: column delimiter (tab by default)
        :return: the parsed sheet
        """
        with open(path, newline='') as handle:
            return cls.from_dictreader(csv.DictReader(handle, delimiter=delimiter))

    @staticmethod
    def from_dictreader(reader: csv.DictReader) -> "SchemaSheet":
        """
        Splits the rows of a schemasheets file into descriptor rows and data rows

        A row whose first cell starts with '>' is a descriptor row: each of its
        non-empty cells is parsed as YAML and added to the table configuration.
        All other rows are collected unchanged.

        :param reader: reader positioned on the sheet
        :return: sheet holding the table configuration, descriptor rows and data rows
        """
        config = TableConfig(columns={})
        data_rows = []
        descriptor_rows = []
        current_line = 1
        descriptor_count = 0
        for row in reader:
            logging.debug(f"ROW: {row}")
            # google sheets
            if "" in row:
                del row[""]
            first_column = list(row.keys())[0]
            if not row[first_column].startswith('>'):
                data_rows.append(row)
                continue
            descriptor_rows.append(row)
            current_line += 1
            descriptor_count += 1
            for column, cell in row.items():
                if cell is not None and cell.startswith('>'):
                    cell = cell.replace('>', '')
                if cell:
                    config.add_info(column, yaml.safe_load(cell))
                    continue
                if current_line == 2:
                    # TODO: consider auto-interpreting
                    raise ValueError(f'Enter an interpretation for {column}')
                logging.debug(f'Empty val for {column} in line {current_line}')
        if descriptor_count == 0:
            logging.warning(f"No descriptor line found in {current_line} lines. Start line_num = {current_line}")
        return SchemaSheet(
            table_config=config,
            table_config_rows=descriptor_rows,
            rows=data_rows,
            start_line_number=current_line,
        )

    def load_table_config(self, config: Union[dict, str, Path]) -> None:
        """
        Loads a table configuration from a file or dict

        :param config: a dict keyed by column name, or the path of a YAML file
            containing such a dict
        :return:
        """
        if not isinstance(config, dict):
            with open(config) as stream:
                loaded = yaml.safe_load(stream)
            return self.load_table_config(loaded)
        for column, infos in config.items():
            # a list supplies several pieces of info for one column, applied in order
            for info in (infos if isinstance(infos, list) else [infos]):
                self.table_config.add_info(column, info)
254 |
@lru_cache()
def get_metamodel() -> SchemaView:
    """
    Loads meta.yaml from the linkml_runtime metamodel package and wraps it in a
    SchemaView. The result is cached by lru_cache.

    this can be retired when https://github.com/linkml/linkml-runtime/pull/100/
    is in major release
    :return: SchemaView over the LinkML metamodel
    """
    schema_dir = PurePath('model') / 'schema'
    raw_yaml = pkgutil.get_data('linkml_runtime.linkml_model.meta', f'{schema_dir}/meta.yaml')
    return SchemaView(raw_yaml.decode("utf-8"))
268 |
@lru_cache()
def get_configmodel() -> SchemaView:
    """
    Loads configschema.yaml from the schemasheets configuration package and wraps it
    in a SchemaView. The result is cached by lru_cache.

    this can be retired when https://github.com/linkml/linkml-runtime/pull/100/
    is in major release
    :return: SchemaView over the config schema
    """
    raw_yaml = pkgutil.get_data('schemasheets.conf.configschema', 'configschema.yaml')
    return SchemaView(raw_yaml.decode("utf-8"))
281 |
--------------------------------------------------------------------------------