├── .github └── workflows │ ├── main.yaml │ └── pypi-publish.yaml ├── .gitignore ├── .gitmodules ├── AUTHORS ├── ChangeLog ├── Pipfile ├── README.md ├── ancilliary ├── __init__.py └── earlreport.py ├── docker └── Dockerfile ├── license.txt ├── notebooks ├── BP_example_02.ipynb ├── CLI.ipynb ├── PersistentResults.ipynb ├── PrefixLib.ipynb ├── SPARQLEndpoints.ipynb ├── Schema_org.ipynb ├── SpecifyingStartShape.ipynb ├── WikiSlurper.ipynb ├── bloodpressure_example.ipynb ├── book_small.ipynb ├── book_small_text.ipynb ├── inconsistent_test.ipynb ├── school_example.ipynb ├── schoolbook_graph.ipynb ├── shex_example.ipynb ├── simple_shex.ipynb └── test_shexjsg.ipynb ├── pyshex ├── __init__.py ├── evaluate.py ├── git_describe.txt ├── parse_tree │ ├── __init__.py │ └── parse_node.py ├── prefixlib.py ├── shape_expressions_language │ ├── __init__.py │ ├── p3_terminology.py │ ├── p5_2_validation_definition.py │ ├── p5_3_shape_expressions.py │ ├── p5_4_node_constraints.py │ ├── p5_5_shapes_and_triple_expressions.py │ ├── p5_6_schema_requirements.py │ ├── p5_7_semantic_actions.py │ └── p5_context.py ├── shapemap_structure_and_language │ ├── __init__.py │ ├── p1_notation_and_terminology.py │ ├── p3_shapemap_structure.py │ └── p4_shapemap_usage.py ├── shex_evaluator.py ├── shex_manifest │ ├── __init__.py │ └── manifest.py ├── sparql11_query │ ├── __init__.py │ └── p17_1_operand_data_types.py ├── user_agent.py └── utils │ ├── __init__.py │ ├── collection_utils.py │ ├── datatype_utils.py │ ├── deprecated.py │ ├── matchesEachOfEvaluator.py │ ├── n3_mapper.py │ ├── partitions.py │ ├── rdf_namespace.py │ ├── schema_loader.py │ ├── schema_utils.py │ ├── slurp_utils.py │ ├── sparql_query.py │ ├── stringtoken.py │ ├── tortoise.py │ ├── trace_utils.py │ ├── url_utils.py │ └── value_set_utils.py ├── requirements-dev.txt ├── requirements.txt ├── setup.cfg ├── setup.py ├── tests ├── __init__.py ├── data │ ├── context.jsonld │ ├── earl_report.ttl │ ├── patient-example-d.ttl │ ├── patient.shex │ 
├── schemas │ │ ├── 1Adot.json │ │ ├── biolink-modelnc.shex │ │ ├── meta.shex │ │ ├── startCode3.json │ │ ├── startCode3.shex │ │ └── startCode3.ttl │ ├── t1.shex │ ├── t1.sparql │ └── validation │ │ ├── 1dot-relative.shex │ │ ├── Is1_Ip1_LSTRING_LITERAL1_with_all_punctuation.ttl │ │ ├── Pstar.ttl │ │ ├── anon_start.shex │ │ ├── anon_start.ttl │ │ ├── biolink-model.ttl │ │ ├── manifest.jsonld │ │ ├── manifest.ttl │ │ ├── simple.shex │ │ ├── simple.ttl │ │ └── type-samples.ttl ├── test_biolink │ ├── __init__.py │ ├── data │ │ ├── meta.json │ │ ├── meta.ttl │ │ └── metashex.json │ └── test_biolink_items.py ├── test_cli │ ├── __init__.py │ ├── clitests.py │ ├── input │ │ ├── obs.shex │ │ ├── obs.ttl │ │ └── sparql.sparql │ ├── output │ │ └── evaluate │ │ │ ├── biolinkfail │ │ │ ├── biolinkpass │ │ │ ├── dbsparql1 │ │ │ ├── dbsparql2 │ │ │ ├── dbsparql3 │ │ │ ├── dbsparql4 │ │ │ ├── dbsparql5 │ │ │ ├── dbsparql6 │ │ │ ├── dbsparql7 │ │ │ ├── help │ │ │ ├── obs1 │ │ │ ├── pred-samples │ │ │ ├── t1 │ │ │ └── type-samples │ ├── test_evaluate.py │ └── test_sparql_options.py ├── test_collection_support │ └── test_collections.py ├── test_issues │ ├── __init__.py │ ├── data │ │ ├── Is1_Ip1_L_with_REGEXP_escapes_bare.ttl │ │ ├── Q12214.ttl │ │ ├── Q12214_min.ttl │ │ ├── Q12214_min_2.ttl │ │ ├── Q18557122.ttl │ │ ├── biolink-model.shex │ │ ├── biolink_model.sparql │ │ ├── bl_namedthing.shex │ │ ├── disease_min.shex │ │ ├── example-haplotype2.results │ │ ├── example-haplotype2.ttl │ │ ├── example-haplotype2_online.results │ │ ├── issue_20.errors │ │ ├── issue_20.shex │ │ ├── issue_20.ttl │ │ ├── manifests │ │ │ └── disease_manifest.json │ │ ├── observation.shex │ │ ├── shex │ │ │ ├── disease.shex │ │ │ └── issue_54.shex │ │ └── wikidata │ │ │ ├── disease │ │ │ ├── Q11085.ttl │ │ │ ├── Q12135.ttl │ │ │ ├── Q12206.ttl │ │ │ ├── Q12214.ttl │ │ │ ├── Q16495.ttl │ │ │ ├── Q18657.ttl │ │ │ ├── Q36855.ttl │ │ │ ├── Q36956.ttl │ │ │ ├── Q38404.ttl │ │ │ ├── Q40301.ttl │ │ │ ├── 
Q42982.ttl │ │ │ ├── Q49989.ttl │ │ │ ├── Q819207.ttl │ │ │ ├── Q8277.ttl │ │ │ ├── Q8285.ttl │ │ │ ├── Q829150.ttl │ │ │ ├── Q842169.ttl │ │ │ ├── Q848371.ttl │ │ │ ├── Q860395.ttl │ │ │ ├── Q883850.ttl │ │ │ ├── Q896643.ttl │ │ │ └── Q913856.ttl │ │ │ └── reactome │ │ │ ├── Q29017194.ttl │ │ │ ├── Q34340147.ttl │ │ │ ├── Q45316529.ttl │ │ │ ├── Q45316651.ttl │ │ │ ├── Q45316899.ttl │ │ │ ├── Q45316901.ttl │ │ │ ├── Q45316902.ttl │ │ │ ├── Q45316906.ttl │ │ │ ├── Q45317265.ttl │ │ │ ├── Q45317394.ttl │ │ │ └── Q45317517.ttl │ ├── test_andra_loop.py │ ├── test_comment_issue.py │ ├── test_crlf.py │ ├── test_diseases.py │ ├── test_fhir.py │ ├── test_guardian_issue.py │ ├── test_ill_founded.py │ ├── test_issue_11.py │ ├── test_issue_20.py │ ├── test_issue_21.py │ ├── test_issue_23.py │ ├── test_issue_25.py │ ├── test_issue_26.py │ ├── test_issue_28.py │ ├── test_issue_29.py │ ├── test_issue_30.py │ ├── test_issue_41.py │ ├── test_issue_42.py │ ├── test_issue_51.py │ ├── test_issue_54.py │ ├── test_issue_58.py │ ├── test_literal_issue15.py │ ├── test_no_start_node.py │ ├── test_rdf_parser.py │ ├── test_reactome.py │ ├── test_shexjs_issue14.py │ ├── test_shexjs_issue16.py │ ├── test_te_names.py │ ├── test_wikidata_1.py │ └── test_wild_rdf_datatype.py ├── test_notebooks │ ├── __init__.py │ ├── test_book_small_text.py │ ├── test_schemaorg.py │ └── wikidata_issue.py ├── test_p5_9_validation_examples │ ├── __init__.py │ └── test_p5_9_1_simple_examples.py ├── test_primer │ ├── SPARQLEndpoints.py │ ├── __init__.py │ └── test_1_quick_start.py ├── test_pyshex_utils │ ├── __init__.py │ ├── test_numeric_digits.py │ ├── test_partitions.py │ ├── test_patterns_in_json.py │ ├── test_schema_loader.py │ └── test_visitor.py ├── test_shape_expressions_language │ ├── __init__.py │ ├── test_p3_terminology.py │ ├── test_p5_4_2_node_kind_constraints.py │ ├── test_p5_4_3_datatype_constraints.py │ ├── test_p5_4_4_string_facet_constraints.py │ ├── test_p5_4_5_numeric_facet_constraints.py │ ├── 
test_p5_4_6_values_constraint.py │ └── test_p5_context.py ├── test_shapemap_structure_and_language │ ├── __init__.py │ └── test_p1_notation_and_terminology.py ├── test_shex_manifest │ ├── __init__.py │ └── test_basics.py ├── test_shextest_validation │ ├── __init__.py │ ├── test_manifest_shex_json.py │ └── test_manifest_shex_shexc.py ├── test_support_libraries │ ├── __init__.py │ ├── local_context.py │ ├── test_prefixlib.py │ └── test_shex_evaluator.py ├── test_utils │ ├── __init__.py │ ├── object │ │ └── observation_example_haplotype1.ttl │ ├── source │ │ └── observation_example_haplotype1.ttl │ ├── test_manifest.py │ ├── test_n3_mapper.py │ ├── test_sparql_query.py │ └── test_tortoise.py └── utils │ ├── SortoGraph.py │ ├── __init__.py │ ├── manifest.py │ ├── manifest_tester.py │ ├── setup_test.py │ ├── uri_redirector.py │ ├── web_server_utils.py │ └── wikidata_utils.py └── tox.ini /.github/workflows/main.yaml: -------------------------------------------------------------------------------- 1 | name: Build 2 | 3 | on: 4 | push: 5 | 6 | jobs: 7 | update-requirements: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v2 11 | - uses: actions/setup-python@v2 12 | with: 13 | python-version: 3.8 14 | - uses: dschep/install-pipenv-action@v1 15 | - name: Update requirements 16 | run: | 17 | python -m pip install --upgrade pip 18 | pip install pipenv-to-requirements 19 | pipenv lock 20 | pipenv_to_requirements 21 | git add requirements*.txt 22 | if [[ -n $(git status -s requirements*.txt) ]] 23 | then 24 | git config --local user.email "action@github.com" 25 | git config --local user.name "GitHub Action" 26 | git commit -m 'Automatically generated requirements' requirements*.txt 27 | git push 28 | fi 29 | 30 | test: 31 | needs: update-requirements 32 | name: Run TOX tests 33 | runs-on: ubuntu-latest 34 | strategy: 35 | matrix: 36 | python-version: [ 3.7, 3.8, 3.9, "3.10" ] 37 | 38 | steps: 39 | - uses: actions/checkout@v2 40 | with: 41 | submodules: true 
42 | - uses: actions/setup-python@v2 43 | with: 44 | python-version: ${{ matrix.python-version }} 45 | 46 | - name: Run TOX 47 | env: 48 | # Prevents conflicts during concurrent runs. Further 49 | # configuration takes place in the tox.ini file. 50 | COVERAGE_FILE: .coverage.${{matrix.python-version}} 51 | run: | 52 | echo "Generated by build process" > tests/data/SKIP_EXTERNAL_URLS 53 | pip install tox 54 | tox -e py 55 | 56 | - uses: codecov/codecov-action@v3 57 | with: 58 | # files: ./coverage1.xml,./coverage2.xml # optional 59 | # flags: unittests # optional 60 | # name: codecov-umbrella # optional 61 | # fail_ci_if_error: true # optional (default = false) 62 | verbose: true # optional (default = false) 63 | -------------------------------------------------------------------------------- /.github/workflows/pypi-publish.yaml: -------------------------------------------------------------------------------- 1 | name: Publish Python Package 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | jobs: 8 | build-n-publish: 9 | name: Build and publish Python 🐍 distributions 📦 to PyPI and TestPyPI 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v2 13 | - uses: actions/setup-python@v2 14 | 15 | - name: Install dependencies 16 | run: | 17 | python -m pip install --upgrade pip 18 | pip install wheel 19 | - name: build a binary wheel dist 20 | run: | 21 | rm -fr dist 22 | python setup.py bdist_wheel sdist 23 | 24 | - name: Publish distribution 📦 to PyPI 25 | uses: pypa/gh-action-pypi-publish@v1.2.2 26 | with: 27 | user: __token__ 28 | password: ${{ secrets.pypi_password }} 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Python template 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | 
# Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | .hypothesis/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | .static_storage/ 58 | .media/ 59 | local_settings.py 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # SageMath parsed files 84 | *.sage.py 85 | 86 | # Environments 87 | .env 88 | .venv 89 | env/ 90 | venv/ 91 | ENV/ 92 | env.bak/ 93 | venv.bak/ 94 | 95 | # Spyder project settings 96 | .spyderproject 97 | .spyproject 98 | 99 | # Rope project settings 100 | .ropeproject 101 | 102 | # mkdocs documentation 103 | /site 104 | 105 | # mypy 106 | .mypy_cache/ 107 | 108 | .idea/ 109 | 110 | # Do not submit Pipfile.lock (https://pypi.org/project/pipenv-to-requirements/) 111 | Pipfile.lock 112 | 113 | # mac os specific files 114 | .DS_Store 115 | 116 | # SKIP_EXTERNAL_URLS is added by test harnesses where appropriate 117 | tests/data/SKIP_EXTERNAL_URLS 118 | -------------------------------------------------------------------------------- /.gitmodules: 
-------------------------------------------------------------------------------- 1 | [submodule "tests/data/shexTest"] 2 | path = tests/data/shexTest 3 | url = git@github.com:shexSpec/shexTest.git 4 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Alejandro González Hevia 2 | Ben McAlister 3 | Egon Willighagen 4 | Harold Solbrig 5 | Harold Solbrig 6 | Harold Solbrig 7 | andrawaag 8 | hsolbrig 9 | hsolbrig 10 | hsolbrig 11 | vemonet 12 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | name = "pypi" 3 | url = "https://pypi.org/simple" 4 | verify_ssl = true 5 | 6 | [dev-packages] 7 | pbr = "*" 8 | 9 | [packages] 10 | chardet = "*" 11 | rdflib-shim = "*" 12 | requests = ">=2.22.0" 13 | urllib3 = "*" 14 | ShExJSG = ">=0.8.2" 15 | CFGraph = ">=0.2.1" 16 | PyShExC = "==0.9.1" 17 | sparqlslurper = ">=0.5.1" 18 | sparqlwrapper = ">=1.8.5" 19 | pytest = ">=7.2.1" 20 | pytest-cov = ">=4.0.0" 21 | -------------------------------------------------------------------------------- /ancilliary/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hsolbrig/PyShEx/88b8449939f394545c84741db1668bd8a4d1fdbc/ancilliary/__init__.py -------------------------------------------------------------------------------- /ancilliary/earlreport.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | from rdflib import Graph, URIRef, BNode, Namespace, RDF 4 | from rdflib.namespace import DC 5 | from rdflib.term import Node, Literal 6 | 7 | header = """ 8 | @prefix rdf: . 9 | @prefix rdfs: . 10 | @prefix dc: . 11 | @prefix earl: . 12 | @prefix foaf: . 13 | @prefix doap: . 14 | @prefix ex: . 
class EARLPage:
    """ Accumulate EARL (Evaluation and Report Language) assertions in an RDF graph.

    The graph is seeded by parsing the module-level ``header`` Turtle document.
    Test outcomes are appended with :meth:`add_test_result` and the complete
    report is rendered as Turtle by ``str(page)``.
    """

    def __init__(self, author: URIRef):
        """
        :param author: IRI recorded as ``earl:assertedBy`` on every assertion
        """
        self.g = Graph()
        self.g.parse(data=header, format="turtle")
        self.author = author

    def add(self, s: Node, p: URIRef, o: Node) -> "EARLPage":
        """ Add the triple ``(s, p, o)`` to the report graph.

        :return: ``self`` so that calls can be chained
        """
        self.g.add((s, p, o))
        return self

    def add_test_result(self, test_entry: str, status: str) -> None:
        """ Record one ``earl:Assertion`` for a manifest entry together with its result.

        :param test_entry: local name of the test, resolved in the ``MFST`` namespace
        :param status: local name of the outcome in the ``EARL`` namespace (e.g. 'passed')
        """
        entry = BNode()
        self.add(entry, RDF.type, EARL.Assertion)\
            .add(entry, EARL.assertedBy, self.author)\
            .add(entry, EARL.test, MFST[test_entry])\
            .add(entry, EARL.subject, URIRef("https://pypi.org/project/PyShEx/"))\
            .add(entry, EARL.mode, EARL.automatic)
        self._add_result(entry, status)

    def _add_result(self, entry: BNode, status: str) -> None:
        """ Attach an ``earl:TestResult`` node carrying the outcome and a timestamp to ``entry``.

        :param entry: assertion node that receives the ``earl:result`` link
        :param status: local name of the outcome in the ``EARL`` namespace
        """
        rslt = BNode()
        # Naive UTC timestamp: same text as the deprecated datetime.utcnow().isoformat()
        timestamp = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None).isoformat()
        self.add(rslt, RDF.type, EARL.TestResult)\
            .add(rslt, EARL.outcome, EARL[status])\
            .add(rslt, DC.date, Literal(timestamp))\
            .add(entry, EARL.result, rslt)

    def __str__(self) -> str:
        """ Return the report serialized as Turtle. """
        rendered = self.g.serialize(format="turtle")
        # serialize() yields bytes on older rdflib releases and str on newer ones;
        # only decode when there is actually something to decode.
        return rendered.decode() if isinstance(rendered, bytes) else rendered
A list of one or more starting shapes that are used to evaluate the focus nodes for conformance\n", 16 | "\n" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [] 39 | } 40 | ], 41 | "metadata": { 42 | "kernelspec": { 43 | "display_name": "Python 2", 44 | "language": "python", 45 | "name": "python2" 46 | }, 47 | "language_info": { 48 | "codemirror_mode": { 49 | "name": "ipython", 50 | "version": 2 51 | }, 52 | "file_extension": ".py", 53 | "mimetype": "text/x-python", 54 | "name": "python", 55 | "nbconvert_exporter": "python", 56 | "pygments_lexer": "ipython2", 57 | "version": "2.7.6" 58 | } 59 | }, 60 | "nbformat": 4, 61 | "nbformat_minor": 0 62 | } 63 | -------------------------------------------------------------------------------- /notebooks/inconsistent_test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# PyShEx Shape inconsistency detection\n", 8 | "If `satisfiesShape` is recursively called with the same focus node and shape definition, it:\n", 9 | "\n", 10 | "1) Conditionally asserts that the result is `True`. If this assertion results in the outermost evaluation to also be `True`, the focus node is reported as conforming to the shape definition.\n", 11 | "\n", 12 | "2) If assertion 1 results in an evaluation of `False`, it then conditionally asserts that the result is `False`. If this results in the entire evaluation being `False`, the focus node is reported as not conforming to the shape definition. 
\n", 13 | "\n", 14 | "3) Otherwise, the focus node is reported as \"not conforming\" with the reason being that the definition is inconsistent. (Quotes because it isn't actually possible to determine conformance to an inconsistent shape, but the return structure doesn't support a third alternative)" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "!pip install PyShEx --upgrade -q" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 2, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "from rdflib import Graph, Namespace\n", 33 | "from pyshex import ShExEvaluator\n", 34 | "\n", 35 | "\n", 36 | "EX = Namespace(\"http://a.example/\")" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 3, 42 | "metadata": {}, 43 | "outputs": [ 44 | { 45 | "name": "stdout", 46 | "output_type": "stream", 47 | "text": [ 48 | "True\n" 49 | ] 50 | } 51 | ], 52 | "source": [ 53 | "shex = \"\"\" { @}\"\"\"\n", 54 | "g = Graph()\n", 55 | "g.add((EX.x, EX.p, EX.x))\n", 56 | "e = ShExEvaluator(rdf=g, schema=shex, focus=EX.x, start=EX.S, debug=False)\n", 57 | "print(e.evaluate()[0].result)" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 4, 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "name": "stdout", 67 | "output_type": "stream", 68 | "text": [ 69 | "False: Testing against shape http://a.example/S\n", 70 | " Testing against shape http://a.example/S\n", 71 | " http://a.example/S: Inconsistent recursive shape reference\n" 72 | ] 73 | } 74 | ], 75 | "source": [ 76 | "shex = \"\"\" { not @}\"\"\"\n", 77 | "g = Graph()\n", 78 | "g.add((EX.x, EX.p, EX.x))\n", 79 | "e = ShExEvaluator(rdf=g, schema=shex, focus=EX.x, start=EX.S, debug=False)\n", 80 | "rslt = e.evaluate()\n", 81 | "print(f\"{rslt[0].result}: {rslt[0].reason}\")" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | 
"outputs": [], 89 | "source": [] 90 | } 91 | ], 92 | "metadata": { 93 | "kernelspec": { 94 | "display_name": "Python 3", 95 | "language": "python", 96 | "name": "python3" 97 | }, 98 | "language_info": { 99 | "codemirror_mode": { 100 | "name": "ipython", 101 | "version": 3 102 | }, 103 | "file_extension": ".py", 104 | "mimetype": "text/x-python", 105 | "name": "python", 106 | "nbconvert_exporter": "python", 107 | "pygments_lexer": "ipython3", 108 | "version": "3.7.5" 109 | } 110 | }, 111 | "nbformat": 4, 112 | "nbformat_minor": 2 113 | } 114 | -------------------------------------------------------------------------------- /notebooks/shex_example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "This example assumes that PyShEx has been installed in jupyter environment" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "name": "stdout", 17 | "output_type": "stream", 18 | "text": [ 19 | "CONFORMS\n" 20 | ] 21 | } 22 | ], 23 | "source": [ 24 | "from pyshex.evaluate import evaluate\n", 25 | "from rdflib import Graph, Namespace, XSD\n", 26 | "\n", 27 | "EX = Namespace(\"http://our.org/example#\")\n", 28 | "FHIR = Namespace(\"http://hl7.org/fhir/\")\n", 29 | "\n", 30 | "shex = f\"\"\"PREFIX : <{FHIR}>\n", 31 | "PREFIX xsd: <{XSD}>\n", 32 | "BASE <{EX}>\n", 33 | "\n", 34 | "start = @:ObservationShape\n", 35 | "\n", 36 | ":ObservationShape {{ # An Observation has:\n", 37 | " :status [\"preliminary\" \"final\"]; # status in this value set\n", 38 | " :subject @:PatientShape # a subject matching .\n", 39 | "}}\n", 40 | "\n", 41 | ":PatientShape {{ # A Patient has:\n", 42 | " :name xsd:string*; # one or more names\n", 43 | " :birthdate xsd:date? 
# and an optional birthdate.\n", 44 | "}}\n", 45 | "\"\"\"\n", 46 | "\n", 47 | "ttl = f\"\"\"PREFIX : <{FHIR}>\n", 48 | "PREFIX xsd: <{XSD}>\n", 49 | "PREFIX ex: <{EX}>\n", 50 | "\n", 51 | "ex:Obs1\n", 52 | " :status \"final\" ;\n", 53 | " :subject ex:Patient2 .\n", 54 | "\n", 55 | "ex:Patient2\n", 56 | " :name \"Bob\" ;\n", 57 | " :birthdate \"1999-12-31\"^^xsd:date .\"\"\"\n", 58 | "\n", 59 | "g = Graph()\n", 60 | "g.parse(data=ttl, format=\"turtle\")\n", 61 | "\n", 62 | "rslt, reason = evaluate(g, shex, EX.Obs1)\n", 63 | "if rslt:\n", 64 | " print(\"CONFORMS\")\n", 65 | "else:\n", 66 | " print(f\"{reason if reason else 'DOES NOT CONFORM'}\")\n" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [] 75 | } 76 | ], 77 | "metadata": { 78 | "kernelspec": { 79 | "display_name": "Python 3", 80 | "language": "python", 81 | "name": "python3" 82 | }, 83 | "language_info": { 84 | "codemirror_mode": { 85 | "name": "ipython", 86 | "version": 3 87 | }, 88 | "file_extension": ".py", 89 | "mimetype": "text/x-python", 90 | "name": "python", 91 | "nbconvert_exporter": "python", 92 | "pygments_lexer": "ipython3", 93 | "version": "3.7.5" 94 | } 95 | }, 96 | "nbformat": 4, 97 | "nbformat_minor": 1 98 | } 99 | -------------------------------------------------------------------------------- /notebooks/simple_shex.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "This example assumes that PyShEx has been installed in jupyter environment" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "name": "stdout", 17 | "output_type": "stream", 18 | "text": [ 19 | "Valid: True\n", 20 | "{\n", 21 | " \"type\": \"Schema\",\n", 22 | " \"@context\": \"http://www.w3.org/ns/shex.jsonld\",\n", 23 | " \"shapes\": 
[\n", 24 | " {\n", 25 | " \"type\": \"Shape\",\n", 26 | " \"id\": \"http://a.example/S1\",\n", 27 | " \"expression\": {\n", 28 | " \"type\": \"TripleConstraint\",\n", 29 | " \"predicate\": \"http://a.example/p1\",\n", 30 | " \"valueExpr\": {\n", 31 | " \"type\": \"NodeConstraint\",\n", 32 | " \"values\": [\n", 33 | " \"http://a.example/o1\"\n", 34 | " ]\n", 35 | " }\n", 36 | " }\n", 37 | " }\n", 38 | " ]\n", 39 | "}\n", 40 | "CONFORMS!\n" 41 | ] 42 | } 43 | ], 44 | "source": [ 45 | "from ShExJSG import Schema\n", 46 | "from ShExJSG.ShExJ import Shape, IRIREF, TripleConstraint, NodeConstraint\n", 47 | "from rdflib import Namespace, Graph\n", 48 | "from pyshex.evaluate import evaluate\n", 49 | "\n", 50 | "EX = Namespace(\"http://a.example/\")\n", 51 | "\n", 52 | "schema = Schema()\n", 53 | "schema.shapes = [Shape(id=IRIREF(EX.S1),\n", 54 | " expression=TripleConstraint(predicate=IRIREF(EX.p1),\n", 55 | " valueExpr=NodeConstraint(values=[IRIREF(EX.o1)])))\n", 56 | " ]\n", 57 | "print(f\"Valid: {schema._is_valid()}\")\n", 58 | "print(f\"{schema._as_json_dumps()}\")\n", 59 | "\n", 60 | "g = Graph()\n", 61 | "g.add((EX.s1, EX.p1, EX.o1))\n", 62 | "\n", 63 | "if evaluate(g, schema, focus=EX.s1, start=EX.S1)[0]:\n", 64 | " print(\"CONFORMS!\")\n", 65 | "else:\n", 66 | " print(\"** ERROR\")\n", 67 | "\n", 68 | "\n", 69 | "\n" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [] 78 | } 79 | ], 80 | "metadata": { 81 | "kernelspec": { 82 | "display_name": "Python 3", 83 | "language": "python", 84 | "name": "python3" 85 | }, 86 | "language_info": { 87 | "codemirror_mode": { 88 | "name": "ipython", 89 | "version": 3 90 | }, 91 | "file_extension": ".py", 92 | "mimetype": "text/x-python", 93 | "name": "python", 94 | "nbconvert_exporter": "python", 95 | "pygments_lexer": "ipython3", 96 | "version": "3.7.5" 97 | } 98 | }, 99 | "nbformat": 4, 100 | "nbformat_minor": 1 101 | } 102 | 
-------------------------------------------------------------------------------- /notebooks/test_shexjsg.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "This example assumes that PyShEx has been installed in jupyter environment" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "name": "stdout", 17 | "output_type": "stream", 18 | "text": [ 19 | "True\n", 20 | "{\n", 21 | " \"type\": \"Schema\",\n", 22 | " \"@context\": \"http://www.w3.org/ns/shex.jsonld\",\n", 23 | " \"start\": \"blabla\"\n", 24 | "}\n" 25 | ] 26 | } 27 | ], 28 | "source": [ 29 | "from ShExJSG import ShExJ\n", 30 | "\n", 31 | "shapeEx = ShExJ.Schema()\n", 32 | "shapeEx['@context'] = \"http://www.w3.org/ns/shex.jsonld\"\n", 33 | "shapeEx.start = \"blabla\"\n", 34 | "print(shapeEx._is_valid())\n", 35 | "print(shapeEx._as_json_dumps())" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [] 44 | } 45 | ], 46 | "metadata": { 47 | "kernelspec": { 48 | "display_name": "Python 3", 49 | "language": "python", 50 | "name": "python3" 51 | }, 52 | "language_info": { 53 | "codemirror_mode": { 54 | "name": "ipython", 55 | "version": 3 56 | }, 57 | "file_extension": ".py", 58 | "mimetype": "text/x-python", 59 | "name": "python", 60 | "nbconvert_exporter": "python", 61 | "pygments_lexer": "ipython3", 62 | "version": "3.7.5" 63 | } 64 | }, 65 | "nbformat": 4, 66 | "nbformat_minor": 1 67 | } 68 | -------------------------------------------------------------------------------- /pyshex/__init__.py: -------------------------------------------------------------------------------- 1 | from pyshex.prefixlib import PrefixLibrary, standard_prefixes, known_prefixes 2 | from pyshex.shex_evaluator import ShExEvaluator 3 | 4 | import 
def evaluate(g: Graph,
             schema: Union[str, ShExJ.Schema],
             focus: Optional[Union[str, URIRef, IRIREF]],
             start: Optional[Union[str, URIRef, IRIREF, START, START_TYPE]]=None,
             debug_trace: bool = False) -> Tuple[bool, Optional[str]]:
    """ Evaluate focus node `focus` in graph `g` against shape `start` in ShEx schema `schema`

    :param g: Graph containing RDF
    :param schema: ShEx Schema -- if str, it will be parsed
    :param focus: focus node in g. If None, every URI subject in g is evaluated.
    :param start: Starting shape. If omitted, the Schema start shape is used
    :param debug_trace: Turn on debug tracing
    :return: Tuple of (success indicator, newline-joined reasons returned by isValid).  Schema parse
        failures and a missing start shape are reported as (False, explanatory message).
    """
    if isinstance(schema, str):
        schema = SchemaLoader().loads(schema)
    if schema is None:
        return False, "Error parsing schema"

    if focus is None:
        # Documented contract: no focus means "all URI subjects".  Previously None was
        # stringified into the bogus node <None>.  Dedupe and sort for a stable order.
        foci = sorted({s for s in g.subjects() if isinstance(s, URIRef)})
    elif isinstance(focus, URIRef):
        foci = [focus]
    else:
        foci = [URIRef(str(focus))]

    if start is None:
        start = str(schema.start) if schema.start else None
    if start is None:
        return False, "No starting shape"
    # START / START_TYPE are passed through untouched; anything else becomes an IRIREF
    if not isinstance(start, IRIREF) and start is not START and start is not START_TYPE:
        start = IRIREF(str(start))

    cntxt = Context(g, schema)
    cntxt.debug_context.debug = debug_trace
    map_ = FixedShapeMap()
    for focus_node in foci:
        map_.add(ShapeAssociation(focus_node, start))
    test_result, reasons = isValid(cntxt, map_)
    return test_result, '\n'.join(reasons)
Union[RDFGraph, Node], JSGObject], bool], 15 | expr: JSGObject, 16 | obj: Union[RDFGraph, Node], 17 | cntxt: "Context"): 18 | self.function = function 19 | self.expr = expr 20 | self.graph = obj if isinstance(obj, RDFGraph) else None 21 | self.node = obj if isinstance_(obj, Node) else None 22 | self.result: bool = None 23 | self._fail_reason: Optional[str] = None 24 | self.reason_stack: List[Tuple[Union[BNode, URIRef], Optional[str]]] = [] 25 | self.nodes: List[ParseNode] = [] 26 | self.n3m = cntxt.n3_mapper 27 | 28 | def dump_bnodes(self, g: Graph, node: BNode, indent: str, top: bool = True) -> List[str]: 29 | indent = indent + " " 30 | collection = format_collection(g, node, 6) 31 | if collection is not None: 32 | return [indent + c for c in collection] 33 | rval = [] 34 | if top: 35 | for s, p in g.subject_predicates(node): 36 | rval.append(f"{indent} {self.n3m.n3(s)} {self.n3m.n3(p)} {self.n3m.n3(node)} .") 37 | for p, o in sorted(g.predicate_objects(node)): 38 | rval += [f"{indent} {self.n3m.n3(node)} {self.n3m.n3(p)} {self.n3m.n3(o)} ."] 39 | if isinstance(o, BNode): 40 | rval += self.dump_bnodes(g, o, indent, top=False) 41 | return rval 42 | 43 | def fail_reasons(self, g: Graph, depth: int = 0) -> List[str]: 44 | def follow_reasons(d: int) -> List[str]: 45 | fr = [] 46 | if self._fail_reason: 47 | fr.append(d * " " + f" {self._fail_reason}") 48 | d += 1 49 | for n in self.nodes: 50 | fr += n.fail_reasons(g, d) 51 | return fr 52 | 53 | rval = [] 54 | for i in range(0, len(self.reason_stack)): 55 | node, shape_name = self.reason_stack[i] 56 | if not shape_name: 57 | shape_name = '(unnamed shape)' 58 | indent = (i+depth)*" " 59 | rval.append(f"{indent} Testing {self.n3m.n3(node)} against shape {shape_name}") 60 | if isinstance(node, BNode): 61 | rval += [f"{indent} {self.n3m.n3(node)} context:"] 62 | rval += self.dump_bnodes(g, node, indent) 63 | rval[-1] = rval[-1] + '\n' 64 | rval += follow_reasons(depth + len(self.reason_stack)) 65 | return rval 66 | 67 | 
def set_result(self, rval: bool) -> None: 68 | """ Set the result of the evaluation. If the result is true, prune all of the children that didn't cut it 69 | 70 | :param rval: Result of evaluation 71 | """ 72 | self.result = rval 73 | if self.result: 74 | self.nodes = [pn for pn in self.nodes if pn.result] 75 | -------------------------------------------------------------------------------- /pyshex/shape_expressions_language/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /pyshex/shape_expressions_language/p3_terminology.py: -------------------------------------------------------------------------------- 1 | """ Implementation of `3. Terminology `_ 2 | 3 | Shape expressions are defined using terms from RDF semantics [rdf11-mt]: 4 | 5 | * Node: one of IRI, blank node, Literal 6 | * Graph: a set of Triples of (subject, predicate, object) 7 | """ 8 | from typing import Set 9 | 10 | from rdflib import Graph 11 | 12 | from pyshex.shapemap_structure_and_language.p1_notation_and_terminology import Node, TriplePredicate, RDFGraph 13 | from pyshex.utils.rdf_namespace import RDFNamespace 14 | 15 | SHEX = RDFNamespace("http://www.w3.org/ns/shex#") 16 | 17 | 18 | def arcsOut(G: Graph, n: Node) -> RDFGraph: 19 | """ arcsOut(G, n) is the set of triples in a graph G with subject n. """ 20 | return RDFGraph(G.triples((n, None, None))) 21 | 22 | 23 | def predicatesOut(G: Graph, n: Node) -> Set[TriplePredicate]: 24 | """ predicatesOut(G, n) is the set of predicates in arcsOut(G, n). """ 25 | return {p for p, _ in G.predicate_objects(n)} 26 | 27 | 28 | def arcsIn(G: Graph, n: Node) -> RDFGraph: 29 | """ arcsIn(G, n) is the set of triples in a graph G with object n. 
def isValid(cntxt: Context, m: FixedShapeMap) -> Tuple[bool, List[str]]:
    """ `5.2 Validation Definition`

    isValid(G, m) holds when every nodeSelector/shapeLabel pair (n, s) in m has a corresponding shape
    expression se and satisfies(n, se, G, m).

    :param cntxt: evaluation context - includes graph and schema
    :param m: list of NodeShape pairs to test
    :return: Success/failure indicator and, if fail, a list of failure reasons
    """
    if not cntxt.is_valid:
        return False, cntxt.error_list

    parse_nodes = []
    for association in m:
        focus = association.nodeSelector
        label = association.shapeLabel

        if not isinstance_(focus, Node):
            return False, [f"{focus}: Triple patterns are not implemented"]

        # The focus must occur as a subject or an object. The final clause exists because the spec
        # asserts that completely empty graphs pass in certain circumstances.
        if (not next(cntxt.graph.predicate_objects(focus), None)
                and not next(cntxt.graph.subject_predicates(focus), None)
                and next(cntxt.graph.triples((None, None, None)), None)):
            return False, [f"Focus: {focus} not in graph"]

        use_start = label is None or label is START
        s = cntxt.shapeExprFor(START if use_start else label)
        cntxt.current_node = ParseNode(satisfies, s, focus, cntxt)
        if not s:
            if label is START or label is None:
                cntxt.fail_reason = "START node is not specified or is invalid"
            else:
                cntxt.fail_reason = f"Shape: {label} not found in Schema"
            return False, cntxt.process_reasons()

        parse_nodes.append(cntxt.current_node)
        if not satisfies(cntxt, focus, s):
            cntxt.current_node.result = False
            return False, cntxt.process_reasons()
        cntxt.current_node.result = True

    return True, []
def conforms(cntxt: Context, n: Node, S: ShExJ.Shape) -> bool:
    """ `5.6.1 Schema Validation Requirement`

    A graph G is said to conform with a schema S with a ShapeMap m when:

    Every SemAct in the startActs of S has a successful evaluation of semActsSatisfied.
    Every node n in m conforms to its associated shapeExprRefs sen where for each shapeExprRef sei in sen:
        sei references a ShapeExpr in shapes, and
        satisfies(n, sei, G, m) for each shape sei in sen.

    NOTE: this is currently a stub. None of the arguments are examined and the result is always True.

    :param cntxt: evaluation context (unused)
    :param n: node to test (unused)
    :param S: shape to test against (unused)
    :return: always True
    """
    # Unfinished sketch of the intended implementation, kept for reference:
    # return semActsSatisfied(cntxt.schema.startActs, cntxt) and \
    #     all(reference_of(cntxt.schema, sa.shapeLabel) is not None and
    #
    return True
5 | """ 6 | from typing import List, Optional 7 | 8 | from ShExJSG import ShExJ 9 | 10 | from pyshex.shape_expressions_language.p5_context import Context 11 | 12 | 13 | def semActsSatisfied(acts: Optional[List[ShExJ.SemAct]], cntxt: Context) -> bool: 14 | """ `5.7.1 Semantic Actions Semantics `_ 15 | 16 | The evaluation semActsSatisfied on a list of SemActs returns success or failure. The evaluation of an individual 17 | SemAct is implementation-dependent. 18 | """ 19 | return True 20 | -------------------------------------------------------------------------------- /pyshex/shapemap_structure_and_language/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hsolbrig/PyShEx/88b8449939f394545c84741db1668bd8a4d1fdbc/pyshex/shapemap_structure_and_language/__init__.py -------------------------------------------------------------------------------- /pyshex/shapemap_structure_and_language/p1_notation_and_terminology.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import Union, Tuple, Iterator, Optional 3 | 4 | from rdflib import URIRef, BNode, Literal, Graph 5 | 6 | # This document assumes an understanding of the ShEx notation and terminology. 7 | # 8 | # ShapeExpression: a Boolean expression of ShEx shapes. 9 | # focus node: a node, potentially in an RDF graph, to be inspected for conformance with a shape expression. 10 | # 11 | # ShExMap uses the following terms from RDF semantics [rdf11-mt]: 12 | # 13 | # Node: one of IRI, blank node, Literal. 14 | # Graph: a set of Triples of (subject, predicate, object). 
class RDFGraph(set):
    """ A set of RDFTriple instances with a Turtle rendering. """

    def __init__(self, ts: Optional[Union[Iterator[RDFTriple], Iterator[Triple]]]=None) -> None:
        """
        :param ts: triples to load; plain tuples are wrapped in RDFTriple. None creates an empty graph
        """
        super().__init__([t if isinstance(t, RDFTriple) else RDFTriple(t) for t in ts] if ts is not None else [])

    def __str__(self) -> str:
        """ Return the graph serialized as Turtle with the @prefix declarations removed. """
        g = Graph()
        for e in self:
            g.add((e.s, e.p, e.o))
        turtle = g.serialize(format="turtle")
        # Depending on the rdflib release, serialize() hands back either bytes or str
        if isinstance(turtle, bytes):
            turtle = turtle.decode()
        return re.sub(r'^@prefix.*', '', turtle, flags=re.MULTILINE).strip()

    def add_triples(self, triples: Iterator[Triple]):
        """ Add ``triples`` to the graph, wrapping each one in RDFTriple. """
        super().update([RDFTriple(t) for t in triples])
def fetch_uri(url: str, base: Optional[str] = "") -> Optional[str]:
    """ Retrieve the text found at ``base`` + ``url``.

    This is a module level function, so it takes no ``self``: the callers in this module invoke it as
    ``fetch_uri(<url>, <base>)``.

    :param url: URL, or URL fragment relative to ``base``
    :param base: prefix that is prepended to ``url``. None is treated as an empty prefix
    :return: response text if the request succeeded, otherwise None (the failure reason is printed)
    """
    target = (base or "") + url
    req = requests.get(target)
    if req.ok:
        return req.text
    print(f"{target} {req.reason}")
    return None
class Manifest:
    """ A manifest: a loaded list of entries plus the defaults they share. """

    def __init__(self, source, base: Optional[str] = None, debug: Optional[bool] = False,
                 debug_slurps: Optional[bool] = False, over_slurp: Optional[bool] = True) -> None:
        """ Load a manifest

        :param source: file name, URI or file-like object that carries the manifest description
        :param base: RDF and ShEx base directory or URL. If omitted, source file name/URI will be used
        :param debug: default debug setting for evaluate function
        :param debug_slurps: default debug_slurps setting for evaluate function
        :param over_slurp: default over_slurp setting for evaluate function
        """
        self.manifest = load(source)

        self.base = base
        if not self.base:
            if isinstance(source, str):
                if '://' in source:
                    # The base of a URL is everything up to and including the last '/' of its path,
                    # not the final path component (which is the manifest file itself).
                    parts = urlsplit(source)
                    self.base = f"{parts.scheme}://{parts.netloc}{parts.path.rsplit('/', 1)[0]}/"
                else:
                    self.base = os.path.dirname(source)

        self.debug = debug
        self.debug_slurps = debug_slurps
        self.over_slurp = over_slurp
        # Give every entry a way back to the manifest so it can pick up the base
        for entry in self.manifest:
            entry._manifest = self

    @property
    def entries(self) -> List[ManifestEntry]:
        """ The loaded manifest, typed as a list of ManifestEntry """
        return cast(List[ManifestEntry], self.manifest)
def is_simple_literal(n: Node) -> bool:
    """ simple literal denotes a plain literal with no language tag.

    :param n: node to test
    :return: True if ``n`` is a literal that has neither a datatype nor a language tag
    """
    # Must start from is_plain_literal: a typed literal has a datatype by definition, so testing
    # is_typed_literal together with "datatype is None" could never succeed.
    return is_plain_literal(n) and cast(Literal, n).language is None
and cast(Literal, n).datatype in [ 60 | XSD.integer, 61 | XSD.decimal, 62 | XSD.float, 63 | XSD.double, 64 | XSD.string, 65 | XSD.boolean, 66 | XSD.dateTime, 67 | XSD.nonPositiveInteger, 68 | XSD.negativeInteger, 69 | XSD.long, 70 | XSD.int, 71 | XSD.short, 72 | XSD.byte, 73 | XSD.nonNegativeInteger, 74 | XSD.unsignedLong, 75 | XSD.unsignedInt, 76 | XSD.unsignedShort, 77 | XSD.unsignedByte, 78 | XSD.positiveInteger 79 | ]) 80 | -------------------------------------------------------------------------------- /pyshex/user_agent.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Optional 3 | 4 | from SPARQLWrapper import SPARQLWrapper 5 | from sparqlslurper import SlurpyGraph, GraphDBSlurpyGraph 6 | 7 | with open(os.path.join(os.path.dirname(__file__), 'git_describe.txt')) as desc_f: 8 | description = desc_f.read().strip() 9 | 10 | # https://meta.wikimedia.org/wiki/User-Agent_policy: 11 | # The generic format is / () / 12 | # [/ ...]. Parts that are not applicable can be omitted. 
def format_collection(g: Graph, subj: Union[URIRef, BNode], max_entries: int = None, nentries: int = 0) -> Optional[List[str]]:
    """
    Return the turtle representation of subj as a collection

    :param g: Graph containing subj
    :param subj: subject of list
    :param max_entries: maximum number of list elements to return, None means all
    :param nentries: used for recursion

    :return: List of formatted entries if subj heads a well formed collection else None
    """
    if subj == RDF.nil:
        return [')']
    if max_entries is not None and nentries >= max_entries:
        return [' ...', ')']
    cadr = cdr = None
    for p, o in g.predicate_objects(subj):
        if p == RDF.first and cadr is None:
            cadr = o
        elif p == RDF.rest and cdr is None:
            cdr = o
        else:
            # Any other predicate, or a repeated first/rest, means this is not a plain collection node
            return None
    # technically this can't happen but it doesn't hurt to address it
    if cadr == RDF.nil and cdr is None:
        return []
    if cadr is None or cdr is None:
        return None
    tail = format_collection(g, cdr, max_entries, nentries + 1)
    if tail is None:
        # The remainder of the list is malformed, so the whole list is: report None rather than
        # failing on "list + None"
        return None
    return [(' ' if nentries else '(') + cadr.n3(g.namespace_manager)] + tail
26 | """ 27 | return len(str(abs(int(n.value)))) + fraction_digits(n) if is_numeric(n) and n.value is not None else None 28 | 29 | 30 | def fraction_digits(n: Literal) -> Optional[int]: 31 | """ 5.4.5 XML Schema Numeric Facet Constraints 32 | 33 | for "fractiondigits" constraints, v is less than or equals the number of digits to the right of the decimal place 34 | in the XML Schema canonical form[xmlschema-2] of the value of n, ignoring trailing zeros. 35 | """ 36 | # Note - the last expression below isolates the fractional portion, reverses it (e.g. 017320 --> 023710) and 37 | # converts it to an integer and back to a string 38 | return None if not is_numeric(n) or n.value is None \ 39 | else 0 if is_integer(n) or '.' not in str(n.value) or str(n.value).split('.')[1] == '0' \ 40 | else len(str(int(str(n.value).split('.')[1][::-1]))) 41 | 42 | 43 | def pattern_match(pattern: str, flags: str, val: str) -> bool: 44 | re_flags, pattern = _map_xpath_flags_to_re(reencode_escapes(pattern), flags) 45 | return re.search(pattern, val, flags=re_flags) is not None 46 | 47 | 48 | def reencode_escapes(pattern: str) -> str: 49 | return re.sub(r'\\.', _subf, pattern) 50 | 51 | 52 | def _subf(matchobj) -> str: 53 | o = matchobj.group(0) 54 | return o if o[1] in ['\\', '^', '$', '?', ',', '[', ']', '(', ')'] \ 55 | else '\t' if o[1] == 't' \ 56 | else '\n' if o[1] == 'n' \ 57 | else '\r' if o[1] == 'r' \ 58 | else o[1] 59 | 60 | 61 | def _map_xpath_flags_to_re(expr: str, xpath_flags: str) -> Tuple[int, str]: 62 | """ Map `5.6.2 Flags `_ to python 63 | 64 | :param expr: match pattern 65 | :param xpath_flags: xpath flags 66 | :returns: python flags / modified match pattern 67 | """ 68 | python_flags: int = 0 69 | modified_expr = expr 70 | if xpath_flags is None: 71 | xpath_flags = "" 72 | 73 | if 's' in xpath_flags: 74 | python_flags |= re.DOTALL 75 | if 'm' in xpath_flags: 76 | python_flags |= re.MULTILINE 77 | if 'i' in xpath_flags: 78 | python_flags |= re.IGNORECASE 79 | if 'x' in 
def deprecated(func):
    """This is a decorator which can be used to mark functions
    as deprecated. It will result in a warning being emitted
    when the function is used.

    :param func: function to mark as deprecated
    :return: wrapper that issues a DeprecationWarning and then calls ``func``
    """
    # Note: derived from https://code.activestate.com/recipes/391367-deprecated/
    # Imported here so the block does not rely on a new module level import.
    from functools import wraps

    # wraps() carries over __name__, __doc__ and __dict__ as the hand written copy did, and also
    # __module__, __qualname__ and __wrapped__.
    @wraps(func)
    def newFunc(*args, **kwargs):
        # stacklevel=2 attributes the warning to the caller of the deprecated function
        # instead of to this wrapper.
        warnings.warn("Call to deprecated function %s." % func.__name__,
                      category=DeprecationWarning,
                      stacklevel=2)
        return func(*args, **kwargs)
    return newFunc
class SchemaLoader:
    """ Loader for ShEx schemas in ShExC or ShExJ form, with optional location rewriting. """

    def __init__(self, base_location=None, redirect_location=None, schema_type_suffix=None) -> None:
        """ ShEx Schema loader, with the ability to redirect URI's to local directories or other URL's

        :param base_location: Location base supplied to `load` function
        :param redirect_location: Location to replace base for actual load
        :param schema_type_suffix: Replace schema file type suffix with this
        """
        self.base_location = base_location
        self.redirect_location = redirect_location
        self.schema_format = schema_type_suffix
        self.root_location = None
        self.schema_text = None

    def load(self, schema_file: Union[str, TextIO], schema_location: Optional[str]=None) -> ShExJ.Schema:
        """ Load a ShEx Schema from schema_location

        :param schema_file: name or file-like object to deserialize
        :param schema_location: URL or file name of schema. Used to create the base_location
        :return: ShEx Schema represented by schema_location
        """
        if isinstance(schema_file, str):
            # NOTE(review): the rewrite uses root_location as left by the previous load() call; it is
            # only updated below. Confirm that this ordering is intended.
            schema_file = self.location_rewrite(schema_file)
            self.schema_text = load_shex_file(schema_file)
        else:
            self.schema_text = schema_file.read()

        if self.base_location:
            self.root_location = self.base_location
        elif schema_location:
            self.root_location = os.path.dirname(schema_location) + '/'
        else:
            self.root_location = None
        return self.loads(self.schema_text)

    def loads(self, schema_txt: str) -> ShExJ.Schema:
        """ Parse and return schema as a ShExJ Schema

        :param schema_txt: ShExC or ShExJ representation of a ShEx Schema
        :return: ShEx Schema representation of schema
        """
        self.schema_text = schema_txt
        # startswith() copes with empty or whitespace-only text, which indexing [0] does not;
        # such text is handed to the ShExC parser like any other non-JSON input.
        if schema_txt.lstrip().startswith('{'):
            # TODO: figure out how to propagate self.base_location into this parse
            return cast(ShExJ.Schema, loads(schema_txt, ShExJ))
        return generate_shexj.parse(schema_txt, self.base_location)

    def location_rewrite(self, schema_location: str) -> str:
        """ Apply the redirect and the schema type suffix to a schema location.

        :param schema_location: location to rewrite
        :return: ``schema_location`` with a leading root_location replaced by redirect_location (when both
            are set) and, when a schema type suffix was supplied, with its file suffix replaced
        """
        if self.root_location is not None and self.redirect_location is not None:
            rval = schema_location.replace(self.root_location, self.redirect_location) \
                if self.root_location and schema_location.startswith(self.root_location) else schema_location
        else:
            rval = schema_location
        if self.schema_format:
            rval = re.sub(r'\.[^.]+?(tern)?$',f'.{self.schema_format}\\1', rval)
        return rval
-------------------------------------------------------------------------------- 1 | import sys 2 | from contextlib import AbstractContextManager 3 | 4 | from ShExJSG import ShExJ 5 | from rdflib import Graph 6 | from sparqlslurper import SlurpyGraph 7 | 8 | from pyshex.shape_expressions_language.p5_context import Context 9 | from pyshex.shapemap_structure_and_language.p1_notation_and_terminology import Node 10 | 11 | 12 | class slurper(AbstractContextManager): 13 | 14 | def __init__(self, cntxt: Context, n: Node, S: ShExJ.Shape): 15 | self.graph: SlurpyGraph = cntxt.graph 16 | self.tracing = isinstance(self.graph, SlurpyGraph) and cntxt.debug_context.trace_slurps 17 | self.n = n 18 | self.S = S 19 | 20 | def __enter__(self) -> Graph: 21 | if self.tracing: 22 | self.g_triples = self.graph.total_triples 23 | self.g_time = self.graph.total_slurptime 24 | print(f"# ← <{self.n}>@{self.S.id} ", end="") 25 | sys.stdout.flush() 26 | return self.graph 27 | 28 | def __exit__(self, exctype, excinst, exctb): 29 | if self.tracing: 30 | new_triples = self.graph.total_triples - self.g_triples 31 | if new_triples: 32 | print(f" {new_triples} triples " 33 | f"({int((self.graph.total_slurptime - self.g_time) * 1000)} μs)") 34 | else: 35 | print(f" (Cached)") 36 | -------------------------------------------------------------------------------- /pyshex/utils/sparql_query.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import jsonasobj 4 | import requests 5 | from SPARQLWrapper import JSON 6 | from rdflib import URIRef 7 | 8 | from pyshex.user_agent import UserAgent, SPARQLWrapperWithAgent 9 | 10 | 11 | class SPARQLQuery: 12 | def __init__(self, sparql_endpoint: str, sparql_file_uri_or_text: str, 13 | print_query: bool=False, print_results: bool=False, user_agent: str = UserAgent) -> None: 14 | """ Set up the query to run 15 | 16 | :param sparql_endpoint: URL of sparql endpoint 17 | :param 
def focus_nodes(self) -> List[URIRef]:
    """ Run the query against the endpoint and return the ``item`` binding of every result row

    When ``print_results`` is set, the first ten values are printed, followed by an ellipsis line
    if there are more.

    :return: one URIRef per result binding, built from ``row.item.value``
    """
    response = self.endpoint.query().response
    bindings = jsonasobj.load(response).results.bindings
    if self.print_results:
        preview = [binding.item.value for binding in bindings[:10]]
        print('\t' + '\n\t'.join(preview))
        if len(bindings) > 10:
            print('\n\t ...')
        print('\n')
    return [URIRef(binding.item.value) for binding in bindings]
class Cornucopia:
    """ A stand-in collection that answers True to every membership test.

    ``__iter__`` hands back the instance itself; no ``__next__`` is defined.
    """

    def __contains__(self, item):
        # Whatever is asked for, claim to have it
        return True

    def __iter__(self):
        return self
def trace_satisfies(newline: bool=True, skip_trace: Callable[[JSGObject], bool]=lambda _: False):
    """ Decorator factory for ``satisfies``-style functions: records the call in the parse tree and
    emits entry/exit trace lines when debugging is on.

    :param newline: its negation is passed as the second argument of the debug context's ``print``
     for the entry line
    :param skip_trace: predicate on the expression; when it returns True the trace lines are suppressed
    :return: decorator turning ``f(cntxt, n, expr, debug_context)`` into ``wrapper(cntxt, n, expr)``
    """
    from functools import wraps

    def e(f: Callable[[Context, Node, JSGObject, DebugContext], bool]):
        @wraps(f)       # keep f's name and docstring visible on the decorated function
        def wrapper(cntxt: Context, n: Node, expr: JSGObject) -> bool:
            # Push a new parse node for this evaluation beneath the current one
            parent_parse_node = cntxt.current_node
            cntxt.current_node = ParseNode(f, expr, n, cntxt)
            parent_parse_node.nodes.append(cntxt.current_node)
            c = cntxt.debug_context
            c.splus()
            if c.debug and not skip_trace(expr):
                c.print(c.i(0, f'--> {f.__name__} {c.d()} node: {cntxt.n3_mapper.n3(n)}'), not newline)
            rval = f(cntxt, n, expr, c)
            if c.debug and not skip_trace(expr):
                c.print(c.i(0, f'<-- {f.__name__} {c.d()} node: {cntxt.n3_mapper.n3(n)}: {rval}'))
            c.sminus()
            cntxt.current_node.set_result(rval)
            # Pop back to the caller's parse node
            cntxt.current_node = parent_parse_node
            return rval
        return wrapper
    return e


def trace_matches(newline: bool=True):
    """ Decorator factory for ``matches``-style functions: records the call in the parse tree and
    emits entry/exit trace lines when debugging is on.

    :param newline: its negation is passed as the second argument of the debug context's ``print``
     for the entry line
    :return: decorator turning ``f(cntxt, T, expr, debug_context[, extras])`` into
     ``wrapper(cntxt, T, expr, extras=None)``
    """
    from functools import wraps

    def e(f: Callable[[Context, RDFGraph, JSGObject, DebugContext, Optional[Set[URIRef]]], bool]):
        @wraps(f)       # keep f's name and docstring visible on the decorated function
        def wrapper(cntxt: Context, T: RDFGraph, expr: JSGObject, extras: Optional[Set[URIRef]]=None) -> bool:
            parent_parse_node = cntxt.current_node
            cntxt.current_node = ParseNode(f, expr, T, cntxt)
            parent_parse_node.nodes.append(cntxt.current_node)
            c = cntxt.debug_context
            c.splus()
            if c.debug:
                c.print(c.i(0, f'--> {f.__name__} {c.d()}'), not newline)
            # extras is only forwarded when the caller supplied it
            rval = f(cntxt, T, expr, c, extras) if extras is not None else f(cntxt, T, expr, c)
            if c.debug:
                c.print(c.i(0, f'<-- {f.__name__} {c.d()} {rval}'))
            c.sminus()
            # NOTE(review): trace_satisfies stores the result via set_result(); this assigns .result
            # directly -- confirm whether the difference is intended
            cntxt.current_node.result = rval
            cntxt.current_node = parent_parse_node
            return rval
        return wrapper
    return e
def generate_base(path: str) -> str:
    """ Convert path, which can be a URL or a file path, into a base URI

    The last path segment (the document name) is dropped and the result ends with exactly one '/'.
    For URLs the params, query and fragment are discarded as well: they belong to the document, not
    to its base.

    :param path: file location or url
    :return: file location or url sans actual name, terminated with '/'
    """
    if '://' in path:
        parts = urlparse(path)
        # rstrip so that a document directly under the root does not produce a doubled '/'
        directory = parts.path.rpartition('/')[0].rstrip('/')
        return urlunparse(parts._replace(path=directory, params='', query='', fragment='')) + '/'
    directory = os.path.split(path)[0].replace('\\', '/')
    # A bare file name has an empty directory part and still maps to '/', as it did before
    return directory if directory.endswith('/') else directory + '/'
def uriref_matches_iriref(v1: URIRef, v2: Union[str, ShExJ.IRIREF]) -> bool:
    """ Return True when the text of :py:class:`rdflib.URIRef` ``v1`` equals the text of ``v2`` """
    left = str(v1)
    right = str(v2)
    return left == right


def uriref_startswith_iriref(v1: URIRef, v2: Union[str, ShExJ.IRIREF]) -> bool:
    """ Return True when the text of :py:class:`rdflib.URIRef` ``v1`` begins with the text of ``v2`` """
    text = str(v1)
    return text.startswith(str(v2))


def literal_matches_objectliteral(v1: Literal, v2: ShExJ.ObjectLiteral) -> bool:
    """ Compare :py:class:`rdflib.Literal` ``v1`` with a Literal built from the value, type and
    language of :py:class:`ShExJ.ObjectLiteral` ``v2``
    """
    value = str(v2.value)
    datatype = iriref_to_uriref(v2.type)
    # A missing/empty language becomes None rather than the string form of the empty value
    language = str(v2.language) if v2.language else None
    expected = Literal(value, datatype=datatype, lang=language)
    return v1 == expected


def iriref_to_uriref(v: Union[str, ShExJ.IRIREF]) -> Optional[URIRef]:
    """ Wrap the text of ``v`` in a URIRef; a falsy ``v`` yields None """
    if not v:
        return None
    return URIRef(str(v))
11 | ################################################################################ 12 | 13 | pbr 14 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # This requirements file has been automatically generated from `Pipfile` with 3 | # `pipenv-to-requirements` 4 | # 5 | # 6 | # This has been done to maintain backward compatibility with tools and services 7 | # that do not support `Pipfile` yet. 8 | # 9 | # Do NOT edit it directly, use `pipenv install [-d]` to modify `Pipfile` and 10 | # `Pipfile.lock` and then regenerate `requirements*.txt`. 11 | ################################################################################ 12 | 13 | cfgraph>=0.2.1 14 | chardet 15 | pyshexc==0.9.1 16 | rdflib-shim 17 | requests>=2.22.0 18 | shexjsg>=0.8.2 19 | sparqlslurper>=0.5.1 20 | sparqlwrapper>=1.8.5 21 | urllib3 22 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = PyShEx 3 | url = https://github.com/hsolbrig/PyShEx 4 | description = Python ShEx interpreter 5 | author = Harold Solbrig 6 | author-email = solbrig@jhu.edu 7 | summary = Python ShEx Implementation 8 | license = CC0 1.0 Universal 9 | python-requires = >=3.6 10 | classifiers = 11 | Development Status :: 4 - Beta 12 | Environment :: Console 13 | Intended Audience :: Developers 14 | Topic :: Software Development :: Compilers 15 | License :: CC0 1.0 Universal (CC0 1.0) Public Domain Dedication 16 | Programming Language :: Python :: 3 :: Only 17 | Programming Language :: Python :: 3.7 18 | Programming Language :: Python :: 3.8 19 | keywords = 20 | ShEx 21 | rdf 22 | 23 | [files] 24 | packages = 25 | pyshex 26 | 27 | [entry_points] 28 | 
# Package-wide settings for the test suite
import os

# True means refresh all test files (only partially implemented at the moment)
refresh_files = False

# True means that we skip all tests that go outside our own environment (e.g. wikidata, etc)
# Set the SKIP_EXTERNAL_URLS environment variable to a true/false value, or leave it unset and base the
# decision on the presence of a file called "tests/data/SKIP_EXTERNAL_URLS"
_skip_external_env = os.environ.get('SKIP_EXTERNAL_URLS', None)
SKIP_EXTERNAL_URLS_MSG = "External url's are not tested - set tests.__init__.py.SKIP_EXTERNAL_URLS to False to run"

datadir = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data'))
if _skip_external_env is None:
    SKIP_EXTERNAL_URLS = os.path.exists(os.path.join(datadir, 'SKIP_EXTERNAL_URLS'))
else:
    # Environment values are strings: "False", "0", "no", "off" and "" must not count as "skip"
    SKIP_EXTERNAL_URLS = _skip_external_env.strip().lower() not in ('', '0', 'false', 'no', 'off')

print("Skipping external URL tests" if SKIP_EXTERNAL_URLS else "Including external URLs in tests")

# Settings for rdflib parsing issue

# See line 1578 in notation3.py:
#                 k = 'abfrtvn\\"\''.find(ch)
#                 if k >= 0:
#                     uch = '\a\b\f\r\t\v\n\\"\''[k]
from rdflib import __version__ as rdflib_version
# Compare the major version numerically -- as strings, "10.0.0" sorts before "5.0.0"
assert int(rdflib_version.split('.')[0]) >= 5, "rdflib version 5.0.0 or greater is required"
"@context": { 3 | "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", 4 | "rdfs": "http://www.w3.org/2000/01/rdf-schema#", 5 | "shex": "http://www.w3.org/ns/shex#", 6 | "xsd": "http://www.w3.org/2001/XMLSchema#", 7 | "mf": "http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#", 8 | "sht": "http://www.w3.org/ns/shacl/test-suite#", 9 | "sx": "https://shexspec.github.io/shexTest/ns#", 10 | 11 | "id": "@id", 12 | "type": "@type", 13 | 14 | "comment": "rdfs:comment", 15 | "label": "rdfs:label", 16 | "name": "mf:name", 17 | "prints": "mf:prints", 18 | 19 | "entries": {"@id": "mf:entries", "@container": "@list", "@type": "@id"}, 20 | "extensionResults": {"@id": "mf:extensionResults", "@type": "@id", "@container": "@list"}, 21 | "include": {"@id": "mf:include", "@type": "@id", "@container": "@list"}, 22 | 23 | "action": {"@id": "mf:action", "@type": "@id"}, 24 | "data": {"@id": "sx:data", "@type": "@id"}, 25 | "extension": {"@id": "mf:extension", "@type": "@id"}, 26 | "focus": {"@id": "sx:focus", "@type": "@id"}, 27 | "json": {"@id": "sx:json", "@type": "@id"}, 28 | "result": {"@id": "mf:result", "@type": "@id"}, 29 | "schema": {"@id": "sx:schema", "@type": "@id"}, 30 | "shape": {"@id": "sx:shape", "@type": "@id"}, 31 | "shex": {"@id": "sx:shex", "@type": "@id"}, 32 | "status": {"@id": "mf:status", "@type": "@vocab"}, 33 | "ttl": {"@id": "sx:ttl", "@type": "@id"} 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /tests/data/patient-example-d.ttl: -------------------------------------------------------------------------------- 1 | @prefix fhir: . 2 | @prefix owl: . 3 | @prefix rdfs: . 4 | @prefix xsd: . 5 | 6 | # - resource ------------------------------------------------------------------- 7 | 8 | a fhir:Patient; 9 | fhir:nodeRole fhir:treeRoot; 10 | fhir:Resource.id [ fhir:value "pat4"]; 11 | fhir:DomainResource.text [ 12 | fhir:Narrative.status [ fhir:value "generated" ]; 13 | fhir:Narrative.div "
\n

Patient Sandy Notsowell @ Acme Healthcare, Inc. MR = 123458, DECEASED

\n
" 14 | ]; 15 | fhir:Patient.identifier [ 16 | fhir:index 0; 17 | fhir:Identifier.use [ fhir:value "usual" ]; 18 | fhir:Identifier.type [ 19 | fhir:CodeableConcept.coding [ 20 | fhir:index 0; 21 | fhir:Coding.system [ fhir:value "http://terminology.hl7.org/CodeSystem/v2-0203" ]; 22 | fhir:Coding.code [ fhir:value "MR" ] 23 | ] 24 | ]; 25 | fhir:Identifier.system [ fhir:value "urn:oid:0.1.2.3.4.5.6.7" ]; 26 | fhir:Identifier.value [ fhir:value "123458" ] 27 | ]; 28 | fhir:Patient.active [ fhir:value "true"^^xsd:boolean]; 29 | fhir:Patient.name [ 30 | fhir:index 0; 31 | fhir:HumanName.use [ fhir:value "official" ]; 32 | fhir:HumanName.family [ fhir:value "Notsowell" ]; 33 | fhir:HumanName.given [ 34 | fhir:value "Sandy"; 35 | fhir:index 0 36 | ] 37 | ]; 38 | fhir:Patient.gender [ fhir:value "female"]; 39 | fhir:Patient.birthDate [ fhir:value "1982-08-02"^^xsd:date]; 40 | fhir:Patient.deceasedBoolean [ fhir:value "true"^^xsd:boolean]; 41 | fhir:Patient.managingOrganization [ 42 | fhir:link ; 43 | fhir:Reference.reference [ fhir:value "Organization/1" ]; 44 | fhir:Reference.display [ fhir:value "ACME Healthcare, Inc" ] 45 | ] . 46 | 47 | a fhir:Organization . 48 | 49 | # - ontology header ------------------------------------------------------------ 50 | 51 | a owl:Ontology; 52 | owl:imports fhir:fhir.ttl; 53 | owl:versionIRI . 
54 | 55 | # ------------------------------------------------------------------------------------- 56 | 57 | -------------------------------------------------------------------------------- /tests/data/schemas/1Adot.json: -------------------------------------------------------------------------------- 1 | { 2 | "@context": "http://www.w3.org/ns/shex.jsonld", 3 | "type": "Schema", 4 | "shapes": [ 5 | { 6 | "id": "http://a.example/S1", 7 | "type": "Shape", 8 | "expression": { 9 | "type": "TripleConstraint", 10 | "predicate": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" 11 | } 12 | } 13 | ] 14 | } -------------------------------------------------------------------------------- /tests/data/schemas/startCode3.json: -------------------------------------------------------------------------------- 1 | { 2 | "@context": "http://www.w3.org/ns/shex.jsonld", 3 | "type": "Schema", 4 | "startActs": [ 5 | { 6 | "type": "SemAct", 7 | "name": "http://shex.io/extensions/Test/", 8 | "code": " print(\"startAct 1\") " 9 | }, 10 | { 11 | "type": "SemAct", 12 | "name": "http://shex.io/extensions/Test/", 13 | "code": " print(\"startAct 2\") " 14 | }, 15 | { 16 | "type": "SemAct", 17 | "name": "http://shex.io/extensions/Test/", 18 | "code": " print(\"startAct 3\") " 19 | } 20 | ], 21 | "shapes": [ 22 | { 23 | "id": "http://a.example/S1", 24 | "type": "Shape", 25 | "expression": { 26 | "type": "TripleConstraint", 27 | "predicate": "http://a.example/p1" 28 | } 29 | } 30 | ] 31 | } -------------------------------------------------------------------------------- /tests/data/schemas/startCode3.shex: -------------------------------------------------------------------------------- 1 | %{ print("startAct 1") %} 2 | %{ print("startAct 2") %} 3 | %{ print("startAct 3") %} 4 | { 5 | . 
6 | } 7 | -------------------------------------------------------------------------------- /tests/data/schemas/startCode3.ttl: -------------------------------------------------------------------------------- 1 | PREFIX ex: 2 | PREFIX sx: 3 | PREFIX xsd: 4 | 5 | [] a sx:Schema ; 6 | sx:shapes ; 7 | sx:startActs ([ 8 | a sx:SemAct; 9 | sx:code " print(\"startAct 1\") "; 10 | sx:name 11 | ] [ 12 | a sx:SemAct; 13 | sx:code " print(\"startAct 2\") "; 14 | sx:name 15 | ] [ 16 | a sx:SemAct; 17 | sx:code " print(\"startAct 3\") "; 18 | sx:name 19 | ]) . 20 | 21 | a sx:Shape ; 22 | sx:expression [ a sx:TripleConstraint ; 23 | sx:predicate ] . 24 | 25 | -------------------------------------------------------------------------------- /tests/data/t1.shex: -------------------------------------------------------------------------------- 1 | PREFIX drugbank: 2 | PREFIX foaf: 3 | PREFIX xsd: 4 | PREFIX : 5 | 6 | START=@:S1 7 | 8 | :S1 {foaf:page IRI+ ; 9 | drugbank:limsDrugId xsd:string 10 | } -------------------------------------------------------------------------------- /tests/data/t1.sparql: -------------------------------------------------------------------------------- 1 | PREFIX rdf: 2 | PREFIX vocabClass: 3 | 4 | SELECT DISTINCT ?item WHERE { 5 | ?item rdf:type vocabClass:Offer 6 | } 7 | LIMIT 10 -------------------------------------------------------------------------------- /tests/data/validation/1dot-relative.shex: -------------------------------------------------------------------------------- 1 | { 2 | [] 3 | } 4 | -------------------------------------------------------------------------------- /tests/data/validation/Is1_Ip1_LSTRING_LITERAL1_with_all_punctuation.ttl: -------------------------------------------------------------------------------- 1 | ' !"#$%&\'()/:;<=>?@[]^_`{|}~' . 
2 | -------------------------------------------------------------------------------- /tests/data/validation/Pstar.ttl: -------------------------------------------------------------------------------- 1 | BASE 2 | PREFIX : 3 | :a . # satisfies both P and T 4 | :a . # satisfies both P and T 5 | :a . # satisfies T only 6 | :a

. #

satisfies P only 7 | -------------------------------------------------------------------------------- /tests/data/validation/anon_start.shex: -------------------------------------------------------------------------------- 1 | start=@:S1 and @:S1 2 | 3 | :S1 { .} -------------------------------------------------------------------------------- /tests/data/validation/anon_start.ttl: -------------------------------------------------------------------------------- 1 | BASE 2 | PREFIX : 3 | :s1 :p :o1 . -------------------------------------------------------------------------------- /tests/data/validation/simple.shex: -------------------------------------------------------------------------------- 1 | { .} 2 | -------------------------------------------------------------------------------- /tests/data/validation/simple.ttl: -------------------------------------------------------------------------------- 1 | BASE 2 | PREFIX : 3 | :s1 :p1 :o1 . 4 | :s2 :p2 :o2 . 5 | :s3 :p3 :o3 . -------------------------------------------------------------------------------- /tests/data/validation/type-samples.ttl: -------------------------------------------------------------------------------- 1 | @prefix biolink: . 2 | @prefix rdf: . 3 | @prefix rdfs: . 4 | @prefix xml: . 5 | @prefix xsd: . 6 | 7 | a biolink:ChemicalToGeneAssociation ; 8 | biolink:type biolink:ChemicalToGeneAssociation; 9 | biolink:affects "Human" ; 10 | rdf:object ; 11 | biolink:publications ; 12 | rdf:predicate biolink:affects_transport_of ; 13 | rdf:subject . 14 | 15 | a biolink:GeneToGeneProductRelationship, 16 | biolink:SequenceFeatureRelationship; 17 | biolink:type biolink:SequenceFeatureRelationship; 18 | biolink:affects "Human" ; 19 | rdf:object ; 20 | biolink:publications , 21 | , 22 | ; 23 | rdf:predicate biolink:affects_transport_of ; 24 | rdf:subject . 
25 | 26 | ; 27 | biolink:type biolink:ChemicalToGeneAssociation; 28 | biolink:affects "Human" ; 29 | rdf:object ; 30 | biolink:publications ; 31 | rdf:predicate biolink:affects_transport_of ; 32 | rdf:subject . 33 | 34 | a biolink:ChemicalToGeneAssociation ; 35 | biolink:affects "Human" ; 36 | rdf:object ; 37 | biolink:publications , 38 | , 39 | , 40 | , 41 | , 42 | ; 43 | rdf:predicate biolink:affects_transport_of ; 44 | rdf:subject . -------------------------------------------------------------------------------- /tests/test_biolink/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hsolbrig/PyShEx/88b8449939f394545c84741db1668bd8a4d1fdbc/tests/test_biolink/__init__.py -------------------------------------------------------------------------------- /tests/test_cli/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hsolbrig/PyShEx/88b8449939f394545c84741db1668bd8a4d1fdbc/tests/test_cli/__init__.py -------------------------------------------------------------------------------- /tests/test_cli/clitests.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import sys 4 | import textwrap 5 | import unittest 6 | from argparse import ArgumentParser 7 | from contextlib import redirect_stdout 8 | from io import StringIO 9 | from typing import Union, List, Optional, Callable 10 | 11 | from tests import refresh_files 12 | 13 | 14 | class ArgParseExitException(Exception): 15 | ... 
class CLITestCase(unittest.TestCase):
    """ Base class for command line tests that compare captured stdout with baseline files.

    Subclasses set ``testdir`` (the sub-directory of ``test_output_dir`` that holds the baselines) and
    override ``prog_ep`` with the entry point under test. Baselines that had to be written during a
    run are collected in ``creation_messages`` and make ``tearDownClass`` fail.
    """
    testdir: str = None
    test_output_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), 'output'))
    test_input_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), 'input'))
    testprog: str = None
    creation_messages: List[str] = None

    @staticmethod
    def prog_ep(argv: List[str]) -> bool:
        """ Entry point under test; ``do_test`` treats a falsy return value as success """
        return False

    @classmethod
    def setUpClass(cls):
        cls.testdir_path = os.path.join(cls.test_output_dir, cls.testdir)
        os.makedirs(cls.testdir_path, exist_ok=True)
        cls.creation_messages = []

    @classmethod
    def tearDownClass(cls):
        # A run that had to write baselines is reported as a failure so the new files get reviewed
        if cls.creation_messages:
            for msg in cls.creation_messages:
                print(msg, file=sys.stderr)
            cls.creation_messages = []
            assert False, "Tests failed because baseline files were being created"

    def do_test(self, args: Union[str, List[str]], testfile: Optional[str]="",
                update_test_file: bool=False, error: type(Exception)=None, tox_wrap_fix: bool=False,
                failexpected: bool=False, text_filter: Callable[[str], str]=None) -> None:
        """ Execute a cli test

        @param args: Argument string or list to command
        @param testfile: name of file to record output in.  If absent, using directory mode
        @param update_test_file: True means the baseline file is rewritten with the current output
        @param error: If present, we expect this error
        @param tox_wrap_fix: tox seems to wrap redirected output at 60 columns.  If true, try wrapping the test
        file before failing
        @param failexpected: True means we're logging an error
        @param text_filter: edits to remove non-matchable items
        """
        testfile_path = os.path.join(self.testdir_path, testfile)
        if text_filter is None:
            # Default comparison ignores every whitespace difference
            def text_filter(txt: str) -> str:
                return "".join(txt.replace('\r\n', '\n').strip().split())

        arg_list = args.split() if isinstance(args, str) else args
        if error:
            with self.assertRaises(error):
                self.prog_ep(arg_list)
            return

        outf = StringIO()
        with redirect_stdout(outf):
            try:
                success = not self.prog_ep(arg_list)
            except ArgParseExitException:
                success = False
        self.assertTrue(success or failexpected)
        output = outf.getvalue()

        if not testfile:
            print("Directory comparison needs to be added", file=sys.stderr)
            return

        baseline_missing = not os.path.exists(testfile_path)
        if update_test_file or baseline_missing:
            with open(testfile_path, 'w') as f:
                f.write(output)
            reason = 'did not exist - updated' if baseline_missing else 'updated on request'
            self.creation_messages.append(f'{testfile_path} {reason}')

        with open(testfile_path) as f:
            old_txt = text_filter(f.read())
        new_txt = text_filter(output)
        if old_txt != new_txt and tox_wrap_fix:
            old_txt = textwrap.fill(old_txt, 60)
            new_txt = textwrap.fill(new_txt, 60)
        self.assertEqual(old_txt, new_txt)

    @staticmethod
    def clear_dir(folder: str) -> None:
        """ Remove every regular file directly inside ``folder``; sub-directories are left alone.

        A file that cannot be removed is reported on stdout and the sweep continues.
        """
        for the_file in os.listdir(folder):
            file_path = os.path.join(folder, the_file)
            try:
                if os.path.isfile(file_path):
                    os.unlink(file_path)
            except OSError as e:
                print(e)
"final"]; # status in this value set 9 | :subject @ # a subject matching . 10 | } 11 | 12 | { # A Patient has: 13 | :name xsd:string*; # one or more names 14 | :birthdate xsd:date? # and an optional birthdate. 15 | } 16 | -------------------------------------------------------------------------------- /tests/test_cli/input/obs.ttl: -------------------------------------------------------------------------------- 1 | PREFIX : 2 | PREFIX xsd: 3 | BASE 4 | 5 | 6 | :status "final" ; 7 | :subject . 8 | 9 | 10 | :name "Bob" ; 11 | :birthdate "1999-12-31"^^xsd:date . -------------------------------------------------------------------------------- /tests/test_cli/input/sparql.sparql: -------------------------------------------------------------------------------- 1 | PREFIX rdf: 2 | PREFIX dbgraph: 3 | PREFIX drugbank: 4 | 5 | SELECT ?g ?s ?p ?o {graph dbgraph: {?s rdf:type ?o}} -------------------------------------------------------------------------------- /tests/test_cli/output/evaluate/biolinkfail: -------------------------------------------------------------------------------- 1 | Errors: 2 | Focus: https://biolink.github.io/biolink-model/ontology/biolink.ttl 3 | Start: http://bioentity.io/vocab/SchemaDefinition 4 | Reason: Testing against shape http://bioentity.io/vocab/Element 5 | Datatype constraint (http://www.w3.org/2001/XMLSchema#string) does not match BNode _:b1 6 | _:b1 context: 7 | ("rdf" 8 | "rdfs" 9 | "xsd" 10 | "skos" 11 | "OIO" 12 | ) -------------------------------------------------------------------------------- /tests/test_cli/output/evaluate/biolinkpass: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hsolbrig/PyShEx/88b8449939f394545c84741db1668bd8a4d1fdbc/tests/test_cli/output/evaluate/biolinkpass -------------------------------------------------------------------------------- /tests/test_cli/output/evaluate/dbsparql1: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hsolbrig/PyShEx/88b8449939f394545c84741db1668bd8a4d1fdbc/tests/test_cli/output/evaluate/dbsparql1 -------------------------------------------------------------------------------- /tests/test_cli/output/evaluate/dbsparql2: -------------------------------------------------------------------------------- 1 | SPARQL: 2 | PREFIX rdf: 3 | PREFIX vocabClass: 4 | 5 | SELECT DISTINCT ?item WHERE { 6 | ?item rdf:type vocabClass:Offer 7 | } 8 | LIMIT 10 9 | SPARQL: (SELECT ?s ?p ?o { ?p ?o}) (0.49 secs) - 75 triples 10 | SPARQL: (SELECT ?s ?p ?o { ?p ?o}) (0.6 secs) - 123 triples 11 | SPARQL: (SELECT ?s ?p ?o { ?p ?o}) (0.57 secs) - 68 triples 12 | SPARQL: (SELECT ?s ?p ?o { ?p ?o}) (0.45 secs) - 69 triples 13 | SPARQL: (SELECT ?s ?p ?o { ?p ?o}) (0.62 secs) - 134 triples 14 | SPARQL: (SELECT ?s ?p ?o { ?p ?o}) (0.48 secs) - 70 triples 15 | SPARQL: (SELECT ?s ?p ?o { ?p ?o}) (0.46 secs) - 67 triples 16 | SPARQL: (SELECT ?s ?p ?o { ?p ?o}) (0.47 secs) - 60 triples 17 | SPARQL: (SELECT ?s ?p ?o { ?p ?o}) (0.49 secs) - 92 triples 18 | SPARQL: (SELECT ?s ?p ?o { ?p ?o}) (0.49 secs) - 59 triples 19 | -------------------------------------------------------------------------------- /tests/test_cli/output/evaluate/help: -------------------------------------------------------------------------------- 1 | usage: shexeval [-h] [-f FORMAT] [-s START] [-ut] [-sp STARTPREDICATE] 2 | [-fn FOCUS] [-A] [-d] [-ss] [-ssg] [-cf] [-sq SPARQL] [-se] 3 | [--stopafter STOPAFTER] [-ps] [-pr] [-gn GRAPHNAME] [-pb] 4 | [--useragent USERAGENT] 5 | rdf shex 6 | 7 | positional arguments: 8 | rdf Input RDF file or SPARQL endpoint if slurper or sparql 9 | options 10 | shex ShEx specification 11 | 12 | options: 13 | -h, --help show this help message and exit 14 | -f FORMAT, --format FORMAT 15 | Input RDF Format 16 | -s START, --start START 17 | Start shape. If absent use ShEx start node. 
18 | -ut, --usetype Start shape is rdf:type of focus 19 | -sp STARTPREDICATE, --startpredicate STARTPREDICATE 20 | Start shape is object of this predicate 21 | -fn FOCUS, --focus FOCUS 22 | RDF focus node 23 | -A, --allsubjects Evaluate all non-bnode subjects in the graph 24 | -d, --debug Add debug output 25 | -ss, --slurper Use SPARQL slurper graph 26 | -ssg, --gdbslurper Use GraphDB specific slurper to persistent BNodes 27 | -cf, --flattener Use RDF Collections flattener graph 28 | -sq SPARQL, --sparql SPARQL 29 | SPARQL query to generate focus nodes 30 | -se, --stoponerror Stop on an error 31 | --stopafter STOPAFTER 32 | Stop after N nodes 33 | -ps, --printsparql Print SPARQL queries as they are executed 34 | -pr, --printsparqlresults 35 | Print SPARQL query and results 36 | -gn GRAPHNAME, --graphname GRAPHNAME 37 | Specific SPARQL graph to query - use '' for any named 38 | graph 39 | -pb, --persistbnodes Treat BNodes as persistent in SPARQL endpoint 40 | --useragent USERAGENT 41 | Use this user agent in the SPARQL Queries (Default: 42 | "{UserAgent}") 43 | -------------------------------------------------------------------------------- /tests/test_cli/output/evaluate/obs1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hsolbrig/PyShEx/88b8449939f394545c84741db1668bd8a4d1fdbc/tests/test_cli/output/evaluate/obs1 -------------------------------------------------------------------------------- /tests/test_cli/output/evaluate/pred-samples: -------------------------------------------------------------------------------- 1 | Errors: 2 | Focus: http://data2services/model/association/carrier/0d329fcdcfe5555eec7c1a5d3bbdd735 3 | Start: None 4 | Reason: No start node located -------------------------------------------------------------------------------- /tests/test_cli/output/evaluate/t1: -------------------------------------------------------------------------------- 
class ShexEvaluatorTestCase(CLITestCase):
    """Golden-file tests for the ``shexeval`` command line entry point.

    Each test hands an argv list to ``do_test`` together with the name of the
    expected-output file in the ``evaluate`` output directory.  When the
    module-level ``update_test_files`` flag is set, every test fails on purpose
    after running so that an "update" run is never mistaken for a passing one.
    """
    testdir = "evaluate"
    testprog = 'shexeval'
    # NOTE(review): not read anywhere in this class -- the skip decorator below
    # consults the IN_TOX *environment variable* instead.  Kept for compatibility.
    IN_TOX = False

    def prog_ep(self, argv: List[str]) -> bool:
        """Run the CLI with ``argv`` and return its result coerced to bool."""
        return bool(evaluate_cli(argv, prog=self.testprog))

    @unittest.skipIf(os.environ.get('IN_TOX', False), "Skipping test_help because of TOX formatting parameters")
    def test_help(self):
        """ Test that --help output matches the stored help text """
        testfile_path = os.path.join(self.testdir_path, 'help')
        with open(testfile_path) as tf:
            # The stored text carries a "{UserAgent}" placeholder
            help_text = tf.read().format(UserAgent=UserAgent)
        outf = StringIO()
        with redirect_stdout(outf):
            try:
                self.prog_ep(['--help'])
            except ArgParseExitException:
                # Raised in place of the exit that argparse performs after printing help
                pass
        self.maxDiff = None
        # Raw string: the previous non-raw ';\\n\s*' relied on the invalid escape '\s'
        actual = re.sub(r';\n\s*', '; ', outf.getvalue().strip())
        # Older argparse versions title the section "optional arguments:"
        actual = re.sub('optional arguments:', 'options:', actual)
        self.assertEqual(help_text.strip(), actual)

    def test_obs(self):
        """ Test a single focus node supplied with -fn """
        shex = os.path.join(self.test_input_dir, 'obs.shex')
        rdf = os.path.join(self.test_input_dir, 'obs.ttl')
        self.do_test([rdf, shex, '-fn', 'http://ex.org/Obs1'], 'obs1', update_test_file=update_test_files)
        self.assertFalse(update_test_files, "Updating test files")

    def test_biolink(self):
        """ Test the biolink model with (-cf, passing) and without (failing) the collections flattener """
        shex = os.path.join(datadir, 'schemas', 'meta.shex')
        rdf = os.path.join(datadir, 'validation', 'biolink-model.ttl')
        self.do_test([rdf, shex, '-fn', 'https://biolink.github.io/biolink-model/ontology/biolink.ttl',
                      '-s', 'http://bioentity.io/vocab/SchemaDefinition', '-cf'], 'biolinkpass',
                     update_test_file=update_test_files)
        self.do_test([rdf, shex, '-fn', 'https://biolink.github.io/biolink-model/ontology/biolink.ttl',
                      '-s', 'http://bioentity.io/vocab/SchemaDefinition'], 'biolinkfail',
                     update_test_file=update_test_files, failexpected=True)
        self.assertFalse(update_test_files, "Updating test files")

    def test_start_type(self):
        """ Test four subjects, two having one RDF type, one having two and one having none """
        shex = os.path.join(datadir, 'schemas', 'biolink-modelnc.shex')
        rdf = os.path.join(datadir, 'validation', 'type-samples.ttl')
        self.do_test([rdf, shex, '-A', '-ut', '-cf'], 'type-samples', update_test_file=update_test_files,
                     failexpected=True)
        self.assertFalse(update_test_files, "Updating test files")

    def test_start_predicate(self):
        """ Test taking the start shape from the object of a predicate (-sp) for all subjects (-A) """
        shex = os.path.join(datadir, 'schemas', 'biolink-modelnc.shex')
        rdf = os.path.join(datadir, 'validation', 'type-samples.ttl')
        self.do_test([rdf, shex, '-A', '-sp', 'http://w3id.org/biolink/vocab/type', '-cf'], 'pred-samples',
                     update_test_file=update_test_files,
                     failexpected=True)
        self.assertFalse(update_test_files, "Updating test files")

    @unittest.skipIf(not is_up(DRUGBANK_SPARQL_URL), is_down_reason(DRUGBANK_SPARQL_URL))
    def test_sparql_query(self):
        """ Test a sample DrugBank sparql query """
        shex = os.path.join(datadir, 't1.shex')
        sparql = os.path.join(datadir, 't1.sparql')
        self.do_test([DRUGBANK_SPARQL_URL, shex, '-sq', sparql], 't1', update_test_file=update_test_files)
        # Same guard as every other test in this class
        self.assertFalse(update_test_files, "Updating test files")
@unittest.skipIf(SKIP_EXTERNAL_URLS, SKIP_EXTERNAL_URLS_MSG)
class SparqlQueryTestCase(CLITestCase):
    """Golden-file tests for the SPARQL-related ``shexeval`` options.

    The whole class is skipped when SKIP_EXTERNAL_URLS is set; each test is
    additionally skipped when its endpoint does not respond.  Tests that print
    queries pass ``elapsed_filter`` so that elapsed times in the output are
    normalised before the comparison.
    """
    testdir = "evaluate"
    testprog = 'shexeval'
    # NOTE(review): schemadir is not referenced by any test in this class
    schemadir = os.path.join(datadir, 'schemas')

    def prog_ep(self, argv: List[str]) -> bool:
        """Run the CLI with ``argv`` and return its result coerced to bool."""
        return bool(evaluate_cli(argv, prog=self.testprog))

    @unittest.skipIf(not is_up(DRUGBANK_SPARQL_URL), is_down_reason(DRUGBANK_SPARQL_URL))
    def test_sparql_query(self):
        """ Test a sample DrugBank sparql query """
        shex = os.path.join(datadir, 't1.shex')
        sparql = os.path.join(datadir, 't1.sparql')
        self.do_test([DRUGBANK_SPARQL_URL, shex, '-sq', sparql], 'dbsparql1')

    @unittest.skipIf(not is_up(DRUGBANK_SPARQL_URL), is_down_reason(DRUGBANK_SPARQL_URL))
    def test_print_queries(self):
        """ Test a sample DrugBank sparql query printing queries"""
        shex = os.path.join(datadir, 't1.shex')
        sparql = os.path.join(datadir, 't1.sparql')
        # -ps: print SPARQL queries as they are executed
        self.do_test([DRUGBANK_SPARQL_URL, shex, '-sq', sparql, '-ps'], 'dbsparql2', text_filter=elapsed_filter)

    @unittest.skipIf(not is_up(DRUGBANK_SPARQL_URL), is_down_reason(DRUGBANK_SPARQL_URL))
    def test_print_results(self):
        """ Test a sample DrugBank sparql query printing results"""
        shex = os.path.join(datadir, 't1.shex')
        sparql = os.path.join(datadir, 't1.sparql')
        # -pr: print query and results; --stopafter 1: stop after one node
        self.do_test([DRUGBANK_SPARQL_URL, shex, '-sq', sparql, '-pr', "--stopafter", "1"], 'dbsparql3', text_filter=elapsed_filter)

    @unittest.skipIf(not is_up(DRUGBANK_SPARQL_URL), is_down_reason(DRUGBANK_SPARQL_URL))
    def test_named_graph(self):
        """ Test a sample DrugBank using any named graph """

        shex = os.path.join(datadir, 't1.shex')
        sparql = os.path.join(datadir, 't1.sparql')
        self.maxDiff = None
        # -gn with an empty string selects any named graph
        self.do_test([DRUGBANK_SPARQL_URL, shex, '-sq', sparql, '-ps', '-gn', "", "-pr"], 'dbsparql4',
                     failexpected=True, text_filter=elapsed_filter)

        # NOTE(review): graphid is empty as written, so this call passes the same
        # argv as the one above and differs only in the expected-output file --
        # confirm the specific graph IRI that was intended here.
        graphid = ""
        self.do_test([DRUGBANK_SPARQL_URL, shex, '-sq', sparql, '-ps', '-gn', graphid, "-pr"], 'dbsparql5',
                     failexpected=True, text_filter=elapsed_filter)

    @unittest.skipIf(not is_up(DUMONTIER_GRAPHDB_URL), is_down_reason(DUMONTIER_GRAPHDB_URL))
    def test_named_graph_types(self):
        """ Test a slurper (-ss) query against DUMONTIER_GRAPHDB_URL using any named graph and rdf:type start (-ut) """
        shex = os.path.join(datadir, 'schemas', 'biolink-modelnc.shex')
        self.maxDiff = None
        # Here -sq carries the query text itself rather than a file path
        self.do_test([DUMONTIER_GRAPHDB_URL, shex, '-ss', '-gn', '', '-ps', '-pr', '-ut', '-sq',
                      'select ?item where{?item a } LIMIT 20'],
                     'dbsparql6', failexpected=True, text_filter=elapsed_filter)

    @unittest.skipIf(not is_up(FHIRCAT_GRAPHDB_URL), is_down_reason(FHIRCAT_GRAPHDB_URL))
    def test_infer_setting(self):
        """ Test setting infer to False """

        shex = os.path.join(datadir, 'patient.shex')
        # The infer setting travels as a query parameter on the endpoint URL
        rdf = 'https://graph.fhircat.org/repositories/fhirontology?infer=false'
        self.maxDiff = None
        # -ssg: GraphDB specific slurper; -pb: treat BNodes as persistent in the endpoint
        self.do_test([rdf, shex, '-fn', "http://hl7.org/fhir/Patient/pat4", '-ssg', '-pb', '-ps', '-pr'], 'dbsparql7',
                     text_filter=elapsed_filter)
"https://biolink.github.io/biolink-model/ontology/biolink.ttl", 18 | "http://bioentity.io/vocab/SchemaDefinition") 19 | result = evaluator.evaluate(debug=False) 20 | for rslt in result: 21 | if not rslt.result: 22 | print(f"Error: {rslt.reason}") 23 | self.assertTrue(all(r.result for r in result)) 24 | 25 | 26 | 27 | if __name__ == '__main__': 28 | unittest.main() 29 | -------------------------------------------------------------------------------- /tests/test_issues/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hsolbrig/PyShEx/88b8449939f394545c84741db1668bd8a4d1fdbc/tests/test_issues/__init__.py -------------------------------------------------------------------------------- /tests/test_issues/data/Is1_Ip1_L_with_REGEXP_escapes_bare.ttl: -------------------------------------------------------------------------------- 1 | """/ 2 | -\\a𝒸""" . 3 | -------------------------------------------------------------------------------- /tests/test_issues/data/Q12214_min.ttl: -------------------------------------------------------------------------------- 1 | @prefix rdf: . 2 | @prefix xsd: . 3 | @prefix rdfs: . 4 | @prefix owl: . 5 | @prefix wikibase: . 6 | @prefix wds: . 7 | @prefix wdata: . 8 | @prefix p: . 9 | @prefix ps: . 10 | @prefix prov: . 11 | @prefix wdref: . 12 | @prefix wd: . 13 | 14 | 15 | wd:Q12214 p:P279 wds:Q12214-93138cd6-4150-3e3f-0b31-fabce61ff691 . 16 | 17 | wds:Q12214-93138cd6-4150-3e3f-0b31-fabce61ff691 a wikibase:Statement, 18 | wikibase:BestRank ; 19 | wikibase:rank wikibase:NormalRank ; 20 | ps:P279 wd:Q18975241 . 21 | -------------------------------------------------------------------------------- /tests/test_issues/data/Q12214_min_2.ttl: -------------------------------------------------------------------------------- 1 | @prefix rdf: . 2 | @prefix xsd: . 3 | @prefix rdfs: . 4 | @prefix owl: . 5 | @prefix wikibase: . 6 | @prefix wds: . 7 | @prefix wdata: . 
8 | @prefix p: . 9 | @prefix ps: . 10 | @prefix prov: . 11 | @prefix wdref: . 12 | @prefix wd: . 13 | 14 | wd:Q12214 p:P279 wds:Q12214-8B80A9EB-8074-4307-807C-0E93AE970CC2 . 15 | 16 | wds:Q12214-8B80A9EB-8074-4307-807C-0E93AE970CC2 a wikibase:Statement, 17 | wikibase:BestRank ; 18 | wikibase:rank wikibase:NormalRank ; 19 | ps:P279 wd:Q1928978 ; 20 | prov:wasDerivedFrom wdref:983bda8338732d051bbbe560ed02a1baef574a20 . 21 | 22 | wd:Q12214 p:P279 wds:Q12214-93138cd6-4150-3e3f-0b31-fabce61ff691 . 23 | 24 | wds:Q12214-93138cd6-4150-3e3f-0b31-fabce61ff691 a wikibase:Statement, 25 | wikibase:BestRank ; 26 | wikibase:rank wikibase:NormalRank ; 27 | ps:P279 wd:Q18975241 . 28 | 29 | -------------------------------------------------------------------------------- /tests/test_issues/data/biolink_model.sparql: -------------------------------------------------------------------------------- 1 | select ?item where{?item a } 2 | LIMIT 10 3 | -------------------------------------------------------------------------------- /tests/test_issues/data/bl_namedthing.shex: -------------------------------------------------------------------------------- 1 | { 2 | ( ? ; 3 | ? ; 4 | @ ? ; 5 | @ ? ; 6 | ? ; 7 | ? ; 8 | ? ; 9 | ? ; 10 | ? 
11 | ) 12 | } -------------------------------------------------------------------------------- /tests/test_issues/data/disease_min.shex: -------------------------------------------------------------------------------- 1 | # Shape Expression for Diseases in Wikidata 2 | PREFIX wd: 3 | PREFIX wdt: 4 | PREFIX p: 5 | PREFIX prov: 6 | PREFIX pq: 7 | PREFIX xsd: 8 | PREFIX prv: 9 | PREFIX pr: 10 | PREFIX ps: 11 | PREFIX rdfs: 12 | PREFIX schema: 13 | PREFIX do: 14 | PREFIX doio: 15 | PREFIX mir: 16 | PREFIX gw: 17 | 18 | 19 | start = @gw:disease 20 | 21 | gw:disease EXTRA p:P31 { 22 | 23 | # Statements 24 | p:P279 @gw:P279_disease-parent-class* 25 | } 26 | 27 | gw:P279_disease-parent-class { 28 | prov:wasDerivedFrom @gw:do-reference ; 29 | } 30 | -------------------------------------------------------------------------------- /tests/test_issues/data/example-haplotype2.results: -------------------------------------------------------------------------------- 1 | Errors: 2 | Focus: http://hl7.org/fhir/Observation/example-haplotype2 3 | Start: _:start 4 | Reason: Testing against shape http://hl7.org/fhir/shape/Observation 5 | Testing _:b2 against shape http://hl7.org/fhir/shape/Extension 6 | _:b2 context: 7 | fhir:DomainResource.extension _:b2 . 8 | _:b2 fhir:Extension.url _:b1 . 9 | _:b1 fhir:value "http://hl7.org/fhir/StructureDefinition/observation-geneticsGene" . 10 | _:b2 fhir:Extension.valueCodeableConcept _:b3 . 11 | _:b3 fhir:CodeableConcept.coding _:b4 . 12 | _:b4 fhir:Coding.code _:b5 . 13 | _:b5 fhir:value "2623" . 14 | _:b4 fhir:Coding.display _:b6 . 15 | _:b6 fhir:value "CYP2C9" . 16 | _:b4 fhir:Coding.system _:b7 . 17 | _:b7 fhir:value "http://www.genenames.org" . 18 | _:b4 fhir:index "0"^^xsd:integer . 19 | _:b2 fhir:index "0"^^xsd:integer . 20 | 21 | Datatype constraint (http://www.w3.org/2001/XMLSchema#string) does not match BNode _:b1 22 | _:b1 context: 23 | _:b2 fhir:Extension.url _:b1 . 
24 | _:b1 fhir:value "http://hl7.org/fhir/StructureDefinition/observation-geneticsGene" . 25 | -------------------------------------------------------------------------------- /tests/test_issues/data/example-haplotype2_online.results: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hsolbrig/PyShEx/88b8449939f394545c84741db1668bd8a4d1fdbc/tests/test_issues/data/example-haplotype2_online.results -------------------------------------------------------------------------------- /tests/test_issues/data/issue_20.errors: -------------------------------------------------------------------------------- 1 | Errors: 2 | Focus: http://example.org/ex/BPM1 3 | Start: http://example.org/ex/BloodPressureMeasurementShape 4 | Reason: Testing ex:BPM1 against shape http://example.org/ex/BloodPressureMeasurementShape 5 | Triples: 6 | ex:BPM1 :hasLocation ex:BPMLocation1 . 7 | ex:BPM1 :hasMethod ex:invasive . 8 | 2 triples cannot be partitioned into {0,1} passing groups 9 | Testing ex:BPM1 against shape http://example.org/ex/BloodPressureMeasurementShape 10 | Triples: 11 | ex:BPM1 :hasLocation ex:BPMLocation1 . 12 | 1 triples exceeds max {0,0} 13 | Testing ex:BPM1 against shape http://example.org/ex/BloodPressureMeasurementShape 14 | Triples: 15 | ex:BPM1 :hasLocation ex:BPMLocation1 . 16 | ex:BPM1 :hasMethod ex:invasive . 17 | 2 triples cannot be partitioned into {0,*} passing groups 18 | Testing ex:BPM1 against shape http://example.org/ex/BloodPressureMeasurementShape 19 | Triples: 20 | ex:BPM1 :hasLocation ex:BPMLocation1 . 21 | ex:BPM1 :hasMethod ex:invasive . 22 | 2 triples exceeds max {1,1} 23 | Testing ex:BPM1 against shape http://example.org/ex/BloodPressureMeasurementShape 24 | Triples: 25 | ex:BPM1 :hasLocation ex:BPMLocation1 . 26 | ex:BPM1 :hasMethod ex:invasive . 
27 | 2 triples exceeds max {1,1} 28 | Testing ex:BPM1 against shape http://example.org/ex/BloodPressureMeasurementShape 29 | Predicate mismatch: http://hl7.org/fhir/hasLocation ≠ http://hl7.org/fhir/hasMethod 30 | Testing ex:BPM1 against shape http://example.org/ex/BloodPressureMeasurementShape 31 | Node: ex:invasive not in value set: 32 | {"values": ["http://example.org/ex/non-invasive"], "type": "... 33 | Testing ex:BPM1 against shape http://example.org/ex/BloodPressureMeasurementShape 34 | Predicate mismatch: http://hl7.org/fhir/hasMethod ≠ http://hl7.org/fhir/hasLocation 35 | Testing ex:BPM1 against shape http://example.org/ex/BloodPressureMeasurementShape 36 | Node: ex:invasive not in value set: 37 | {"values": ["http://example.org/ex/non-invasive"], "type": "... 38 | Testing ex:BPM1 against shape http://example.org/ex/BloodPressureMeasurementShape 39 | Predicate mismatch: http://hl7.org/fhir/hasMethod ≠ http://hl7.org/fhir/hasLocation 40 | Testing ex:BPM1 against shape http://example.org/ex/BloodPressureMeasurementShape 41 | Triples: 42 | ex:BPM1 :hasLocation ex:BPMLocation1 . 43 | ex:BPM1 :hasMethod ex:invasive . 44 | 2 triples exceeds max {1,1} 45 | Testing ex:BPM1 against shape http://example.org/ex/BloodPressureMeasurementShape 46 | Triples: 47 | ex:BPM1 :hasLocation ex:BPMLocation1 . 48 | ex:BPM1 :hasMethod ex:invasive . 49 | 2 triples exceeds max {1,1} 50 | Testing ex:BPM1 against shape http://example.org/ex/BloodPressureMeasurementShape 51 | No matching triples found for predicate :hasMethod 52 | Testing ex:BPM1 against shape http://example.org/ex/BloodPressureMeasurementShape 53 | No matching triples found for predicate :hasLocation 54 | -------------------------------------------------------------------------------- /tests/test_issues/data/issue_20.shex: -------------------------------------------------------------------------------- 1 | BASE 2 | PREFIX ex: 3 | PREFIX : 4 | 5 | start = @ 6 | 7 | { 8 | ( 9 | (:hasMethod [] ; 10 | :hasLocation IRI{0})? 
11 | | 12 | (:hasMethod [] ; 13 | | :hasLocation IRI)* 14 | ) 15 | } -------------------------------------------------------------------------------- /tests/test_issues/data/issue_20.ttl: -------------------------------------------------------------------------------- 1 | BASE 2 | PREFIX ex: 3 | PREFIX : 4 | 5 | 6 | :hasMethod ; 7 | :hasLocation . 8 | 9 | 10 | :hasMethod ; 11 | :hasLocation . -------------------------------------------------------------------------------- /tests/test_issues/data/manifests/disease_manifest.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "schemaLabel": "disease shape", 4 | "schemaURL": "https://raw.githubusercontent.com/SuLab/Genewiki-ShEx/master/diseases/wikidata-disease-ontology.shex", 5 | "dataLabel": "Get all diseases from Wikidata" , 6 | "data": "Endpoint: https://query.wikidata.org/sparql", 7 | "queryMap": "SPARQL '''PREFIX wdt: \n\nSELECT ?item WHERE { ?item wdt:P699 ?doid .}'''@START", 8 | "status": "conformant" 9 | } 10 | ] -------------------------------------------------------------------------------- /tests/test_issues/data/shex/disease.shex: -------------------------------------------------------------------------------- 1 | # Shape Expression for Diseases in Wikidata 2 | PREFIX wd: 3 | PREFIX wdt: 4 | PREFIX wdtn: 5 | PREFIX wba: 6 | PREFIX p: 7 | PREFIX prov: 8 | PREFIX pq: 9 | PREFIX xsd: 10 | PREFIX prv: 11 | PREFIX pr: 12 | PREFIX ps: 13 | PREFIX rdfs: 14 | PREFIX schema: 15 | PREFIX do: 16 | PREFIX doio: 17 | PREFIX mir: 18 | PREFIX gw: 19 | 20 | 21 | start = @gw:disease 22 | 23 | gw:disease EXTRA p:P31 { 24 | 25 | # Statements 26 | p:P31 @gw:P31_disease-class ; 27 | p:P279 @gw:P279_disease-parent-class* ; 28 | p:P2888 @gw:P2888_disease-ontology-iri ; 29 | p:P2888 @gw:P2888_identifiers-org-iri ; 30 | 31 | ## IDENTIFIERS 32 | p:P699 @gw:P699_disease_ontology_id ; 33 | p:P486 @gw:P486_mesh_id* ; 34 | p:P492 @gw:P492_omim_id* ; 35 | p:P493 @gw:P493_icd_9* ; 36 | 
p:P494 @gw:P494_icd_10* ; 37 | p:P1550 @gw:P1550_orphanet_id* ; 38 | p:P1748 @gw:P1748_nci_thesaurus* ; 39 | p:P2892 @gw:P2892_umls-cui? ; 40 | } 41 | 42 | ## Statements 43 | 44 | gw:P31_disease-class { 45 | ps:P31 [wd:Q12136] ; 46 | prov:wasDerivedFrom @gw:do-reference ; 47 | 48 | } 49 | 50 | gw:P279_disease-parent-class { 51 | # ps:P279 @gw:disease ; # checking this recursive constraint is to expensive, hence it being a comment for now 52 | prov:wasDerivedFrom @gw:do-reference ; 53 | } 54 | 55 | gw:P2888_disease-ontology-iri { 56 | ps:P2888 [do:~] ; 57 | prov:wasDerivedFrom @gw:do-reference ; 58 | } 59 | 60 | gw:P2888_identifiers-org-iri { 61 | ps:P2888 [doio:~] ; 62 | prov:wasDerivedFrom @gw:miriam-reference 63 | } 64 | 65 | ## Identifiers 66 | gw:P699_disease_ontology_id { 67 | ps:P699 LITERAL /^DOID:[0-9]+$/; 68 | prov:wasDerivedFrom @gw:do-reference ; 69 | } 70 | 71 | gw:P486_mesh_id { 72 | ps:P486 LITERAL ; 73 | prov:wasDerivedFrom @gw:do-reference ; 74 | } 75 | 76 | gw:P492_omim_id { 77 | ps:P492 LITERAL ; 78 | prov:wasDerivedFrom @gw:do-reference ; 79 | } 80 | 81 | gw:P493_icd_9 { 82 | ps:P493 LITERAL ; 83 | prov:wasDerivedFrom @gw:do-reference ; 84 | } 85 | 86 | gw:P494_icd_10 { 87 | ps:P494 LITERAL ; 88 | prov:wasDerivedFrom @gw:do-reference ; 89 | } 90 | 91 | gw:P1550_orphanet_id { 92 | ps:P1550 LITERAL ; 93 | prov:wasDerivedFrom @gw:do-reference ; 94 | } 95 | 96 | gw:P1748_nci_thesaurus { 97 | ps:P1748 LITERAL ; 98 | prov:wasDerivedFrom @gw:do-reference ; 99 | } 100 | 101 | gw:P2888_exact_match { 102 | ps:P2888 IRI ; 103 | prov:wasDerivedFrom @gw:miriam-reference OR @gw:do-reference ; 104 | } 105 | 106 | gw:P2892_umls-cui { 107 | ps:P2892 LITERAL ; 108 | prov:wasDerivedFrom @gw:do-reference ; 109 | } 110 | 111 | 112 | gw:do-reference { 113 | pr:P248 IRI ; #Needs to be fixed by downloading the addtional arcs 114 | pr:P813 xsd:dateTime ; 115 | pr:P699 LITERAL /^DOID:[0-9]+$/; 116 | } 117 | 118 | gw:version-disease-ontology { 119 | p:P629 { ps:P629 [ 
def get_sparql_dataframe(service, query):
    """
    Helper function to convert SPARQL results into a Pandas data frame.

    :param service: URL of the SPARQL endpoint to query
    :param query: text of the SPARQL query
    :return: a DataFrame with one column per variable in the result head and
        one row per binding; a variable that is unbound in a row yields None
    """
    # The module never imported pandas, so the original ``pd.DataFrame`` raised
    # NameError.  Importing here also keeps the module importable without pandas.
    import pandas as pd

    sparql = SPARQLWrapperWithAgent(service)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    result = sparql.query()

    processed_results = jsonasobj.load(result.response)
    cols = processed_results.head.vars

    # One list per binding, ordered like ``cols``
    out = [[row._as_dict.get(c, {}).get('value') for c in cols]
           for row in processed_results.results.bindings]

    return pd.DataFrame(out, columns=cols)
class ShexCommentTestCase(unittest.TestCase):
    """Smoke test: the module-level ``shex_schema``, which has a trailing
    ``# comment`` on its ``start`` line, must parse without raising."""

    def test_1(self):
        """Parse the schema; any exception from ``parse`` fails the test."""
        parse(shex_schema)
        # Reaching this line is the actual check -- the assertion always passes
        self.assertTrue(True, "Parser didn't die")
class CRLFTestCase(unittest.TestCase):
    """Checks that control characters in a Turtle literal survive parsing."""

    def test_crlf(self):
        """ Make sure that the data is being read in raw form -- that linefeeds aren't being stripped """
        graph = Graph()
        graph.parse(ttl_file, format='turtle')
        # The test compares the value of the first object that is yielded
        parsed_objects = list(graph.objects())
        first_literal = parsed_objects[0]
        self.assertEqual('/\t\n\r-\\a𝒸', first_literal.value)
class FHIRServerTestCase(unittest.TestCase):
    """Golden-file tests that run ``evaluate_cli`` on the FHIR haplotype example.

    The expected output lives in the ``data`` directory next to this module.
    When an expected-output file is missing it is created from the current
    output and the test fails so that the run is repeated against it.
    """

    def _check_cli_output(self, argv, result):
        """Run ``evaluate_cli(argv)`` and compare captured stdout with the file ``result``.

        :param argv: list of command line arguments; a list (rather than one
            interpolated string) keeps paths containing whitespace intact
        :param result: path of the expected-output file
        """
        outf = StringIO()
        with redirect_stdout(outf):
            evaluate_cli(argv)
        actual = outf.getvalue()
        if not os.path.exists(result):
            with open(result, 'w') as f:
                f.write(actual)
            self.fail("Created test file -- rerun ")
        with open(result) as f:
            self.assertEqual(f.read(), actual)

    def test_observation_online(self):
        """ Test online FHIR example """
        source_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data')
        result = os.path.join(source_dir, 'example-haplotype2_online.results')
        self._check_cli_output(["http://hl7.org/fhir/observation-example-haplotype2.ttl",
                                "http://build.fhir.org/observation.shex",
                                "-fn", "http://hl7.org/fhir/Observation/example-haplotype2"],
                               result)

    def test_observation(self):
        """ Test of local FHIR example """
        source_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data')
        rdf = os.path.join(source_dir, 'example-haplotype2.ttl')
        shex = os.path.join(source_dir, 'observation.shex')
        result = os.path.join(source_dir, 'example-haplotype2.results')
        self._check_cli_output([rdf, shex, "-fn", "http://hl7.org/fhir/Observation/example-haplotype2"],
                               result)
'__main__': 44 | unittest.main() 45 | -------------------------------------------------------------------------------- /tests/test_issues/test_guardian_issue.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from pyshex import PrefixLibrary, ShExEvaluator 4 | 5 | schema = """ 6 | PREFIX ex: 7 | PREFIX xsd: 8 | PREFIX school: 9 | PREFIX foaf: 10 | 11 | school:enrolleeAge xsd:integer MinInclusive 13 MaxInclusive 20 12 | 13 | school:Enrollee { 14 | foaf:age @school:enrolleeAge ; 15 | ex:hasGuardian IRI {1,2} 16 | } 17 | 18 | school:Encapsulated { 19 | ex:hasMany { 20 | (ex:hasGuardian IRI {1,2}; 21 | ex:hasGuardian IRI {1,2}){3} 22 | }{2} 23 | } 24 | """ 25 | 26 | rdf = """ 27 | PREFIX ex: 28 | PREFIX inst: 29 | PREFIX school: 30 | PREFIX foaf: 31 | 32 | inst:Eric foaf:age 20 ; 33 | ex:hasGuardian inst:PersonA, inst:PersonB, inst:PersonC . 34 | 35 | inst:Fred ex:hasMany [ex:hasGuardian inst:Animal1, inst:Animal2], [ex:hasGuardian inst:Animal3]. 
class ThreeGuardiansTestCase(unittest.TestCase):
    """Both focus nodes in the module-level ``rdf`` are expected to fail validation."""

    def test_eric(self):
        """ inst:Eric is tested against school:Enrollee and must not conform """
        prefixes = PrefixLibrary(rdf)
        evaluator = ShExEvaluator(rdf=rdf,
                                  schema=schema,
                                  focus=prefixes.INST.Eric,
                                  start=prefixes.SCHOOL.Enrollee)
        for result in evaluator.evaluate(debug=False):
            status = 'Passing' if result.result else 'Failing'
            print(f"{result.focus}: {status}: \n{result.reason}")
            self.assertFalse(result.result)

    def test_fred(self):
        """ inst:Fred is tested against school:Encapsulated and must not conform """
        prefixes = PrefixLibrary(rdf)
        evaluator = ShExEvaluator(rdf=rdf,
                                  schema=schema,
                                  focus=prefixes.INST.Fred,
                                  start=prefixes.SCHOOL.Encapsulated)
        for result in evaluator.evaluate(debug=False):
            status = 'Passing' if result.result else 'Failing'
            print(f"{result.focus}: {status}: \n{result.reason}")
            self.assertFalse(result.result)
class FalsePositiveTestCase(unittest.TestCase):
    """Regression tests: Wikidata item Q12214 must be rejected by the disease schemas."""

    # Fixture directory, resolved relative to this test module.
    test_data = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')

    def _assert_q12214_rejected(self, schema_path_parts, turtle_name):
        """Load a schema and an RDF file from ``test_data`` and require a failing result.

        :param schema_path_parts: path components of the ShEx file below ``test_data``
        :param turtle_name: file name of the Turtle data below ``test_data``
        """
        schema_file = os.path.join(self.test_data, *schema_path_parts)
        with open(schema_file) as handle:
            schema_text = handle.read()
        rdf_file = os.path.join(self.test_data, turtle_name)
        evaluator = ShExEvaluator(rdf_file, schema_text, WIKIDATA.Q12214, debug=False)
        outcomes = evaluator.evaluate()
        self.assertFalse(outcomes[0].result)

    def test_false_positive_minimum(self):
        self._assert_q12214_rejected(('disease_min.shex',), 'Q12214_min.ttl')

    def test_false_positive_minimum_2(self):
        self._assert_q12214_rejected(('disease_min.shex',), 'Q12214_min_2.ttl')

    def test_false_positive(self):
        self._assert_q12214_rejected(('shex', 'disease.shex'), 'Q12214.ttl')
""" 14 | datadir = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data')) 15 | shexpath = os.path.join(datadir, 'issue_20.shex') 16 | rdfpath = os.path.join(datadir, 'issue_20.ttl') 17 | expectedpath = os.path.join(datadir, 'issue_20.errors') 18 | 19 | pl = PrefixLibrary(rdfpath) 20 | output = StringIO() 21 | with redirect_stdout(output): 22 | evaluate_cli(f"{rdfpath} {shexpath} -fn {pl.EX.BPM1}") 23 | evaluate_cli(f"{rdfpath} {shexpath} -fn {pl.EX.BPM2}") 24 | 25 | if not os.path.exists(expectedpath): 26 | with open(expectedpath, 'w') as f: 27 | f.write(output.getvalue()) 28 | self.assertTrue(False, "Output created, rerun") 29 | with open(expectedpath) as f: 30 | expected = f.read() 31 | 32 | self.maxDiff = None 33 | self.assertEqual(expected, output.getvalue()) 34 | 35 | if __name__ == '__main__': 36 | unittest.main() 37 | -------------------------------------------------------------------------------- /tests/test_issues/test_issue_21.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from pyshex import ShExEvaluator 4 | 5 | shex = """ 6 | BASE 7 | PREFIX xsd: 8 | PREFIX ex: 9 | PREFIX foaf: 10 | PREFIX : 11 | PREFIX rdfs: 12 | PREFIX foaf: 13 | start = @ 14 | { # A Patient has: 15 | :name xsd:string*; # one or more names 16 | :birthdate xsd:date? ; # and an optional birthdate. 17 | } 18 | { 19 | rdfs:label xsd:string ; 20 | :subject @ ; 21 | :hasmeasurementDate @ ; 22 | :valueSBP @ ; 23 | :valueDBP @ ; 24 | :valueABP @? ; 25 | (:hasMethod @ | 26 | :hasMethod @ ) ; 27 | :hasLocation @? ; 28 | :hasType @? ; 29 | :isAffectedBy @? 
class BPM1HangUnitTest(unittest.TestCase):
    """Issue 21: evaluating the BPM1 focus node must terminate with one failing result."""

    def test_hang(self):
        """Evaluate BPM1 against the module-level schema, report each result, expect ``[False]``."""
        focus_node = "http://example.org/ex/BPM1"
        outcomes = ShExEvaluator().evaluate(rdf, shex, focus=focus_node, debug=False)
        for outcome in outcomes:
            # The failure reason is only formatted when the node did not conform.
            print("PASS" if outcome.result else f"FAIL: {outcome.reason}")
        verdicts = [outcome.result for outcome in outcomes]
        self.assertEqual([False], verdicts)

class Issue23TestCase(unittest.TestCase):
    """Issue 23: a focus node that is absent from the graph is reported as a failure."""

    def test_fail(self):
        """Check a present focus, a missing focus, and a mixed list of both."""
        prefixes = PrefixLibrary(shex)

        # A focus node that occurs in the data conforms.
        present = ShExEvaluator().evaluate(rdf, shex, focus=prefixes.EX.s, debug=False)
        self.assertTrue(present[0].result)

        # A focus node that does not occur in the data fails with an explicit reason.
        missing = ShExEvaluator().evaluate(rdf, shex, focus=prefixes.EX.t)
        self.assertFalse(missing[0].result)
        self.assertEqual('Focus: http://example.org/ex/t not in graph', missing[0].reason)

        # With a list of focus nodes each one is reported separately, in order.
        mixed = ShExEvaluator().evaluate(rdf, shex, focus=[prefixes.EX.s, prefixes.EX.t2])
        first, second = mixed[0], mixed[1]
        self.assertTrue(first.result)
        self.assertFalse(second.result)
        self.assertEqual('Focus: http://example.org/ex/t2 not in graph', second.reason)
class Issue26TestCase(unittest.TestCase):
    """Issue 26: command-line evaluation of the ``anon_start`` fixtures with ``-A``."""

    # The skip condition is False, so this test is currently always executed.
    @unittest.skipIf(False, "Issue 26 needs to be fixed")
    def test_anon_start(self):
        """``evaluate_cli`` over all nodes (``-A``) is expected to return 0."""
        command_line = f"{rdffile} {shexfile} -A"
        exit_status = evaluate_cli(command_line)
        self.assertEqual(0, exit_status)
class ErrorReportingIssue(unittest.TestCase):
    """ Issue #30: every reported failure must carry a reason.

    This test is fragile because it queries an external SPARQL endpoint.
    """

    # The skip condition is False, so the test runs despite the fragility note.
    @unittest.skipIf(False, "Fragile test - we need local data to consistently reproduce")
    def test_messages(self):
        """ No line of the CLI output may end in a bare 'Reason:' """
        schema_path = os.path.join(data_dir, 'biolink-model.shex')
        query_path = os.path.join(data_dir, 'biolink_model.sparql')
        command_line = (f'-ss -sq {query_path} http://graphdb.dumontierlab.com/repositories/ncats-red-kg '
                        f'{schema_path} -ut -pb')
        captured = StringIO()
        with redirect_stdout(captured):
            evaluate_cli(command_line)
        for output_line in captured.getvalue().split('\n'):
            has_empty_reason = output_line.strip().endswith('Reason:')
            self.assertFalse(has_empty_reason)
class Issue41TestCase(unittest.TestCase):
    """Issue 41: validation against a CLOSED shape."""

    def test_closed(self):
        """ The focus node must not conform to the closed Person shape """
        focus_node = EXC['42']
        evaluator = ShExEvaluator(rdf=rdf, schema=shex, focus=focus_node, start=EXE.Person)

        # Evaluated twice on purpose: once to display the results, once to assert on them.
        pprint(evaluator.evaluate())
        outcomes = evaluator.evaluate()
        self.assertFalse(outcomes[0].result)

        # Exercise the functional entry point with a pre-parsed graph as well.
        from pyshex.evaluate import evaluate
        graph = Graph()
        graph.parse(data=rdf, format="turtle")
        pprint(evaluate(graph, shex, focus=focus_node, start=EXE.Person))

class Issue42TestCase(unittest.TestCase):
    """Issue 42: one evaluator instance must give stable answers across repeated calls."""

    def test_multiple_evaluate(self):
        """ Call evaluate NUM_ITERS times for a conformant and a non-conformant focus """
        prefixes = PrefixLibrary(shex)
        evaluator = ShExEvaluator(rdf=rdf, schema=shex, focus=prefixes.EX.s)

        # Conformant: the focus supplied to the constructor.
        for _attempt in range(NUM_ITERS):
            outcomes = evaluator.evaluate()
            self.assertTrue(outcomes[0].result)

        # Non-conformant: the focus overridden on each call.
        for _attempt in range(NUM_ITERS):
            outcomes = evaluator.evaluate(focus=prefixes.EX.a)
            self.assertFalse(outcomes[0].result)
13 | """ 14 | 15 | shex = f""" 16 | BASE <{BASE}> 17 | 18 | ( 19 | {{ 20 | ( $ a [ ] ?; 21 | a [ ] 22 | ) 23 | }} OR @ 24 | ) 25 | 26 | {{&; a []}} 27 | """ 28 | 29 | shex2 = f""" 30 | BASE <{BASE}> 31 | 32 | ( 33 | {{ 34 | ( $ a [ ] ?; 35 | a [ ] 36 | ) 37 | }} OR @ 38 | ) 39 | 40 | {{&}} 41 | """ 42 | 43 | 44 | class Issue51TestCase(unittest.TestCase): 45 | def test_inner_te(self): 46 | """ Test recognition of an inner triple expression """ 47 | 48 | e = ShExEvaluator(rdf=rdf, schema=shex, focus=BASE.s, start=BASE.X).evaluate() 49 | self.assertTrue(e[0].result) 50 | 51 | def test_te_message(self): 52 | """ Test the error message (and eventually the startup test) """ 53 | e = ShExEvaluator(rdf=rdf, schema=shex2, focus=BASE.s, start=BASE.X).evaluate() 54 | self.assertFalse(e[0].result) 55 | self.assertEqual(' Testing :s against shape https://w3id.org/biolink/vocab/X\n' 56 | ' https://w3id.org/biolink/vocab/missing: Reference not found', e[0].reason) 57 | 58 | 59 | if __name__ == '__main__': 60 | unittest.main() 61 | -------------------------------------------------------------------------------- /tests/test_issues/test_issue_54.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | 4 | from rdflib import Namespace 5 | 6 | from pyshex import ShExEvaluator 7 | 8 | BASE = Namespace("https://w3id.org/biolink/vocab/") 9 | 10 | rdf = f""" 11 | PREFIX rdfs: 12 | PREFIX owl: 13 | PREFIX dcterms: 14 | PREFIX SEMMEDDB: 15 | PREFIX WD: 16 | 17 | a WD:Q12140; 18 | rdfs:subClassOf ; 19 | dcterms:description "Dimeric fusion protein consisting of ..."; 20 | rdfs:label "Etanercept"; 21 | "BIOD00052" . 
# NOTE(review): this class lives in test_issue_54.py but is named Issue51TestCase,
# which looks like a copy/paste leftover. The name is kept so test ids do not change.
class Issue51TestCase(unittest.TestCase):
    """Issue 54: validation of a drug record against the biolink ``Drug`` shape."""

    # Fixture directory, resolved relative to this test module.
    test_data = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')

    def test_performance_problem(self):
        """ Performance problem caused by two possible type arcs in a definition """
        schema_path = os.path.join(self.test_data, 'shex', 'issue_54.shex')
        evaluator = ShExEvaluator(rdf=rdf,
                                  schema=schema_path,
                                  focus="http://identifiers.org/drugbank:DB00005",
                                  start="https://w3id.org/biolink/vocab/Drug")
        outcomes = evaluator.evaluate()
        self.assertTrue(outcomes[0].result)
class Issue58TestCase(unittest.TestCase):
    """Issue 58: the UniProt focus node must conform to ``BiologicalProcessClass``."""

    def test_simple_example(self):
        """Evaluate the module-level data against the module-level schema and expect success."""
        evaluator = ShExEvaluator(rdf=rdf,
                                  schema=shex,
                                  focus=UNIPROT.Q13253,
                                  start=BASE.BiologicalProcessClass)
        outcomes = evaluator.evaluate()
        self.assertTrue(outcomes[0].result)
class NoStartNodeTestCase(unittest.TestCase):
    """Error reporting when the start shape is missing or unknown."""

    def _first_result(self, **evaluator_args):
        """Evaluate EX.x in a one-triple graph (x p x) and return the first result.

        ``evaluator_args`` are passed to ``ShExEvaluator`` in addition to the
        graph, the module-level schema and the focus node.
        """
        graph = Graph()
        graph.add((EX.x, EX.p, EX.x))
        evaluator = ShExEvaluator(rdf=graph, schema=shex, focus=EX.x, **evaluator_args)
        return evaluator.evaluate()[0]

    def test_no_start(self):
        """Without any start shape the evaluation fails and says so."""
        outcome = self._first_result()
        self.assertFalse(outcome.result)
        self.assertEqual('START node is not specified', outcome.reason.strip())

    def test_bad_start(self):
        """A start shape that the schema does not define is reported by name."""
        outcome = self._first_result(start=EX.c)
        self.assertFalse(outcome.result)
        self.assertEqual('Shape: http://a.example/c not found in Schema', outcome.reason.strip())
@unittest.skipIf(SKIP_EXTERNAL_URLS, SKIP_EXTERNAL_URLS_MSG)
class ReactomeTestCase(WikiDataTestCase):
    """Run the Reactome pathway manifest against Wikidata and compare with known results."""

    # This will change over time - expected values for the first 8 results
    # Note: This test has never been run past 1
    expected_results = [True, False, False, False, False, True, False, False]

    def test_wikidata_reactome(self):
        """Evaluate the first manifest entry and compare it with ``expected_results``."""
        here = os.path.dirname(__file__)
        graph_cache_dir = os.path.abspath(os.path.join(here, 'data', 'wikidata', 'reactome'))
        manifest_url = ("https://raw.githubusercontent.com/shexSpec/schemas/master/Wikidata/pathways/Reactome/"
                        "manifest_all.json")

        rslts = self.run_test(manifest_url, num_entries=1, debug=False, debug_slurps=False,
                              save_graph_dir=graph_cache_dir)
        for rslt in rslts:
            verdict = 'CONFORMS' if rslt.result else 'FAIL'
            print(f"{verdict}: {rslt.focus}")

        # zip() stops at the shorter sequence, so only as many expectations as there
        # are actual results get compared.
        actual_results = [r.result for r in rslts]
        pairs = zip(actual_results, self.expected_results)
        self.assertTrue(all(actual == expected for actual, expected in pairs))
class ShexjsIssue14TestCase(unittest.TestCase):
    # Test of https://github.com/shexSpec/shex.js/issues/16
    # NOTE(review): the URL above says issue 16 while this class and its module are
    # named for issue 14 - confirm which upstream issue is meant.

    def test_infinite_loop(self):
        """The focus node FHIR.d must be rejected; the reason is printed for inspection."""
        evaluator = ShExEvaluator(rdf=data, schema=shex, focus=FHIR.d, debug=False)
        outcomes = evaluator.evaluate()
        first = outcomes[0]
        # self.assertEqual("http://a.example/S: Inconsistent recursive shape reference", rslt[0].reason)
        self.assertFalse(first.result)
        print(first.reason)
class ShexjsIssue16TestCase(unittest.TestCase):
    # Test of https://github.com/shexSpec/shex.js/issues/16

    def test_infinite_loop(self):
        """A single string-valued status triple must conform to ObservationShape."""
        graph = Graph()
        status_triple = (EX.Obs1, FHIR.status, Literal("final"))
        graph.add(status_triple)
        evaluator = ShExEvaluator(rdf=graph,
                                  schema=shex,
                                  focus=EX.Obs1,
                                  start=FHIR.ObservationShape,
                                  debug=False)
        outcomes = evaluator.evaluate()
        self.assertTrue(outcomes[0].result)
class TeLabelTestCase(unittest.TestCase):
    """Validation through labelled (named) triple expressions."""

    def test_te_labels(self):
        """ One passing document and two failing documents, all for the same focus node """
        focus_node = "http://examples.org/ex/t"

        # The conforming document: results are pretty-printed before the assertion.
        outcomes = ShExEvaluator(rdf=passing, schema=shex, focus=focus_node).evaluate(debug=False)
        pprint(outcomes)
        self.assertTrue(outcomes[0].result)

        # Both failing documents must be rejected, checked in the original order.
        for bad_rdf in (failing_1, failing_2):
            outcomes = ShExEvaluator(rdf=bad_rdf, schema=shex, focus=focus_node).evaluate()
            self.assertFalse(outcomes[0].result)
class DTTestCase(unittest.TestCase):
    """rdflib behaviour check for literals carrying a non-standard datatype."""

    def test_wild_datatype(self):
        """ Make sure that non-standard datatypes are preserved in rdflib"""
        graph = Graph()
        parsed = graph.parse(data=rdf, format="turtle")
        expected = Literal('ab', datatype='http://a.example/bloodType')
        first_object = list(parsed.objects())[0]
        self.assertEqual(first_object, expected)
class SimpleExamplesTestCase(unittest.TestCase):
    """Placeholder for the "simple examples" validation tests; nothing is exercised yet."""

    @unittest.skip("SimpleExamplesTestCase not implemented")
    def test_example_1(self):
        """Pending test for the IntConstraint node constraint declared in ``shex_1``.

        The test is skipped unconditionally.  Should the skip ever be removed
        before the body is written, the test fails with an explicit message
        instead of an opaque ``True != False``.
        """
        # TODO: build a context with setup_context(shex_1, None) and assert that
        # pyshex.shape_expressions_language.p5_3_shape_expressions.satisfies
        # accepts a suitably typed literal (presumably "30" as xsd:integer --
        # the original draft of this call was never finished).
        self.fail("test_example_1 has no implementation yet")
class TotalDigitsTestCase(unittest.TestCase):
    """Table-driven checks of ``total_digits`` and ``fraction_digits``."""

    def _check_counts(self, counter, cases):
        """Run ``counter`` over ``cases`` in order, stopping at the first mismatch.

        Each case is an ``(expected, value)`` pair; an expected value of ``None``
        means ``counter`` must return ``None`` for that value.
        """
        for expected, value in cases:
            observed = counter(value)
            if expected is None:
                self.assertIsNone(observed)
            else:
                self.assertEqual(expected, observed)

    def test_total_digits(self):
        """Integer and decimal literals report a digit count; a malformed decimal gives None."""
        self._check_counts(total_digits, [
            (2, Literal(-17)),
            (2, Literal(17)),
            (1, Literal(0)),
            (1, Literal('0.0', datatype=XSD.decimal)),
            (1, Literal(-0.0, datatype=XSD.decimal)),
            (1, Literal(1.0, datatype=XSD.decimal)),
            (1, Literal(-1.0, datatype=XSD.decimal)),
            (3, Literal(5.55, datatype=XSD.decimal)),
            (None, Literal('5.55j', datatype=XSD.decimal)),
            (3, Literal('-5.55', datatype=XSD.decimal)),
        ])

    @unittest.skipIf(True, "rdflib should never parse 5.55 as an integer, but it does")
    def test_total_digits_2(self):
        """A non-integral value typed as xsd:integer should have no digit count (skipped)."""
        not_an_integer = Literal(5.55, datatype=XSD.integer)
        self.assertIsNone(total_digits(not_an_integer))

    def test_fraction_digits(self):
        """Fraction digits are counted for numeric literals and are None for everything else."""
        self._check_counts(fraction_digits, [
            (0, Literal(1)),
            (0, Literal(-117253884)),
            (0, Literal(127, datatype=XSD.byte)),
            (None, Literal("Hello")),
            (0, Literal(117, datatype=XSD.float)),
            # Note: rdflib creates a type of XSD.double, which is NOT derived from decimal (!)
            (0, Literal(5.0)),
            (0, Literal(5.0, datatype=XSD.decimal)),
            (2, Literal(5.55, datatype=XSD.decimal)),
            (2, Literal('5.55', datatype=XSD.decimal)),
            (0, Literal(-5.0)),
            (0, Literal(-5.0, datatype=XSD.decimal)),
            (2, Literal(-5.55, datatype=XSD.decimal)),
            (2, Literal('-5.55', datatype=XSD.decimal)),
            # A datatype IRI is not a literal at all
            (None, XSD.decimal),
            (None, Literal('abc', datatype=XSD.decimal)),
        ])
class JsonPatternTestCase(unittest.TestCase):
    """
    This test case is used to address issues in the string facets example 2.

    Each test checks that a regular expression spelled with escaped backslashes equals
    its raw-string spelling, matches only the intended text, and still matches after
    being embedded in a JSON document and parsed back.
    """

    def _check_pattern(self, escaped, raw, symbol):
        """Verify one pattern that matches TAB, a backslash, ``symbol`` and ``?`` exactly.

        :param escaped: the pattern written as an ordinary (non-raw) string literal
        :param raw: the same pattern written as a raw string literal
        :param symbol: the single character expected between the backslash and the ``?``
        """
        target = '\t\\' + symbol + '?'

        self.assertEqual(escaped, raw)
        self.assertIsNotNone(re.search(escaped, target))
        # The pattern is anchored at both ends, so surrounding text must prevent a match
        self.assertIsNone(re.search(escaped, 'a' + target))
        self.assertIsNone(re.search(escaped, target + 'z'))

        # Double every backslash so the pattern is a legal JSON string body
        json_ready = re.sub(r'\\', r'\\\\', escaped)
        document = json.loads(f'{{"pattern" : "{json_ready}"}}')
        self.assertIsNotNone(re.search(document['pattern'], target))

    # NOTE: the non-raw literals below spell the question-mark escape as '\\?'.
    # A bare '\?' is an invalid escape sequence in a normal string literal and
    # triggers a SyntaxWarning on current Python versions.

    def test_non_unicode(self):
        """ASCII symbol between the backslash and the question mark."""
        self._check_pattern('^\\t\\\\X\\?$', r'^\t\\X\?$', 'X')

    def test_unicode(self):
        """Non-BMP symbol written directly in the source text."""
        self._check_pattern('^\\t\\\\𝒸\\?$', r'^\t\\𝒸\?$', '𝒸')

    def test_unicode_2(self):
        """Same non-BMP symbol written with a ``\\U`` escape in the non-raw literal."""
        self._check_pattern('^\\t\\\\\U0001D4B8\\?$', r'^\t\\𝒸\?$', '\U0001D4B8')
class SchemaLoaderTestCase(unittest.TestCase):
    """Exercise SchemaLoader on ShExC and ShExJ schemas given as strings, files and URLs."""

    def _check_load_sources(self, file_name):
        """Load ``file_name`` by path, by open file object and by URL, checking each result.

        :param file_name: name of a schema file that exists both in ``schemas_dir`` and in
            the shexTest 2.0 ``schemas`` directory on GitHub
        """
        loader = SchemaLoader()

        # Local file name
        fileloc = os.path.join(schemas_dir, file_name)
        schema = loader.load(fileloc)
        self.assertEqual("http://a.example/S1", schema.shapes[0].id)

        # Local file object -- keep the result so the assertion checks *this* load
        # rather than the schema left over from the previous step
        with open(fileloc) as f:
            schema = loader.load(f)
        self.assertEqual("http://a.example/S1", schema.shapes[0].id)

        # URL
        fileurl = "https://raw.githubusercontent.com/shexSpec/shexTest/2.0/schemas/" + file_name
        schema = loader.load(fileurl)
        self.assertEqual("http://a.example/S1", schema.shapes[0].id)

    def test_loads_shexc(self):
        """ Load a schema string and test a couple of elements """
        loader = SchemaLoader()
        schema = loader.loads("""<http://a.example/S1> {
    ( <http://a.example/p1> .|
      <http://a.example/p2> .|
      <http://a.example/p3> .|
      <http://a.example/p4> .
    ){2,3}
}""")
        self.assertEqual("http://a.example/S1", schema.shapes[0].id)
        self.assertEqual({"http://a.example/p1",
                          "http://a.example/p2",
                          "http://a.example/p3",
                          "http://a.example/p4"}, {e.predicate for e in schema.shapes[0].expression.expressions})

    def test_loads_shexj(self):
        """ Load a schema string and test a couple of elements """
        loader = SchemaLoader()
        schema = loader.loads("""{
   "@context": "http://www.w3.org/ns/shex.jsonld",
   "type": "Schema",
   "shapes": [
      {
         "id": "http://a.example/S1",
         "type": "Shape",
         "expression": {
            "type": "TripleConstraint",
            "predicate": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
         }
      }
   ]
}""")
        self.assertEqual("http://a.example/S1", schema.shapes[0].id)
        self.assertEqual(str(RDF.type), schema.shapes[0].expression.predicate)

    def test_load_shexc(self):
        """Load the ShExC form of startCode3 from every supported kind of source."""
        self._check_load_sources('startCode3.shex')

    def test_load_shexj(self):
        """Load the ShExJ form of startCode3 from every supported kind of source."""
        self._check_load_sources('startCode3.json')

    def test_location_rewrite(self):
        """A schema under ``root_location`` is fetched from ``redirect_location`` instead."""
        loader = SchemaLoader()
        # Note: Deliberately a bad URL to make sure this works
        loader.root_location = "https://raw.githubusercontent.com/shexSpec/shexTest/2.0/schemasz/"
        loader.redirect_location = schemas_dir + '/'
        fileloc = loader.root_location + 'startCode3.shex'
        schema = loader.load(fileloc)
        self.assertEqual("http://a.example/S1", schema.shapes[0].id)

    def test_format_change(self):
        """``location_rewrite`` swaps the schema suffix to match the loader's current format."""
        loc = "https://raw.githubusercontent.com/shexSpec/shexTest/2.0/schemas/startCode3"
        loader = SchemaLoader(schema_type_suffix='json')
        self.assertEqual(f"{loc}.json", loader.location_rewrite(f"{loc}.shex"))
        self.assertEqual(f"{loc}.jsontern", loader.location_rewrite(f"{loc}.shextern"))
        loader.schema_format = 'shex'
        self.assertEqual(f"{loc}.shex", loader.location_rewrite(f"{loc}.shex"))
        self.assertEqual(f"{loc}.shextern", loader.location_rewrite(f"{loc}.shextern"))
        self.assertEqual(f"{loc}.shextern", loader.location_rewrite(f"{loc}.jsontern"))
class VisitorTestCase(unittest.TestCase):
    """Checks which shape expressions ``Context.visit_shapes`` hands to the visitor."""

    def test_example_1(self):
        """Visiting the first shape of ``shex_1`` reports only that shape's id."""
        schema, _ = setup_test(shex_1, None)
        cntxt = Context(None, schema)
        shapes_visited = []
        cntxt.visit_shapes(schema.shapes[0], visit_shape, shapes_visited)
        self.assertEqual(["http://schema.example/EmployeeShape"], shapes_visited)

    @unittest.skip("Example 2 may not be valid - check it")
    def test_example_2(self):
        """Visiting S1 of ``shex_2`` is expected to reach S1 and the S2 it references.

        The expected ids use the ``http://all.example/`` namespace because that is
        the only namespace ``shex_2`` declares ids in.
        """
        schema, _ = setup_test(shex_2, None)
        cntxt = Context(None, schema)
        shapes_visited = []
        cntxt.visit_shapes(schema.shapes[0], visit_shape, shapes_visited)
        self.assertEqual(["http://all.example/S1", "http://all.example/S2"], shapes_visited)
class TerminologyTestCase(unittest.TestCase):
    """Checks the arc and predicate helpers against the small issue graph in ``rdf_1``."""

    def test_example_1(self):
        """``arcsOut``, ``arcsIn`` and ``neigh`` return the triples touching ex:User2."""
        from pyshex.shape_expressions_language.p3_terminology import arcsOut, arcsIn, neigh

        _, graph = setup_test(None, rdf_1)
        user = EX.User2

        outgoing = {
            (user, FOAF.mbox, URIRef('mailto:bob@example.org')),
            (user, FOAF.name, Literal('Bob Smith')),
        }
        incoming = {(INST.Issue1, EX.reportedBy, user)}

        self.assertEqual(outgoing, arcsOut(graph, user))
        self.assertEqual(incoming, arcsIn(graph, user))
        # The neighbourhood is expected to be every outgoing plus every incoming arc
        self.assertEqual(outgoing | incoming, neigh(graph, user))

    def test_predicates(self):
        """The predicate helpers return the predicates of the corresponding arcs."""
        from pyshex.shape_expressions_language.p3_terminology import predicatesIn, predicatesOut, predicates

        _, graph = setup_test(None, rdf_1)
        user = EX.User2

        outgoing = {FOAF.mbox, FOAF.name}
        incoming = {EX.reportedBy}

        self.assertEqual(outgoing, predicatesOut(graph, user))
        self.assertEqual(incoming, predicatesIn(graph, user))
        self.assertEqual(outgoing | incoming, predicates(graph, user))
class NodeKindConstraintTest(unittest.TestCase):
    """Checks ``nodeSatisfiesNodeKind`` against the ``iri`` node kind declared in ``shex_1``."""

    @staticmethod
    def fail_reasons(cntxt: Context) -> List[str]:
        """Return the current parse node's failure reasons with outer whitespace removed."""
        reasons = cntxt.current_node.fail_reasons(cntxt.graph)
        return [reason.strip() for reason in reasons]

    def test_example_1(self):
        """An IRI value satisfies the constraint; a literal value fails with a reason."""
        from pyshex.shape_expressions_language.p5_4_node_constraints import nodeSatisfiesNodeKind

        cntxt = setup_context(shex_1, rdf_1)
        constraint = cntxt.schema.shapes[0].expression.valueExpr

        # ex:state of issue1 is an IRI
        iri_value = cntxt.graph.value(EX.issue1, EX.state)
        cntxt.current_node = ParseNode(nodeSatisfiesNodeKind, constraint, iri_value, cntxt)
        self.assertTrue(nodeSatisfiesNodeKind(cntxt, iri_value, constraint))

        # ex:state of issue3 is a plain literal
        literal_value = cntxt.graph.value(EX.issue3, EX.state)
        cntxt.current_node = ParseNode(nodeSatisfiesNodeKind, constraint, literal_value, cntxt)
        self.assertFalse(nodeSatisfiesNodeKind(cntxt, literal_value, constraint))
        expected_reasons = ['Node kind mismatch have: Literal expected: iri']
        self.assertEqual(expected_reasons, self.fail_reasons(cntxt))
class NumericFacetTestCase(unittest.TestCase):
    """Checks ``nodeSatisfiesNumericFacet`` for ``mininclusive`` and ``fractiondigits``."""

    @staticmethod
    def fail_reasons(cntxt: Context) -> List[str]:
        """Return the current parse node's failure reasons with outer whitespace removed."""
        reasons = cntxt.current_node.fail_reasons(cntxt.graph)
        return [reason.strip() for reason in reasons]

    def test_example_1(self):
        """Values of 1 and 2 meet ``mininclusive: 1``; a value of 0 fails with a reason."""
        cntxt = setup_context(shex_1, rdf_1)
        constraint = cntxt.schema.shapes[0].expression.valueExpr

        for subject, should_pass in ((EX.issue1, True), (EX.issue2, True), (EX.issue3, False)):
            focus = cntxt.graph.value(subject, EX.confirmations)
            cntxt.current_node = ParseNode(nodeSatisfiesNumericFacet, constraint, focus, cntxt)
            outcome = nodeSatisfiesNumericFacet(cntxt, focus, constraint)
            if should_pass:
                self.assertTrue(outcome)
            else:
                self.assertFalse(outcome)

        # The parse node left in place belongs to the failing issue3 check
        self.assertEqual(['Numeric value volation - minimum inclusive: 1.0 actual: 0'], self.fail_reasons(cntxt))

    def test_trailing_zero(self):
        """The value in ``rdf_2`` satisfies ``fractiondigits: 4``."""
        cntxt = setup_context(shex_2, rdf_2)
        constraint = cntxt.schema.shapes[0].expression.valueExpr
        subject = URIRef("http://a.example/s1")
        predicate = URIRef("http://a.example/p1")
        focus = cntxt.graph.value(subject, predicate)
        cntxt.current_node = ParseNode(nodeSatisfiesNumericFacet, constraint, focus, cntxt)
        self.assertTrue(nodeSatisfiesNumericFacet(cntxt, focus, constraint))
class ContextTestCase(unittest.TestCase):
    """Checks the id maps a Context builds and its shape / triple-expression visitors."""

    def test_basic_context(self):
        """The context indexes the one shape and the one triple expression by their ids."""
        context = setup_context(shex_1, rdf_1)

        shape_ids = list(context.schema_id_map.keys())
        self.assertEqual(['http://schema.example/UserShape'], shape_ids)
        first_shape = list(context.schema_id_map.values())[0]
        self.assertIsInstance(first_shape, ShExJ.Shape)

        te_ids = list(context.te_id_map.keys())
        self.assertEqual(['http://schema.example/te1'], te_ids)
        first_te = list(context.te_id_map.values())[0]
        self.assertIsInstance(first_te, ShExJ.TripleConstraint)

    def test_predicate_scan(self):
        """Walking UserShape collects rdf:type, and ``predicates_in_expression`` agrees."""
        context = setup_context(shex_1, rdf_1)
        found: List[URIRef] = []

        user_shape = context.shapeExprFor(IRIREF('http://schema.example/UserShape'))
        context.visit_shapes(user_shape, triple_expr_finder, found)
        self.assertEqual([RDF.type], found)

        # Quick test of the utility function
        from_utility = predicates_in_expression(
            context.shapeExprFor(IRIREF('http://schema.example/UserShape')), context)
        as_irirefs = [ShExJ.IRIREF(str(uri)) for uri in found]
        self.assertEqual(from_utility, as_irirefs)
class NotationAndTerminologyTestCase(unittest.TestCase):
    """Tests for the RDFTriple and RDFGraph wrappers."""

    def test_rdf_triple(self):
        """An RDFTriple built from a 3-tuple exposes its parts as ``s``, ``p`` and ``o``."""
        x = RDFTriple((EX.issue1, EX.num, Literal(17)))
        self.assertEqual(EX.issue1, x.s)
        self.assertEqual(EX.num, x.p)
        self.assertEqual(17, x.o.value)
        # NOTE(review): the expected text below looks as if it has lost its subject and
        # predicate terms -- confirm against the upstream test before relying on it
        self.assertEqual(" 17 .",
                         str(x))

    def test_rdf_graph(self):
        """Duplicate triples collapse to one entry; a graph built from ``rdf_1`` prints as ``rdf_out``."""
        x = RDFGraph([(EX.issue1, EX.count, Literal(17))])
        self.assertEqual(1, len(x))
        # The same triple given twice as plain tuples is counted once
        x = RDFGraph([(EX.issue1, EX.count, Literal(17)), (EX.issue1, EX.count, Literal(17))])
        self.assertEqual(1, len(x))
        # A plain tuple and an RDFTriple with the same content are also counted once
        x = RDFGraph([(EX.issue1, EX.count, Literal(17)), RDFTriple((EX.issue1, EX.count, Literal(17)))])
        self.assertEqual(1, len(x))
        _, g = setup_test(None, rdf_1)
        x = RDFGraph(g)
        self.assertEqual(rdf_out, str(x))
class ManifestTestCase(unittest.TestCase):
    """Checks that a remote ShEx manifest can be fetched and read."""

    def test_loader(self):
        """The first bibframe manifest entry carries the expected fields; nine entries load."""
        manifest = Manifest("https://www.w3.org/2017/10/bibframe-shex/shex-simple-examples.json")
        first_entry = manifest.entries[0]

        expected_fields = (
            ('schemaLabel', 'bibframe book'),
            ('schemaURL', 'book.shex'),
            ('dataLabel', 'simple'),
            ('dataURL', 'book.ttl'),
            ('queryMap', '@'),
            ('status', 'conformant'),
        )
        for field, expected in expected_fields:
            self.assertEqual(expected, getattr(first_entry, field))

        self.assertEqual(9, len(manifest.entries))
class ManifestShexShexCTestCase(ManifestEntryTestCase):
    """Runs the manifest-driven validation tests against the ShExC form of each schema."""

    def __init__(self, methodname):
        # This is a spot that you can insert conditional skips -- the second parameter below is a dictionary of test
        # names and skip reasons.
        # Example: skips = {'1val1STRING_LITERAL1_with_all_punctuation_pass': issue_text}
        super().__init__(methodname, None)

    def test_shex_shexc(self):
        """Run every manifest entry with the schema loader set to the ShExC format."""
        # The format is a property of the manifest's schema loader, which is also how
        # test_generate_earl_report below selects it
        self.mfst.schema_loader.schema_format = "shex"
        self.do_test()

    def test_generate_earl_report(self):
        """Run every manifest entry and serialize the collected EARL results as Turtle."""
        self.mfst.schema_loader.schema_format = "shex"
        earlpage = EARLPage(URIRef("https://github.com/hsolbrig"))
        self.do_test(earlpage)
        # The report is written to ../data/earl_report.ttl relative to this test module
        earl_report = os.path.join(os.path.abspath(os.path.dirname(__file__)), '..', 'data', 'earl_report.ttl')
        earlpage.g.serialize(earl_report, format="turtle")
        print(f"EARL report generated in {earl_report}")
def sample(name: str) -> URIRef:
    """Return the entry for *name* from the module-level ``PROV`` namespace.

    ``PROV`` is not assigned anywhere in this module's own text; presumably it is
    injected by the ``pl.add_to_object(sys.modules[__name__])`` call that precedes
    this function -- confirm against PrefixLibrary.
    """
    return PROV[name]
evaluator.rdf.strip()) 44 | self.assertIsNone(evaluator.schema) 45 | self.assertIsNone(evaluator.focus) 46 | self.assertEqual([], evaluator.foci) 47 | self.assertEqual([START], evaluator.start) 48 | self.assertEqual("turtle", evaluator.rdf_format) 49 | self.assertTrue(isinstance(evaluator.g, Graph)) 50 | 51 | def test_complete_constructor(self): 52 | test_rdf = os.path.join(os.path.split(os.path.abspath(__file__))[0], '..', 'test_issues', 'data', 'Q18557122.ttl') 53 | evaluator = ShExEvaluator(test_rdf, shex_schema, 54 | [loc_prefixes.WIKIDATA, loc_prefixes.WIKIDATA.Q18557112], 55 | loc_prefixes.WIKIDATA.cancer) 56 | results = evaluator.evaluate() 57 | self.assertFalse(results[0].result) 58 | self.assertEqual(URIRef('http://www.wikidata.org/entity/'), results[0].focus) 59 | self.assertEqual(URIRef('http://www.wikidata.org/entity/cancer'), results[0].start) 60 | self.assertEqual('Focus: http://www.wikidata.org/entity/ not in graph', results[0].reason) 61 | self.assertEqual(URIRef('http://www.wikidata.org/entity/Q18557112'), results[1].focus) 62 | self.assertEqual(URIRef('http://www.wikidata.org/entity/cancer'), results[1].start) 63 | self.assertEqual(' Shape: http://www.wikidata.org/entity/cancer not found in Schema', 64 | results[1].reason) 65 | 66 | 67 | if __name__ == '__main__': 68 | unittest.main() 69 | -------------------------------------------------------------------------------- /tests/test_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hsolbrig/PyShEx/88b8449939f394545c84741db1668bd8a4d1fdbc/tests/test_utils/__init__.py -------------------------------------------------------------------------------- /tests/test_utils/test_n3_mapper.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | 4 | from rdflib import Graph, BNode 5 | 6 | from pyshex.utils.n3_mapper import N3Mapper 7 | 8 | 9 | class 
N3MapperUnitTest(unittest.TestCase): 10 | def test_basics(self): 11 | source_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'source') 12 | target_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'object') 13 | new_files = False 14 | 15 | os.makedirs(target_dir, exist_ok=True) 16 | self.maxDiff = None 17 | for f in os.listdir(source_dir): 18 | fpath = os.path.join(source_dir, f) 19 | if os.path.isfile(fpath): 20 | g = Graph() 21 | g.parse(fpath, format='turtle') 22 | mapper = N3Mapper(g.namespace_manager) 23 | result = '\n'.join([mapper.n3(t) 24 | for t in sorted(list(g), 25 | key=lambda t: (1, t) if isinstance(t[0], BNode) else (0, t))]) 26 | tpath = os.path.join(target_dir, f) 27 | if not os.path.exists(tpath): 28 | print(f"Creating: {tpath}") 29 | with open(tpath, 'w') as t: 30 | t.write(result) 31 | new_files = True 32 | with open(tpath) as t: 33 | self.assertEqual(t.read(), result) 34 | self.assertFalse(new_files, "New test files created - rerun") 35 | 36 | 37 | if __name__ == '__main__': 38 | unittest.main() 39 | -------------------------------------------------------------------------------- /tests/test_utils/test_sparql_query.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | from pprint import pprint 4 | 5 | from pyshex.utils.sparql_query import SPARQLQuery 6 | from tests import datadir 7 | 8 | 9 | class SparqlQueryTestCase(unittest.TestCase): 10 | @unittest.skipIf(True, "SPARQL query, sometimes URL is down. 
Need to look for an alternative.") 11 | def test_basics(self): 12 | q = SPARQLQuery('http://wifo5-04.informatik.uni-mannheim.de/drugbank/sparql', 13 | os.path.join(datadir, 't1.sparql')) 14 | self.assertEqual([ 15 | 'http://wifo5-04.informatik.uni-mannheim.de/drugbank/resource/drugs/DB00001', 16 | 'http://wifo5-04.informatik.uni-mannheim.de/drugbank/resource/drugs/DB00002', 17 | 'http://wifo5-04.informatik.uni-mannheim.de/drugbank/resource/drugs/DB00003', 18 | 'http://wifo5-04.informatik.uni-mannheim.de/drugbank/resource/drugs/DB00004', 19 | 'http://wifo5-04.informatik.uni-mannheim.de/drugbank/resource/drugs/DB00005', 20 | 'http://wifo5-04.informatik.uni-mannheim.de/drugbank/resource/drugs/DB00006', 21 | 'http://wifo5-04.informatik.uni-mannheim.de/drugbank/resource/drugs/DB00007', 22 | 'http://wifo5-04.informatik.uni-mannheim.de/drugbank/resource/drugs/DB00008', 23 | 'http://wifo5-04.informatik.uni-mannheim.de/drugbank/resource/drugs/DB00009', 24 | 'http://wifo5-04.informatik.uni-mannheim.de/drugbank/resource/drugs/DB00010'], 25 | [str(f) for f in q.focus_nodes()]) 26 | 27 | 28 | if __name__ == '__main__': 29 | unittest.main() 30 | -------------------------------------------------------------------------------- /tests/test_utils/test_tortoise.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from rdflib import Graph, URIRef 4 | 5 | from pyshex.utils import tortoise 6 | 7 | tortoise.register() 8 | 9 | class TortoiseTestCase(unittest.TestCase): 10 | def test_tortoise(self): 11 | g = Graph() 12 | self.assertEqual("""@prefix rdf: . 13 | @prefix rdfs: . 14 | @prefix xml: . 15 | @prefix xsd: .""", g.serialize(format="tortoise").decode().strip()) 16 | g.bind('foo', 'http://example.org/foo#') 17 | g.add((URIRef('http://example.org/foo#a'), 18 | URIRef('http://example.org/foo#b'), 19 | URIRef('http://example.org/foo#c'))) 20 | self.assertEqual("""@prefix foo: . 21 | @prefix rdf: . 22 | @prefix rdfs: . 
23 | @prefix xml: . 24 | @prefix xsd: . 25 | 26 | foo:a foo:b foo:c .""", g.serialize(format='tortoise').decode().strip()) 27 | 28 | 29 | if __name__ == '__main__': 30 | unittest.main() 31 | -------------------------------------------------------------------------------- /tests/utils/SortoGraph.py: -------------------------------------------------------------------------------- 1 | from typing import NamedTuple, Union, Tuple, Optional, Generator 2 | 3 | from rdflib import Graph, URIRef, Literal, BNode 4 | from rdflib.term import Node 5 | 6 | QueryTriple = Tuple[Optional[URIRef], Optional[URIRef], Optional[Union[Literal, URIRef]]] 7 | 8 | SUBJ = Union[URIRef, BNode] 9 | PRED = URIRef 10 | OBJ = Node 11 | 12 | 13 | class RDFTriple(NamedTuple): 14 | s: SUBJ = None 15 | p: PRED = None 16 | o: OBJ = None 17 | 18 | 19 | class SortOGraph(Graph): 20 | """ rdflib Graph wrapper that sorts the outputs 21 | """ 22 | 23 | def triples(self, 24 | pattern: Optional[Union[QueryTriple, SUBJ]]) -> Generator[RDFTriple, None, None]: 25 | for t in sorted(super().triples(pattern)): 26 | yield t 27 | -------------------------------------------------------------------------------- /tests/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hsolbrig/PyShEx/88b8449939f394545c84741db1668bd8a4d1fdbc/tests/utils/__init__.py -------------------------------------------------------------------------------- /tests/utils/setup_test.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import Tuple, Optional 3 | 4 | from ShExJSG import ShExJ 5 | from pyjsg.jsglib import loads 6 | 7 | from rdflib import Graph, RDF, RDFS, XSD 8 | from rdflib.namespace import FOAF 9 | 10 | from pyshex.shape_expressions_language.p5_context import Context 11 | from pyshex.utils.rdf_namespace import RDFNamespace 12 | 13 | EX = RDFNamespace("http://schema.example/") 14 | INST = 
RDFNamespace("http://inst.example/#") 15 | 16 | rdf_header = f""" 17 | prefix ex: <{EX}> 18 | prefix : <{EX}> 19 | prefix rdf: <{RDF}> 20 | prefix rdfs: <{RDFS}> 21 | prefix xsd: <{XSD}> 22 | prefix inst: <{INST}> 23 | prefix foaf: <{FOAF}> 24 | """ 25 | 26 | 27 | def setup_context(shex_str: str, rdf_str: Optional[str]) -> Context: 28 | schema, g = setup_test(shex_str, rdf_str) 29 | if g is None: 30 | g = Graph() 31 | g.parse(rdf_header) 32 | return Context(g, schema) 33 | 34 | 35 | def setup_test(shex_str: Optional[str], rdf_str: Optional[str]) -> Tuple[Optional[ShExJ.Schema], Optional[Graph]]: 36 | schema: ShExJ.Schema = loads(shex_str, ShExJ, strict=False) if shex_str else None 37 | if rdf_str: 38 | g = Graph() 39 | g.parse(data=rdf_str, format="turtle") 40 | else: 41 | g = None 42 | return schema, g 43 | 44 | 45 | def gen_rdf(rdf_fragment: str) -> str: 46 | """ Edit rdf_fragment from the spec to be complete. We 47 | 1) Add the rdf header and 48 | 2) convert relative URI's into URI's based in the default space """ 49 | return f"""{rdf_header}""" + re.sub(r'<([^.:>]+)>', r':\1', rdf_fragment) 50 | -------------------------------------------------------------------------------- /tests/utils/uri_redirector.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | from rdflib import URIRef 4 | 5 | 6 | class URIRedirector: 7 | def __init__(self, base: URIRef, target: str) -> None: 8 | self.base = base 9 | self.target = target 10 | 11 | def uri_for(self, uri: URIRef) -> Union[URIRef, str]: 12 | unix_uri = str(uri).replace('\\', '/') 13 | return unix_uri.replace(self.base, self.target) if unix_uri.startswith(self.base) else uri 14 | -------------------------------------------------------------------------------- /tests/utils/web_server_utils.py: -------------------------------------------------------------------------------- 1 | from functools import lru_cache 2 | from typing import Optional 3 | 4 | import 
requests 5 | 6 | from tests import SKIP_EXTERNAL_URLS 7 | 8 | # Various test locations 9 | DRUGBANK_SPARQL_URL = 'http://wifo5-04.informatik.uni-mannheim.de/drugbank/sparql' 10 | BIOLINK_MODEL_URL = 'https://biolink.github.io/biolink-model/' 11 | FHIRCAT_GRAPHDB_URL = 'https://graph.fhircat.org/repositories/fhirontology' 12 | DUMONTIER_GRAPHDB_URL = 'http://graphdb.dumontierlab.com/repositories/ncats-red-kg' 13 | 14 | PRE_CACHE = [ 15 | DRUGBANK_SPARQL_URL, 16 | BIOLINK_MODEL_URL 17 | ] 18 | 19 | @lru_cache() 20 | def is_up(url: str) -> Optional[bool]: 21 | """ Determine whether url is up and running """ 22 | if SKIP_EXTERNAL_URLS: 23 | return False 24 | try: 25 | requests.head(url, timeout=2) 26 | except Exception as e: 27 | return False 28 | return True 29 | 30 | def is_down_reason(url: str) -> str: 31 | svr_status = is_up(url) 32 | return f"Server {svr} is {'UP' if svr_status else 'DOWN' if svr_status is False else 'NOT BEING TESTED'}" 33 | 34 | # Prime the cache 35 | for svr in PRE_CACHE: 36 | print(is_down_reason(svr)) 37 | -------------------------------------------------------------------------------- /tests/utils/wikidata_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | from typing import Optional, List, NamedTuple, Union 4 | 5 | import jsonasobj 6 | import requests 7 | from SPARQLWrapper import JSON 8 | from jsonasobj import loads 9 | from rdflib import URIRef, Literal 10 | from rdflib.namespace import SKOS 11 | from sparqlslurper import SlurpyGraph 12 | 13 | from pyshex import PrefixLibrary, ShExEvaluator 14 | from pyshex.shex_evaluator import EvaluationResult 15 | from pyshex.user_agent import SlurpyGraphWithAgent, SPARQLWrapperWithAgent 16 | 17 | 18 | class DataFrame(NamedTuple): 19 | item: str 20 | 21 | 22 | class Triple(NamedTuple): 23 | s: Optional[URIRef] 24 | p: Optional[URIRef] 25 | o: Optional[Union[Literal, URIRef]] 26 | 27 | 28 | class 
WikiDataTestCase(unittest.TestCase): 29 | save_test_data = False 30 | 31 | @staticmethod 32 | def get_sparql_dataframe(service, query): 33 | """ 34 | Helper function to convert SPARQL results into a Pandas data frame. 35 | """ 36 | sparql = SPARQLWrapperWithAgent(service) 37 | sparql.setQuery(query) 38 | sparql.setReturnFormat(JSON) 39 | result = sparql.query() 40 | 41 | processed_results = jsonasobj.load(result.response) 42 | return [row.item.value for row in processed_results.results.bindings] 43 | 44 | def fetch_uri(self, uri: str) -> str: 45 | req = requests.get(uri) 46 | self.assertTrue(req.ok, f"Unable to read {uri}") 47 | return req.text 48 | 49 | def run_test(self, manifest_uri: str, num_entries: Optional[int]=None, verbose: bool=True, debug: bool=False, 50 | stop_on_fail: bool=False, debug_slurps: bool=False, save_graph_dir: Optional[str]=None) \ 51 | -> List[EvaluationResult]: 52 | """ Run the test identified by manifest_uri 53 | 54 | :param manifest_uri: uri of manifest 55 | :param num_entries: number of manifest elements to test 56 | :param verbose: True means talk about it 57 | :param debug: debug setting for shex evaluator 58 | :param stop_on_fail: True means run until failure 59 | :param debug_slurps: True means emit sparqlslurper statistics 60 | :param save_graph_dir: If present, save the final graph in this directory 61 | :return: 62 | """ 63 | manifest = loads(self.fetch_uri(manifest_uri)) 64 | rval: List[EvaluationResult] = [] 65 | for case in manifest: 66 | if verbose: 67 | print(case._as_json_dumps()) 68 | sparql_endpoint = case.data.replace("Endpoint: ", "") 69 | shex = self.fetch_uri(case.schemaURL) 70 | evaluator = ShExEvaluator(schema=shex, debug=debug) 71 | prefixes = PrefixLibrary(shex, SKOS=SKOS) 72 | sparql_query = case.queryMap.replace("SPARQL '''", "").replace("'''@START", "") 73 | dfs: List[str] = self.get_sparql_dataframe(sparql_endpoint, sparql_query) 74 | dfs_slice = dfs[:num_entries] if num_entries is not None else dfs 75 | for 
df in dfs_slice: 76 | slurper = SlurpyGraphWithAgent(sparql_endpoint) 77 | # slurper.debug_slurps = debug_slurps 78 | prefixes.add_bindings_to(slurper) 79 | print(f"Evaluating: {df}") 80 | results = evaluator.evaluate(rdf=slurper, focus=df, debug=debug, debug_slurps=debug_slurps, over_slurp=False) 81 | rval += results 82 | if save_graph_dir: 83 | element_name = df.rsplit('/', 1)[1] 84 | file_name = os.path.join(save_graph_dir, element_name + '.ttl') 85 | print(f"Writing: {file_name}") 86 | slurper.serialize(file_name, format="turtle") 87 | if stop_on_fail and not all(r.result for r in results): 88 | break 89 | return rval 90 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py37 py38 py39 py310 3 | 4 | [testenv] 5 | deps=unittest2 6 | whitelist_externals = python 7 | setenv = 8 | IN_TOX = true 9 | SKIP_EXTERNAL_URLS = true 10 | commands= 11 | pytest --cov=pyshex --cov-report xml --cov=./ -m unittest -k test_sparql_options 12 | pip install rdflib>=6.0.0 --upgrade 13 | pytest --cov=pyshex --cov-report xml --cov=./ -m unittest -k test_sparql_options 14 | --------------------------------------------------------------------------------