├── README ├── tests ├── __init__.py ├── test_context.json ├── test_add_classification.py ├── test_multiple_instantiation.py ├── test_reader.py ├── test_currency.py ├── test_vocab.py ├── test_dimensions.py └── test_model.py ├── cromulent ├── __init__.py ├── data │ ├── __init__.py │ ├── key_order.json │ ├── overrides.json │ └── crm-profile.json ├── multiple_instantiation.py └── reader.py ├── requirements.txt ├── setup.cfg ├── experimental ├── crm.py ├── bibframe.py └── bibframe_reader.py ├── .travis.yml ├── setup.py ├── CHANGELOG.md ├── .gitignore ├── utils ├── old │ ├── merge_inverses.py │ └── make_inverses.py ├── info.py ├── make_jsonld_context.py ├── process_ontologies.py └── data │ ├── linkedart_crm_enhancements.xml │ └── linkedart.xml ├── examples ├── example.py ├── json-to-lod.py ├── sales-to-lod.py └── knoedler-to-lod.py ├── .circleci └── config.yml ├── README.md └── LICENSE /README: -------------------------------------------------------------------------------- 1 | README.md -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cromulent/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cromulent/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ordereddict 2 | rdflib 3 | PyLD 4 | -------------------------------------------------------------------------------- /tests/test_context.json: -------------------------------------------------------------------------------- 1 | {"@context": 
{"id": "@id"}} 2 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [wheel] 2 | universal = 1 3 | 4 | [check-manifest] 5 | ignore = 6 | .travis.yml 7 | tox.ini 8 | .gitignore 9 | 10 | -------------------------------------------------------------------------------- /experimental/crm.py: -------------------------------------------------------------------------------- 1 | import model 2 | from model import CromulentFactory, build_classes, \ 3 | KEY_ORDER_HASH, KEY_ORDER_DEFAULT 4 | 5 | factory = CromulentFactory("http://lod.example.org/museum/", \ 6 | context="http://linked.art/ns/context/1/full.jsonld") 7 | build_classes() 8 | model.factory = factory -------------------------------------------------------------------------------- /experimental/bibframe.py: -------------------------------------------------------------------------------- 1 | import model 2 | from model import CromulentFactory, build_classes, \ 3 | KEY_ORDER_HASH, KEY_ORDER_DEFAULT 4 | 5 | factory = CromulentFactory("http://lod.example.org/museum/", \ 6 | load_context=False) 7 | build_classes("utils/bibframe_vocab.tsv", top="rdf:Resource") 8 | model.factory = factory -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | dist: xenial 3 | 4 | python: 5 | - '2.7' 6 | - '3.6' 7 | - '3.7' 8 | - '3.8' 9 | install: 10 | - pip install coveralls ordereddict 11 | - python setup.py install 12 | notifications: 13 | email: 14 | recipients: 15 | - azaroth42@gmail.com 16 | script: 17 | coverage run --source=cromulent setup.py test 18 | after_success: 19 | coveralls 20 | -------------------------------------------------------------------------------- /tests/test_add_classification.py: 
-------------------------------------------------------------------------------- 1 | 2 | import unittest 3 | 4 | try: 5 | from collections import OrderedDict 6 | except: 7 | # 2.6 8 | from ordereddict import OrderedDict 9 | 10 | from cromulent import model, vocab 11 | from cromulent.model import factory 12 | 13 | class TestAddClassification(unittest.TestCase): 14 | def test_add_classification(self): 15 | amnt = model.MonetaryAmount(ident='') 16 | amnt.value = 7.0 17 | self.assertNotIn('Asking Price', factory.toString(amnt)) 18 | vocab.add_classification(amnt, vocab.AskingPrice) 19 | self.assertIn('Asking Price', factory.toString(amnt)) 20 | 21 | if __name__ == '__main__': 22 | unittest.main() 23 | -------------------------------------------------------------------------------- /tests/test_multiple_instantiation.py: -------------------------------------------------------------------------------- 1 | 2 | import unittest 3 | 4 | try: 5 | from collections import OrderedDict 6 | except: 7 | # 2.6 8 | from ordereddict import OrderedDict 9 | 10 | from cromulent import multiple_instantiation as mi 11 | from cromulent.model import factory, Person, DataError, Dimension 12 | 13 | 14 | class TestMIClasses(unittest.TestCase): 15 | 16 | def test_destruction(self): 17 | expect = OrderedDict([('id', u'http://lod.example.org/museum/Activity/1'), 18 | ('type', ['Destruction', 'Activity']), ('_label', "Test Destruction")]) 19 | mi.DestructionActivity._okayToUse = 1 20 | da = mi.DestructionActivity("1") 21 | da._label = "Test Destruction" 22 | factory.context_uri = "" 23 | dajs = factory.toJSON(da) 24 | self.assertEqual(dajs, expect) 25 | 26 | -------------------------------------------------------------------------------- /cromulent/multiple_instantiation.py: -------------------------------------------------------------------------------- 1 | 2 | # This assumes the default CIDOC-CRM, even though the model code 3 | # can generate classes for any ontology 4 | 5 | import inspect 6 | 
from cromulent.model import Destruction, EndOfExistence, Activity, Appellation, LinguisticObject 7 | 8 | # DestuctionActivity class as CRM has a Destruction Event and recommends multi-classing 9 | # WARNING: instantiating this class in the default profile will raise an error 10 | 11 | class DestructionActivity(Destruction, Activity): 12 | _uri_segment = "Activity" 13 | _type = ["crm:E6_Destruction", "crm:E7_Activity"] 14 | 15 | @property 16 | def type(self): 17 | return ["Destruction", "Activity"] 18 | DestructionActivity._classhier = inspect.getmro(DestructionActivity)[:-1] 19 | 20 | # And hence we make an EndOfExistence+Activity class 21 | # for all activities that end existences 22 | class EoEActivity(EndOfExistence, Activity): 23 | _uri_segment = "Activity" 24 | _type = ["crm:64_End_of_Existence", "crm:E7_Activity"] 25 | _niceType = ["EndOfExistence", "Activity"] 26 | 27 | @property 28 | def type(self): 29 | return ["EndOfExistence", "Activity"] 30 | 31 | EoEActivity._classhier = inspect.getmro(EoEActivity)[:-1] 32 | 33 | # No need for Linguistic Appellation any more, as we have E33_E41_Linguistic_Appellation -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | import sys 3 | 4 | if (sys.version_info[0:2] < (2,7)): 5 | install_requires =['ordereddict', 'future', 'rdflib', 'PyLD'] 6 | else: 7 | install_requires = ['rdflib', 'PyLD'] 8 | 9 | setup( 10 | name = 'cromulent', 11 | packages = ['cromulent'], 12 | package_data = { 13 | 'cromulent': ['data/crm_vocab.tsv', 'data/overrides.json', 14 | 'data/key_order.json', 'data/linked-art.json', 15 | 'data/cidoc-extension.json', 'data/crm-profile.json'] 16 | }, 17 | test_suite="tests", 18 | version = '0.16.11', 19 | description = 'A library for mapping CIDOC-CRM (v7.1) classes to Python objects', 20 | author = 'Rob Sanderson', 21 | author_email = 
'robert.sanderson@yale.edu', 22 | url = 'https://github.com/linked-art/crom', 23 | install_requires=install_requires, 24 | classifiers = [ 25 | "Programming Language :: Python", 26 | "Programming Language :: Python :: 3", 27 | "Programming Language :: Python :: 2", 28 | "License :: OSI Approved :: Apache Software License", 29 | "Development Status :: 3 - Alpha", 30 | "Intended Audience :: Developers", 31 | "Operating System :: OS Independent", 32 | "Topic :: Software Development :: Libraries :: Python Modules", 33 | ] 34 | ) 35 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Cromulent (CROM) Change Log 2 | 3 | Any notable changes to CROM that affect either functionality or output will be documented in this file (the format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)). 4 | 5 | ## [Unreleased] 2020-11-03 6 | 7 | ## Added 8 | 9 | * Added this change log [[DEV-6985](https://jira.getty.edu/browse/DEV-6985)]. 10 | 11 | * Reinstated the `Relationship` entity and its associated properties `relates_to`, `relates_from`, `related_to_by`, and `related_from_by` as these are in production data modelling use, as their sudden removal led to runtime exceptions and prevented code reliant on CROM from operating [[DEV-6985](https://jira.getty.edu/browse/DEV-6985)]. 12 | 13 | * Reinstated the `Geometry` and `CoordinateSystem` entities as these are in production data modelling use, as their sudden removal led to runtime exceptions and prevented code reliant on CROM from operating [[DEV-6985](https://jira.getty.edu/browse/DEV-6985)]. 14 | 15 | * Reinstated the `current_keeper` and `current_keeper_of` properties as these are in production data modelling use, as their sudden removal led to runtime exceptions and prevented code reliant on CROM from operating [[DEV-6985](https://jira.getty.edu/browse/DEV-6985)]. 
16 | 17 | ## Changed 18 | 19 | * Imported the updated Getty-local `linked-art.json` context document from the `getty-contexts` repository to ensure consistency [[DEV-6984](https://jira.getty.edu/browse/DEV-6984)]. 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | 91 | .DS_Store 92 | err_output 93 | tests/fishbat.bar 94 | -------------------------------------------------------------------------------- 
/utils/old/merge_inverses.py: -------------------------------------------------------------------------------- 1 | from lxml import etree 2 | import codecs 3 | 4 | NS = {'rdf':"http://www.w3.org/1999/02/22-rdf-syntax-ns#", 5 | 'xsd':"http://www.w3.org/2001/XMLSchema#", 6 | 'rdfs':"http://www.w3.org/2000/01/rdf-schema#", 7 | 'dcterms':"http://purl.org/dc/terms/", 8 | 'owl':"http://www.w3.org/2002/07/owl#", 9 | 'crm':"http://www.cidoc-crm.org/cidoc-crm/", 10 | 'xml': "http://www.w3.org/XML/1998/namespace" 11 | } 12 | 13 | fh = file('data/inverses.xml') 14 | data = fh.read() 15 | fh.close() 16 | dom = etree.XML(data) 17 | 18 | inverses = {} 19 | props = dom.xpath("//rdf:Property",namespaces=NS) 20 | for p in props: 21 | name = p.xpath('@rdf:about', namespaces=NS)[0] 22 | try: 23 | inv = p.xpath('./owl:inverseOf/@rdf:resource', namespaces=NS)[0] 24 | inverses[name] = inv 25 | except: 26 | pass 27 | 28 | fh = file('data/cidoc.xml') 29 | data = fh.read() 30 | fh.close() 31 | dom = etree.XML(data) 32 | 33 | # Now insert them into the right blocks 34 | 35 | for (n,i) in inverses.items(): 36 | try: 37 | elem = dom.xpath('//rdf:Property[@rdf:about="%s"]' % n, namespaces=NS)[0] 38 | if not elem.xpath('./owl:inverseOf', namespaces=NS): 39 | inv = etree.SubElement(elem, "{http://www.w3.org/2002/07/owl#}inverseOf") 40 | inv.set("{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource", i) 41 | inv.tail = "\n" 42 | except: 43 | print "Could not find property %s" % n 44 | 45 | 46 | # And rewrite the file 47 | 48 | fh = file('data/cidoc_inversed.xml', 'w') 49 | fh.write(etree.tostring(dom, pretty_print=True)) 50 | fh.close() 51 | -------------------------------------------------------------------------------- /cromulent/data/key_order.json: -------------------------------------------------------------------------------- 1 | { 2 | "@context": 0, 3 | "id": 1, 4 | "type": 2, 5 | "_label": 3, 6 | "classified_as": 4, 7 | 8 | "value": 5, 9 | "content": 5, 10 | "upper_value_limit": 6, 11 | 
"lower_value_limit": 7, 12 | "unit": 8, 13 | 14 | "identified_by": 10, 15 | "defined_by": 11, 16 | "referred_to_by" : 15, 17 | "about": 18, 18 | "technique": 19, 19 | 20 | "timespan": 20, 21 | "begin_of_the_begin": 21, 22 | "end_of_the_begin": 22, 23 | "begin_of_the_end": 23, 24 | "end_of_the_end": 24, 25 | "duration": 25, 26 | 27 | "started_by": 26, 28 | "continued": 26, 29 | "finished_by": 27, 30 | "continued_by": 27, 31 | 32 | "took_place_at": 30, 33 | "carried_out_by": 31, 34 | "used_specific_object": 33, 35 | "removed": 34, 36 | "diminished": 35, 37 | "added": 34, 38 | "augmented": 35, 39 | "transformed": 35, 40 | "produced": 38, 41 | "destroyed": 39, 42 | "born": 38, 43 | "died": 39, 44 | "formed": 38, 45 | "dissolved": 39, 46 | "created": 38, 47 | 48 | "assigned_by": 39, 49 | 50 | "carried_out": 40, 51 | "dimension": 41, 52 | "made_of": 42, 53 | "language": 42, 54 | "part_of": 43, 55 | "approximated_by": 44, 56 | "member_of": 45, 57 | 58 | "transferred_title_of": 50, 59 | "transferred_title_from": 51, 60 | "transferred_title_to": 52, 61 | "transferred_custody_of": 50, 62 | "transferred_custody_from": 51, 63 | "transferred_custody_to": 52, 64 | "paid_amount": 50, 65 | "paid_from": 51, 66 | "paid_to": 52, 67 | "moved": 50, 68 | "moved_from": 51, 69 | "moved_to": 52, 70 | "participant": 53, 71 | 72 | "shows": 60, 73 | "carries": 61, 74 | 75 | "consists_of": 10001, 76 | "composed_of": 10001, 77 | "part": 10001, 78 | "temporally_contains": 10001, 79 | "spatially_contains": 10001, 80 | "member": 10001 81 | } -------------------------------------------------------------------------------- /examples/example.py: -------------------------------------------------------------------------------- 1 | 2 | from cidoc_orm import factory, Document, Activity, Event, TimeSpan, ManMadeObject, Acquisition, Type 3 | 4 | # Locally "subclass" to create consistent patterns with E55 and AAT 5 | class Painting(ManMadeObject): 6 | def __init__(self, *args, **kw): 7 | super(Painting, 
self).__init__(*args, **kw) 8 | self.has_type = Type("http://vocab.getty.edu/aat/300033618") 9 | 10 | class LugtNumber(Identifier): 11 | def __init__(self, *args, **kw): 12 | super(LugtNumber, self).__init__(*args, **kw) 13 | # ??? 14 | self.has_type = Type("http://vocab.getty.edu/aat/300033618") 15 | 16 | class TMSNumber(Identifier): 17 | def __init__(self, *args, **kw): 18 | super(TMSNumber, self).__init__(*args, **kw) 19 | # Repository Number 20 | self.has_type = Type("http://vocab.getty.edu/aat/300404621") 21 | 22 | class LotNumber(Identifier): 23 | def __init__(self, *args, **kw): 24 | super(TMSNumber, self).__init__(*args, **kw) 25 | # Lot Number 26 | self.has_type = Type("http://vocab.getty.edu/aat/300404628") 27 | 28 | 29 | # Or actually subclass in an extension vocab 30 | class Mosaic(ManMadeObject): 31 | _type = "extension:Mosaic" 32 | 33 | factory.base_url = "http://data.getty.edu/provenance/" 34 | factory.default_lang = "en" 35 | 36 | catalog = Document("catalog") 37 | page = Document("catalog-entry") 38 | catalog.has_component = page 39 | auction = Activity("auction") 40 | catalog.documents = auction 41 | lot = Activity("lot") 42 | auction.consists_of = lot 43 | page.documents = lot 44 | txn = Acquisition("sale") 45 | lot.consists_of = txn 46 | what = Painting('my-painting') 47 | txn.transferred_title_of = what 48 | what.label = "My First Paint By Numbers" 49 | what.is_identified_by = TMSNumber("") 50 | 51 | 52 | print factory.toString(catalog, compact=False) 53 | -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | # Python CircleCI 2.0 configuration file 2 | # 3 | # Check https://circleci.com/docs/2.0/language-python/ for more details 4 | # 5 | version: 2 6 | jobs: 7 | build: 8 | docker: 9 | # specify the version you desire here 10 | # use `-browsers` prefix for selenium tests, e.g. 
`3.6.1-browsers` 11 | - image: circleci/python:3.8.0 12 | 13 | # Specify service dependencies here if necessary 14 | # CircleCI maintains a library of pre-built images 15 | # documented at https://circleci.com/docs/2.0/circleci-images/ 16 | # - image: circleci/postgres:9.4 17 | 18 | working_directory: ~/repo 19 | 20 | steps: 21 | - checkout 22 | 23 | # Download and cache dependencies 24 | - restore_cache: 25 | keys: 26 | - v1-dependencies-{{ checksum "requirements.txt" }} 27 | # fallback to using the latest cache if no exact match is found 28 | - v1-dependencies- 29 | 30 | - run: 31 | name: install dependencies 32 | command: | 33 | python3 -m venv venv 34 | . venv/bin/activate 35 | pip install -r requirements.txt 36 | 37 | - save_cache: 38 | paths: 39 | - ./venv 40 | key: v1-dependencies-{{ checksum "requirements.txt" }} 41 | 42 | # run tests! 43 | # this example uses Django's built-in test-runner 44 | # other common Python testing frameworks include pytest and nose 45 | # https://pytest.org 46 | # https://nose.readthedocs.io 47 | - run: 48 | name: run tests 49 | command: | 50 | . 
venv/bin/activate 51 | python setup.py test 52 | 53 | - store_artifacts: 54 | path: test-reports 55 | destination: test-reports 56 | -------------------------------------------------------------------------------- /utils/old/make_inverses.py: -------------------------------------------------------------------------------- 1 | from lxml import etree 2 | import codecs 3 | 4 | fh = file('cidoc.xml') 5 | data = fh.read() 6 | fh.close() 7 | 8 | NS = {'rdf':"http://www.w3.org/1999/02/22-rdf-syntax-ns#", 9 | 'xsd':"http://www.w3.org/2001/XMLSchema#", 10 | 'rdfs':"http://www.w3.org/2000/01/rdf-schema#", 11 | 'dcterms':"http://purl.org/dc/terms/", 12 | 'owl':"http://www.w3.org/2002/07/owl#", 13 | 'crm':"http://www.cidoc-crm.org/cidoc-crm/", 14 | 'xml': "http://www.w3.org/XML/1998/namespace" 15 | } 16 | 17 | dom = etree.XML(data) 18 | names = [] 19 | inverses = {} 20 | 21 | props = dom.xpath("//rdf:Property",namespaces=NS) 22 | for p in props: 23 | name = p.xpath('@rdf:about', namespaces=NS)[0] 24 | names.append(name) 25 | 26 | for p in props: 27 | name = p.xpath('@rdf:about', namespaces=NS)[0] 28 | fu = name.find('_') 29 | pid = name[:fu] 30 | if pid[-1] in ['a', 'b']: 31 | # No inverses for botb eote 32 | continue 33 | inverse = "" 34 | if pid[-1] == "i": 35 | pid = pid[:-1] 36 | else: 37 | pid = pid + "i" 38 | pid += "_" 39 | 40 | for i in names: 41 | if i.startswith(pid) and i != name: 42 | inverse = i 43 | break 44 | if inverse: 45 | inverses[name] = inverse 46 | 47 | # Now print ONLY the inverses 48 | outlines = [ 49 | '' 50 | ] 51 | 52 | for n in names: 53 | if n in inverses: 54 | outlines.append(' ' % n ) 55 | outlines.append(' ' % inverses[n]) 56 | outlines.append(' ') 57 | outlines.append('') 58 | outstr = '\n'.join(outlines) 59 | 60 | fh = file('data/inverses.xml', 'w') 61 | fh.write(outstr) 62 | fh.close() 63 | -------------------------------------------------------------------------------- /cromulent/data/overrides.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "P45": "made_of", 3 | "P7i": "location_of", 4 | "P5": "subState", 5 | "P5i": "subState_of", 6 | "P20i": "specific_purpose_of", 7 | "P42": "assigned_type", 8 | "P42i": "type_assigned_by", 9 | "P37": "assigned_identifier", 10 | "P37i": "identifier_assigned_by", 11 | "P35i": "condition_identified_by", 12 | 13 | "P28": "transferred_custody_from", 14 | "P29": "transferred_custody_to", 15 | "P29i": "acquired_custody_through", 16 | "P14i": "carried_out", 17 | "P140": "assigned_to", 18 | "P50": "current_custodian", 19 | "P50i": "current_custodian_of", 20 | 21 | "P9": "part", 22 | "P9i": "part_of", 23 | "P46": "part", 24 | "P46i": "part_of", 25 | "P86": "part_of", 26 | "P86i": "part", 27 | "P89": "part_of", 28 | "P89i": "part", 29 | "P106": "part", 30 | "P106i": "part_of", 31 | "P127i": "part", 32 | "P127": "part_of", 33 | 34 | "P148": "c_part", 35 | "P148i": "c_part_of", 36 | 37 | "P107": "member", 38 | "P107i": "member_of", 39 | "P56": "bears", 40 | "la:has_member": "member", 41 | "la:member_of": "member_of", 42 | 43 | "P32": "technique", 44 | "P33": "specific_technique", 45 | "P12": "involved", 46 | "P101": "general_use", 47 | "P100i": "died", 48 | "P74": "residence", 49 | 50 | "P65": "shows", 51 | "P2": "classified_as", 52 | "P190": "content", 53 | "P177": "assigned_property", 54 | 55 | "P133": "distinct_from", 56 | "P164i": "timespan_of_presence", 57 | "P151i": "participated_in_formation", 58 | "P165i": "incorporated_by", 59 | "P132": "volume_overlaps_with", 60 | "P135i": "type_created_by", 61 | "P139": "alternative", 62 | 63 | "P172": "spatially_contains", 64 | "P186i": "type_produced_by", 65 | "P168": "defined_by", 66 | 67 | "P165": "presence_of", 68 | "P195": "presence_of_thing", 69 | "P195i": "thing_presence", 70 | "P196i": "thing_defined_by", 71 | 72 | "skos:closeMatch": "close_match", 73 | "skos:exactMatch": "exact_match", 74 | "dcterms:conformsTo": "conforms_to", 75 | 
"dcterms:relation": "related", 76 | "schema:genre": "style", 77 | "rdfs:seeAlso": "see_also", 78 | "rdfs:label": "_label", 79 | "sci:O13_triggers": "caused", 80 | "sci:O13i_is_triggered_by": "caused_by", 81 | "sci:O19_encountered_object": "encountered", 82 | "sci:O19i_was_object_encountered_at": "encountered_by" 83 | } 84 | -------------------------------------------------------------------------------- /utils/info.py: -------------------------------------------------------------------------------- 1 | 2 | import sys, argparse 3 | from cromulent import model, vocab 4 | 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument('what') 7 | parser.add_argument('--okay', '--profile', dest="okay", type=bool) 8 | parser.add_argument('--filter', dest="filter") 9 | parser.add_argument('--self', dest="onlySelf", type=bool) 10 | args = parser.parse_args() 11 | 12 | def list_all_props(what, filter=None, okay=None): 13 | props = [] 14 | ks = [] 15 | for c in what._classhier: 16 | for k,v in c._all_properties.items(): 17 | if not k in ks and \ 18 | (not okay or (okay and v.profile_okay)) and \ 19 | (filter is None or isinstance(filter, v.range) or \ 20 | filter is v.range): 21 | props.append(v) 22 | ks.append(k) 23 | props.sort(key=lambda x: x.property) 24 | return props 25 | 26 | def list_my_props(what, filter=None, okay=None): 27 | props = [] 28 | ks = [] 29 | for k,v in what._all_properties.items(): 30 | if not k in ks and \ 31 | (not okay or (okay and v.profile_okay)) and \ 32 | (filter is None or isinstance(filter, v.range) or \ 33 | filter is v.range): 34 | props.append(v) 35 | ks.append(k) 36 | props.sort(key=lambda x: x.property) 37 | return props 38 | 39 | what = args.what 40 | try: 41 | c = getattr(model, what) 42 | except: 43 | try: 44 | c = getattr(vocab, what) 45 | except: 46 | print(f"Unknown model or vocab class: {what}") 47 | sys.exit(1) 48 | 49 | if args.filter: 50 | try: 51 | cf = getattr(model, args.filter) 52 | f = cf() 53 | except: 54 | f = None 55 | 
else: 56 | cf = None 57 | f = None 58 | 59 | 60 | print(f"Main Class: \033[95m{c.__name__}\033[0m") 61 | if cf: 62 | print(f"Filtered To: \033[95m{cf.__name__}\033[0m") 63 | else: 64 | print("Filtered To: None") 65 | print(f"Using Profile: {args.okay}") 66 | 67 | 68 | 69 | model.factory.validate_profile = False 70 | instance = c() 71 | 72 | if args.onlySelf: 73 | ap = list_my_props(instance, okay=args.okay, filter=f) 74 | else: 75 | ap = list_all_props(instance, okay=args.okay, filter=f) 76 | 77 | #ap2 = instance.list_all_props(okay=args.okay, filter=f) 78 | 79 | 80 | for pi in ap: 81 | if pi.property in ['close_match', 'exact_match']: 82 | continue 83 | out = f"{pi.property} ({pi.predicate})" 84 | if pi.inverse_property: 85 | inv = f"{pi.inverse_property} ({pi.inverse_predicate})" 86 | else: 87 | inv = "" 88 | if pi.range == str: 89 | rng = "\033[93mLiteral" 90 | else: 91 | rng = pi.range.__name__ 92 | # old skool colorizing 93 | print(f"\033[95m{what:<15} \033[92m{out:<50} / {inv:<50} \033[95m{rng}\033[0m") 94 | 95 | -------------------------------------------------------------------------------- /tests/test_reader.py: -------------------------------------------------------------------------------- 1 | 2 | import unittest 3 | 4 | try: 5 | from collections import OrderedDict 6 | except: 7 | # 2.6 8 | from ordereddict import OrderedDict 9 | 10 | from cromulent import reader 11 | from cromulent.model import factory, Person, DataError, BaseResource, \ 12 | Dimension, override_okay, AttributeAssignment 13 | 14 | from cromulent import vocab 15 | 16 | class TestReader(unittest.TestCase): 17 | 18 | def setUp(self): 19 | self.reader = reader.Reader() 20 | # ensure we can use parent_of 21 | override_okay(Person, 'parent_of') 22 | # Person._properties['parent_of']['multiple'] = 1 23 | 24 | def test_read(self): 25 | self.assertRaises(DataError, self.reader.read, "") 26 | self.assertRaises(DataError, self.reader.read, "This is not JSON") 27 | self.assertRaises(DataError, 
self.reader.read, "{}") 28 | 29 | whostr = '{"type": "Person", "_label": "me"}' 30 | self.assertTrue(isinstance(self.reader.read(whostr), Person)) 31 | 32 | whostr = '{"@context": "fishbat", "type": "Person", "_label": "me"}' 33 | self.assertTrue(isinstance(self.reader.read(whostr), Person)) 34 | 35 | levelstr = '{"type": "Person", "parent_of": {"type": "Person", "_label": "child"}}' 36 | self.assertTrue(isinstance(self.reader.read(levelstr).parent_of[0], Person)) 37 | 38 | basestr = '{"_label": "base"}' 39 | self.assertTrue(isinstance(self.reader.read(basestr), BaseResource)) 40 | 41 | unknown = '{"type":"FishBat"}' 42 | self.assertRaises(DataError, self.reader.read, unknown) 43 | 44 | unknown2 = '{"type":"Person", "fishbat": "bob"}' 45 | self.assertRaises(DataError, self.reader.read, unknown) 46 | 47 | def test_attrib_assign(self): 48 | vocab.add_attribute_assignment_check() 49 | 50 | data = """ 51 | { 52 | "id": "https://linked.art/example/activity/12", 53 | "type": "AttributeAssignment", 54 | "assigned": { 55 | "id": "https://linked.art/example/name/10", 56 | "type": "Name", 57 | "content": "Exhibition Specific Name" 58 | }, 59 | "assigned_property": "identified_by", 60 | "assigned_to": { 61 | "id": "https://linked.art/example/object/12", 62 | "type": "HumanMadeObject", 63 | "_label": "Real Painting Name" 64 | } 65 | } 66 | """ 67 | d = self.reader.read(data) 68 | self.assertTrue(isinstance(d, AttributeAssignment)) 69 | 70 | 71 | def test_vocab_collision(self): 72 | # Test that the algorithm picks the right vocab instance 73 | # if multiple have the same AAT term but different base class 74 | 75 | data = """ 76 | { 77 | "type": "LinguisticObject", 78 | "_label": "Sale recorded in catalog: B-267 0003 (1817) (record number 22947)", 79 | "part_of": [ 80 | { 81 | "type": "LinguisticObject", 82 | "_label": "Sale Catalog B-267", 83 | "classified_as": [ 84 | { 85 | "id": "http://vocab.getty.edu/aat/300026068", 86 | "type": "Type", 87 | "_label": "Auction Catalog" 88 | 
} 89 | ] 90 | } 91 | ] 92 | } 93 | """ 94 | d = self.reader.read(data) 95 | self.assertTrue(isinstance(d.part_of[0], vocab.AuctionCatalogText)) 96 | 97 | -------------------------------------------------------------------------------- /experimental/bibframe_reader.py: -------------------------------------------------------------------------------- 1 | from lxml import etree 2 | import codecs 3 | import json 4 | 5 | default_key_order = 10000 6 | 7 | NS = {'rdf':"http://www.w3.org/1999/02/22-rdf-syntax-ns#", 8 | 'xsd':"http://www.w3.org/2001/XMLSchema#", 9 | 'rdfs':"http://www.w3.org/2000/01/rdf-schema#", 10 | 'dcterms':"http://purl.org/dc/terms/", 11 | 'owl':"http://www.w3.org/2002/07/owl#", 12 | 'crm':"http://www.cidoc-crm.org/cidoc-crm/", 13 | 'skos':"http://www.w3.org/2004/02/skos/core#", 14 | 'xml': "http://www.w3.org/XML/1998/namespace" 15 | } 16 | 17 | fh = file('bibframe.rdf') 18 | data = fh.read() 19 | fh.close() 20 | dom = etree.XML(data) 21 | stuff = [] 22 | 23 | property_overrides = {} 24 | 25 | classes = dom.xpath("//rdfs:Class", namespaces=NS) 26 | 27 | if not classes: 28 | classes = dom.xpath('//owl:Class', namespaces=NS) 29 | 30 | for c in classes: 31 | label = c.xpath('./rdfs:label/text()', namespaces=NS)[0] 32 | try: 33 | comment = c.xpath('./rdfs:comment/text()', namespaces=NS) 34 | if not comment: 35 | comment = c.xpath('./skos:definition/text()', namespaces=NS) 36 | if comment: 37 | comment = comment[0] 38 | comment = comment.strip() 39 | comment = comment.replace('\n', '\\n').replace('\t', ' ') 40 | except: 41 | comment = "" 42 | name = c.xpath('@rdf:about', namespaces=NS)[0] 43 | 44 | subCls = c.xpath('./rdfs:subClassOf/@rdf:resource', namespaces=NS) 45 | if subCls: 46 | # could be multiples 47 | subCls = '|'.join(subCls) 48 | else: 49 | subCls = "" 50 | 51 | uc1 = name.rfind("/") 52 | ccname = name[uc1+1:] 53 | ccname = ccname.replace("_or_", "_Or_").replace("_of_", "_Of_") 54 | ccname = ccname.replace('-', '').replace('_', '') 55 | 56 | 
stuff.append([name, "class", ccname, label, comment, subCls]) 57 | 58 | props = dom.xpath("//rdf:Property",namespaces=NS) 59 | if not props: 60 | props = dom.xpath('//owl:DatatypeProperty', namespaces=NS) 61 | props.extend(dom.xpath('owl:ObjectProperty', namespaces=NS)) 62 | 63 | for p in props: 64 | label = p.xpath('./rdfs:label/text()', namespaces=NS)[0] 65 | try: 66 | comment = p.xpath('./rdfs:comment/text()', namespaces=NS) 67 | if not comment: 68 | comment = c.xpath('./skos:definition/text()', namespaces=NS) 69 | if comment: 70 | comment = comment[0] 71 | comment = comment.strip() 72 | comment = comment.replace('\n', '\\n').replace('\t', ' ') 73 | except: 74 | comment = "" 75 | 76 | name = p.xpath('@rdf:about', namespaces=NS)[0] 77 | domn = p.xpath('./rdfs:domain/@rdf:resource', namespaces=NS) 78 | if domn: 79 | domn = domn[0] 80 | for (k,v) in NS.items(): 81 | domn = domn.replace(v,"%s:" % k) 82 | else: 83 | domn = "" 84 | rang = p.xpath('./rdfs:range/@rdf:resource', namespaces=NS) 85 | if rang: 86 | rang = rang[0] 87 | for (k,v) in NS.items(): 88 | rang = rang.replace(v,"%s:" % k) 89 | else: 90 | rang = "" 91 | subProp = p.xpath('./rdfs:subPropertyOf/@rdf:resource', namespaces=NS) 92 | if subProp: 93 | subProp = subProp[0] 94 | else: 95 | subProp = "" 96 | 97 | inverse = p.xpath('./owl:inverseOf/@rdf:resource', namespaces=NS) 98 | if inverse: 99 | inverse = inverse[0] 100 | else: 101 | inverse = "" 102 | 103 | uc1 = name.find("_") 104 | pno = name[:uc1] 105 | if property_overrides.has_key(pno): 106 | ccname = property_overrides[pno] 107 | else: 108 | ccname = name[uc1+1:] 109 | ccname = ccname.replace("-", "") 110 | if ccname.startswith("is_"): 111 | ccname = ccname[3:] 112 | elif ccname.startswith("has_") or ccname.startswith("had_") or ccname.startswith("was_"): 113 | ccname = ccname[4:] 114 | 115 | # koi = str(key_order_hash.get(ccname, default_key_order)) 116 | koi = "10000" 117 | stuff.append([name, "property", ccname, label, comment, subProp, domn, 
rang, inverse, koi]) 118 | 119 | outdata = '\n'.join(['\t'.join(x) for x in stuff]) 120 | fh = codecs.open('bibframe_vocab.tsv', 'w', 'utf-8') 121 | fh.write(outdata) 122 | fh.close() 123 | -------------------------------------------------------------------------------- /tests/test_currency.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import unittest 4 | try: 5 | from contextlib import suppress 6 | except: 7 | # Python 2.7 8 | suppress = None 9 | import pprint 10 | from datetime import datetime 11 | from cromulent import model, vocab 12 | from cromulent.extract import extract_monetary_amount 13 | import cromulent.extract 14 | 15 | CUSTOM_MAPPING = { 16 | 'xxx': vocab.register_instance('xxx custom currency', {'parent': model.Currency, 'id': '999999999', 'label': 'My Dollars'}), 17 | 'zzz': 'us dollars' 18 | } 19 | 20 | class TestCurrencyExtraction(unittest.TestCase): 21 | ''' 22 | Test the ability to extract currency data. 
23 | ''' 24 | def setUp(self): 25 | pass 26 | 27 | def tearDown(self): 28 | pass 29 | 30 | def test_extract_simple(self): 31 | e = extract_monetary_amount({ 32 | 'price': '10.0', 33 | 'currency': 'pounds' 34 | }) 35 | self.assertEqual(e.type, 'MonetaryAmount') 36 | self.assertEqual(e._label, '10.00 pounds') 37 | self.assertEqual(e.value, 10) 38 | c = e.currency 39 | self.assertEqual(c.type, 'Currency') 40 | self.assertEqual(c._label, 'British Pounds') 41 | 42 | def test_extract_comma_separated(self): 43 | e = extract_monetary_amount({ 44 | 'price': '1,280.5', 45 | 'currency': 'pounds' 46 | }) 47 | self.assertEqual(e.type, 'MonetaryAmount') 48 | self.assertEqual(e._label, '1,280.50 pounds') 49 | self.assertEqual(e.value, 1280.50) 50 | c = e.currency 51 | self.assertEqual(c.type, 'Currency') 52 | self.assertEqual(c._label, 'British Pounds') 53 | 54 | def test_extract_label_digits(self): 55 | e = extract_monetary_amount({ 56 | 'price': '1,280.5', 57 | 'currency': 'pounds' 58 | }, truncate_label_digits=4) 59 | self.assertEqual(e.type, 'MonetaryAmount') 60 | self.assertEqual(e._label, '1,280.5000 pounds') 61 | self.assertEqual(e.value, 1280.50) 62 | c = e.currency 63 | self.assertEqual(c.type, 'Currency') 64 | self.assertEqual(c._label, 'British Pounds') 65 | 66 | def test_extract_multiple_comma_separated(self): 67 | e = extract_monetary_amount({ 68 | 'price': '1,310,720.5', 69 | 'currency': 'pounds' 70 | }) 71 | self.assertEqual(e.type, 'MonetaryAmount') 72 | self.assertEqual(e._label, '1,310,720.50 pounds') 73 | self.assertEqual(e.value, 1310720.5) 74 | c = e.currency 75 | self.assertEqual(c.type, 'Currency') 76 | self.assertEqual(c._label, 'British Pounds') 77 | 78 | def test_extract_est(self): 79 | e = extract_monetary_amount({ 80 | 'est_price': '12.0', 81 | 'currency': 'pounds' 82 | }) 83 | self.assertEqual(e.value, 12) 84 | c = e.currency 85 | self.assertEqual(e.classified_as[0]._label, 'Estimated Price') 86 | self.assertEqual(e.currency._label, 'British Pounds') 
87 | 88 | def test_extract_start(self): 89 | e = extract_monetary_amount({ 90 | 'start_price': '8.5', 91 | 'currency': 'pounds' 92 | }) 93 | self.assertEqual(e.value, 8.5) 94 | c = e.currency 95 | self.assertEqual(e.classified_as[0]._label, 'Starting Price') 96 | self.assertEqual(e.currency._label, 'British Pounds') 97 | 98 | def test_extract_custom_currency_key(self): 99 | d = { 100 | 'price': '7', 101 | 'currency': 'zzz' 102 | } 103 | with self.assertRaises(AttributeError): 104 | e = extract_monetary_amount(d) 105 | self.assertEqual(e.currency._label, 'Custom Currency') 106 | 107 | e = extract_monetary_amount(d, currency_mapping=CUSTOM_MAPPING) 108 | self.assertEqual(e.value, 7) 109 | self.assertEqual(e.currency._label, 'US Dollars') 110 | 111 | def test_extract_custom_currency_instance(self): 112 | d = { 113 | 'price': '7', 114 | 'currency': 'xxx' 115 | } 116 | with self.assertRaises(AttributeError): 117 | e = extract_monetary_amount(d) 118 | self.assertEqual(e.currency._label, 'Custom Currency') 119 | 120 | e = extract_monetary_amount(d, currency_mapping=CUSTOM_MAPPING) 121 | self.assertEqual(e.value, 7) 122 | self.assertEqual(e.currency._label, 'My Dollars') 123 | 124 | def test_extract_price_with_citation(self): 125 | d = { 126 | 'price': '7', 127 | 'currency': 'pounds', 128 | 'citation': 'crom test suite' 129 | } 130 | e = extract_monetary_amount(d, add_citations=True) 131 | self.assertEqual(e.value, 7) 132 | self.assertEqual(e.currency._label, 'British Pounds') 133 | self.assertEqual(e.referred_to_by[0].content, 'crom test suite') 134 | 135 | 136 | if __name__ == '__main__': 137 | unittest.main() 138 | -------------------------------------------------------------------------------- /cromulent/reader.py: -------------------------------------------------------------------------------- 1 | from cromulent import model, vocab 2 | from cromulent.model import factory, DataError, OrderedDict, BaseResource 3 | from cromulent.model import STR_TYPES 4 | 5 | import 
json 6 | 7 | class Reader(object): 8 | 9 | def __init__(self, validate_props=True, validate_profile=True): 10 | self.uri_object_map = {} 11 | self.forward_refs = [] 12 | self.vocab_props = ['assigned_property'] 13 | self.vocab_classes = {} 14 | self.validate_profile = validate_profile 15 | self.validate_props = validate_props 16 | 17 | for cx in dir(vocab): 18 | what = getattr(vocab, cx) 19 | # crying cat face -- type as a @property returns the function, not the value 20 | # when calling it on a class rather than an instance 21 | try: 22 | mytype = what._classhier[0].__name__ 23 | except AttributeError: 24 | continue 25 | # find classes 26 | if (cx[0].isupper() and not hasattr(model, cx) and type(what) == type): 27 | # class 28 | self.vocab_classes[(mytype, what._classification[0].id)] = what 29 | 30 | def read(self, data): 31 | if not data: 32 | raise DataError("No data provided: %r" % data) 33 | elif type(data) in STR_TYPES: 34 | try: 35 | data = json.loads(data) 36 | except: 37 | raise DataError("Data is not valid JSON") 38 | if not data: 39 | raise DataError("No Data provided") 40 | self.uri_object_map = {} 41 | self.forward_refs = [] 42 | try: 43 | what = self.construct(data) 44 | self.process_forward_refs() 45 | self.uri_object_map = {} 46 | self.forward_refs = [] 47 | return what 48 | except: 49 | raise 50 | 51 | def process_forward_refs(self): 52 | for (what, prop, uri) in self.forward_refs: 53 | if uri in self.uri_object_map: 54 | setattr(what, prop, self.uri_object_map[uri]) 55 | else: 56 | raise NotImplementedError("No class information for %s.%s = %s" % (what, prop, uri)) 57 | 58 | def construct(self, js): 59 | # pass in json, get back object 60 | if '@context' in js: 61 | del js['@context'] 62 | 63 | ident = js.get('id', '') 64 | typ = js.get('type', None) 65 | 66 | if typ == None: 67 | clx = BaseResource 68 | else: 69 | # Get class based on name 70 | try: 71 | clx = getattr(model, typ) 72 | except AttributeError: 73 | # No such class 74 | raise 
DataError("Resource %s has unknown class %s" % (ident, typ) ) 75 | 76 | # now check vocab.ext_classes to try and refine 77 | trash = None 78 | if 'classified_as' in js: 79 | for c in js['classified_as']: 80 | i = c.get('id', '') 81 | clx2 = self.vocab_classes.get((typ, i), None) 82 | if clx2 is not None: 83 | clx = clx2 84 | trash = c 85 | break 86 | 87 | what = clx(ident=ident) 88 | what._validate_profile = self.validate_profile 89 | self.uri_object_map[ident] = what 90 | 91 | if self.validate_props: 92 | propList = what.list_all_props() 93 | 94 | # sort data by KOH to minimize chance of bad backrefs 95 | itms = list(js.items()) 96 | itms.sort(key=lambda x: factory.key_order_hash.get(x[0], 10000)) 97 | 98 | for (prop, value) in itms: 99 | if prop in ['id', 'type']: 100 | continue 101 | 102 | if self.validate_props and not prop in propList: 103 | raise DataError("Unknown property %s on %s" % (prop, clx.__name__)) 104 | 105 | # Climb looking for range 106 | for c in what._classhier: 107 | if prop in c._all_properties: 108 | rng = c._all_properties[prop].range 109 | break 110 | 111 | if type(value) != list: 112 | value = [value] 113 | for subvalue in value: 114 | if trash is not None and prop == 'classified_as' and subvalue == trash: 115 | continue 116 | if rng == str: 117 | setattr(what, prop, subvalue) 118 | elif type(subvalue) == dict or isinstance(subvalue, OrderedDict): 119 | # recurse ... 
120 | val = self.construct(subvalue) 121 | setattr(what, prop, val) 122 | elif type(subvalue) in STR_TYPES and prop in self.vocab_props: 123 | # keep as string 124 | setattr(what, prop, subvalue) 125 | elif type(subvalue) in STR_TYPES: 126 | # raw URI to be made into a class of type rng 127 | # or back reference 128 | if subvalue in self.uri_object_map: 129 | setattr(what, prop, self.uri_object_map[subvalue]) 130 | elif rng in [model.Type, BaseResource]: 131 | # Always a X, often no more info 132 | setattr(what, prop, rng(ident=subvalue)) 133 | else: 134 | self.forward_refs.append([what, prop, subvalue]) 135 | else: 136 | # No idea!! 137 | raise DataError("Value %r is not expected for %s" % (subvalue, prop)) 138 | 139 | return what 140 | 141 | 142 | -------------------------------------------------------------------------------- /tests/test_vocab.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import sys 3 | import os 4 | 5 | from cromulent import vocab, model 6 | from cromulent.model import factory 7 | 8 | class TestClassBuilder(unittest.TestCase): 9 | def setUp(self): 10 | pass 11 | 12 | def tearDown(self): 13 | pass 14 | 15 | def test_class(self): 16 | vocab.register_aat_class("TestObject1", {"parent": model.HumanMadeObject, "id": "1", "label": "example 1"}) 17 | from cromulent.vocab import TestObject1 18 | self.assertEqual(TestObject1._classification[0].id, 'http://vocab.getty.edu/aat/1') 19 | 20 | def test_instance(self): 21 | vocab.register_instance("TestMaterial2", {"parent": model.Material, "id": "2", "label": "example 2"}) 22 | self.assertTrue('TestMaterial2' in vocab.instances) 23 | tm2 = vocab.instances['TestMaterial2'] 24 | self.assertEqual(tm2.id, "http://vocab.getty.edu/aat/2") 25 | 26 | def test_metatype(self): 27 | vocab.register_instance("example", {"parent": model.Type, "id": "3", "label": "example type"}) 28 | vocab.register_aat_class("TestObject2", 29 | {"parent": 
model.HumanMadeObject, "id": "4", "label": "example typed object", "metatype": "example"}) 30 | from cromulent.vocab import TestObject2 31 | self.assertEqual(TestObject2._classification[0].classified_as[0].id, 'http://vocab.getty.edu/aat/3') 32 | 33 | def test_multitype(self): 34 | from cromulent.vocab import make_multitype_obj, Painting, Drawing 35 | inst = make_multitype_obj(Painting, Drawing) 36 | self.assertTrue(isinstance(inst, Painting)) 37 | self.assertTrue(len(inst.classified_as) == 2) 38 | self.assertTrue(inst.classified_as[1].id == "http://vocab.getty.edu/aat/300033973") 39 | 40 | from cromulent.model import HumanMadeObject 41 | 42 | inst = make_multitype_obj(HumanMadeObject, Painting) 43 | self.assertTrue(len(inst.classified_as) == 1) 44 | self.assertTrue(inst.classified_as[0].id == "http://vocab.getty.edu/aat/300033618") 45 | 46 | def test_conceptual_parts(self): 47 | r = model.Right() 48 | r2 = model.Right() 49 | self.assertRaises(model.DataError, r.__setattr__, 'part', r2) 50 | r.c_part = r2 51 | self.assertTrue(r2 in r.c_part) 52 | 53 | vocab.conceptual_only_parts() 54 | r3 = model.Right() 55 | r4 = model.Right() 56 | r3.part = r4 57 | self.assertTrue(r4 in r3.c_part) 58 | self.assertTrue("part" in model.factory.toJSON(r3)) 59 | self.assertTrue(r4 in r3.part) 60 | 61 | 62 | def test_art_setter(self): 63 | p = model.HumanMadeObject("a", art=1) 64 | p._label = "a" 65 | pj = p._toJSON(done={}) 66 | self.assertFalse(pj.get('classified_as', None)) 67 | vocab.add_art_setter() 68 | p2 = vocab.Painting("b", art=1) 69 | p2j = p2._toJSON(done={}) 70 | 71 | def test_aa_check(self): 72 | 73 | # Make sure that some other test hasn't set it 74 | try: 75 | del model.AttributeAssignment.set_assigned_property 76 | except: 77 | pass 78 | 79 | t = model.Type() 80 | aa = model.AttributeAssignment() 81 | # First check that aa accepts a type 82 | aa.assigned_property = t 83 | # And will not accept a string 84 | self.assertRaises(model.DataError, aa.__setattr__, 
"assigned_property", "classified_as") 85 | 86 | # Check we can set anything to assigned / assigned_to 87 | aa.assigned_property = None 88 | aa.assigned = aa 89 | aa.assigned_to = aa 90 | self.assertEqual(aa.assigned, aa) 91 | self.assertEqual(aa.assigned_to, aa) 92 | 93 | vocab.add_attribute_assignment_check() 94 | 95 | # This should fail right now as can't classify as an AA 96 | self.assertRaises(model.DataError, aa.__setattr__, "assigned_property", "classified_as") 97 | aa.assigned = None 98 | aa.assigned_to = None 99 | aa.assigned = t 100 | aa.assigned_to = t 101 | aa.assigned_property = "classified_as" 102 | self.assertEqual(aa.assigned_property, 'classified_as') 103 | 104 | 105 | def test_boundary_setter(self): 106 | vocab.add_linked_art_boundary_check() 107 | p = model.Person() 108 | p2 = model.Person() 109 | n = model.Name() 110 | n.content = "Test" 111 | p2.identified_by = n 112 | p.exact_match = p2 113 | # Now, Test should not appear in the resulting JSON of p 114 | factory.linked_art_boundaries = True 115 | js = factory.toJSON(p) 116 | self.assertTrue(not 'identified_by' in js['exact_match'][0]) 117 | factory.linked_art_boundaries = False 118 | js = factory.toJSON(p) 119 | self.assertTrue('identified_by' in js['exact_match'][0]) 120 | 121 | def test_procurement_boundary(self): 122 | vocab.add_linked_art_boundary_check() 123 | a = model.Activity() 124 | p = vocab.ProvenanceEntry() 125 | a.caused = p 126 | js = factory.toJSON(a) 127 | self.assertTrue(not 'classified_as' in js['caused'][0]) 128 | 129 | def test_linguistic_object_boundary(self): 130 | vocab.add_linked_art_boundary_check() 131 | jrnl = vocab.JournalText(label="journal") 132 | issue = vocab.IssueText(label="issue") 133 | issue.part_of = jrnl 134 | issue.referred_to_by = vocab.MaterialStatement(content="Statement") 135 | 136 | js = factory.toJSON(issue) 137 | # Have not embedded journal in issue 138 | self.assertTrue(not 'classified_as' in js['part_of'][0]) 139 | # Have embedded statement in 
issue 140 | self.assertTrue('content' in js['referred_to_by'][0]) 141 | self.assertTrue('type' in js['referred_to_by'][0]['classified_as'][0]['classified_as'][0]) 142 | 143 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/thegetty/crom.svg?branch=master)](https://travis-ci.org/thegetty/crom) [![Coverage Status](https://coveralls.io/repos/github/thegetty/crom/badge.svg?branch=master)](https://coveralls.io/github/thegetty/crom?branch=master) 2 | 3 | # Cromulent 4 | 5 | A Python library to make creation of CIDOC CRM easier by mapping classes/predicates to python objects/properties, thereby making the CRM "CRoMulent", a Simpsons neologism for "acceptable" or "fine". 6 | 7 | ## Status: Beta 8 | 9 | The core vocabulary loading functionality is reasonably stable. The vocabulary section is expanding as we find new, useful terms to include and will likely constantly change. 10 | 11 | The code is actively being developed and compatibility-breaking changes are thus to be expected as we use it in various projects across The J Paul Getty Trust, and beyond. 12 | 13 | ## How to Use It 14 | 15 | ### Basic Usage 16 | 17 | Import the classes from the model module. As the classes are dynamically generated, they're not in the code but will be there once the `build_classes` function has been called. 18 | 19 | ```python 20 | from cromulent.model import factory, Group 21 | g1 = Group(ident="Organization") 22 | g2 = Group(ident="Department") 23 | g1.member = g2 24 | print(factory.toString(g1, compact=False)) 25 | ``` 26 | 27 | The constructor for the classes takes the following parameters: 28 | 29 | * `ident` - an identifier to use for this instance. If specified, it should be a URI represented as a string. If it is the empty string, it will result in no identifier. 
If not specified, or specified as `None`, then it will be auto-generated by the factory if `auto_assign_id` is true, or if `auto_assign_id` is false, then it will result in no identifier. 30 | * `label` - a human readable label for the resource, to act as internal documentation for the data 31 | * `value` or `content` - a data value for the class. Dimensions and MonetaryAmounts use `value` which must be a number, and Name, Identifier, LinguisticObject and similar use `content` which must be a string. 32 | * Additional keywords may be passed in, and will be sent to class-specific initialization code. 33 | 34 | 35 | ### Vocabulary 36 | 37 | ```python 38 | from cromulent.model import factory 39 | from cromulent.vocab import Height 40 | h = Height() 41 | h.value = 6 42 | print(factory.toString(h, compact=False)) 43 | ``` 44 | 45 | ### Tricks and Gotchas 46 | 47 | * Assigning to the same property repeatedly does NOT overwrite the value, instead it appends. To overwrite a value, instead set it to a false value first. 48 | 49 | 50 | ### Factory settings 51 | 52 | There are quite a few settings for how the module works, which are managed by a `factory` object. 
53 | 54 | URI and File System Configuration: 55 | * `base_url` The base url on to which to append any slug given when an object is created 56 | * `base_dir` The base directory into which to write files, via factory.toFile() 57 | * `filename_extension` The extension to use on files written via toFile(), defaults to ".json" 58 | * `default_lang` The code for the default language to use on text values 59 | * `context_uri` The URI to use for `@context` in the JSON-LD serialization 60 | * `context_json` The parsed JSON object of the context from which the prefixes are derived 61 | * `full_names` Should the serialization use the full CRM names for classes and properties instead of the more readable ones defined in the mapping, defaults to False 62 | * `prefixes` A dictionary of prefix to URI for URIs to compress down to `prefix:slug` format 63 | * `prefixes_rev` The reverse of the prefixes dictionary 64 | * `pipe_scoped_contexts` A convenience setting for generating documentation, where properties that map to the same JSON output are represented as `short_name|full_name` to be post-processed. 65 | * `json_indent` How many spaces should each level of indentation be when serializing to a human readable form, defaults to 2 66 | * `id_type_label` Should the id, type and label properties all be used when serializing resources that have already been processed, defaults to True 67 | * `elasticsearch_compatible` Despite JSON-LD 1.0 compaction rules, should a single URI be represented as {"@id": "URI"} rather than just "URI", to make the resulting JSON compatible with elasticsearch and similar JSON processing engines. Defaults to False. 68 | * `serialize_all_resources` NOT YET IMPLEMENTED. If true, then all resources will be serialized separately, not just the top level resource. 
69 | 70 | Model Validation and Generation: 71 | * `materialize_inverses` Should the inverse relationships be set automatically, defaults to False 72 | * `validate_properties` Should the model be validated at run time when setting properties, defaults to True (this allows you to save processing time once you're certain your code does the right thing) 73 | * `validate_properties` Should the properties be validated as being part of the model at all 74 | * `validate_profile` Should the profile of which terms should be used be validated 75 | * `process_multiplicity` Should properties that allow multiple values always be an array 76 | * `validate_range` Should the object be validated that it is legal to be the value of the property 77 | * `auto_assign_id` Should a URI be autogenerated and assigned, defaults to True 78 | * `auto_id_type` The method by which the URI is generated, taken from the following values: 79 | * "int" (just increment an integer in a single value space) 80 | * "int-per-type" (increment an integer, with a separate value space per class) 81 | * "int-per-segment" (increment an integer, with a separate value space per URI segment associated with a class) 82 | * "uuid" (just use UUIDs everywhere) 83 | 84 | Internal: 85 | * `debug_level` Settings for debugging errors and warnings, defaults to "warn" 86 | * `log_stream` An object implementing the stream API to write log messages to, defaults to sys.stderr 87 | 88 | 89 | 90 | ## How it Works 91 | 92 | At import time, the library parses the vocabulary data file (data/crm_vocab.tsv) and creates Python classes in the module's global scope from each of the defined RDF classes. The names of the classes are intended to be easy to use and remember, not necessarily identical to the CRM ontology's names. It also records the properties that can be used with that class, and at run time checks whether the property is defined and that the value fits the defined range. 
93 | 94 | ## Hacking 95 | 96 | You can change the mapping by tweaking `utils/vocab_reader.py` and rerunning it to build a new TSV input file. See also the experimental code for loading completely different ontologies. 97 | 98 | -------------------------------------------------------------------------------- /utils/make_jsonld_context.py: -------------------------------------------------------------------------------- 1 | 2 | import codecs 3 | import json 4 | 5 | try: 6 | from collections import OrderedDict 7 | except: 8 | try: 9 | from ordereddict import OrderedDict 10 | except: 11 | raise Exception("To run with old pythons you must: easy_install ordereddict") 12 | 13 | fn = '../cromulent/data/crm_vocab.tsv' 14 | fh = codecs.open(fn, 'r', 'utf-8') 15 | lines = fh.readlines()[1:] # Chomp header line 16 | fh.close() 17 | 18 | context = OrderedDict() 19 | context['@version'] = 1.1 20 | context['crm'] = "http://www.cidoc-crm.org/cidoc-crm/" 21 | context['sci'] = "http://www.ics.forth.gr/isl/CRMsci/" 22 | context['rdf'] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" 23 | context['rdfs'] = "http://www.w3.org/2000/01/rdf-schema#" 24 | context['dc'] = "http://purl.org/dc/elements/1.1/" 25 | context['dcterms'] = "http://purl.org/dc/terms/" 26 | context['schema'] = "http://schema.org/" 27 | context['skos'] = "http://www.w3.org/2004/02/skos/core#" 28 | context['foaf'] = 'http://xmlns.com/foaf/0.1/' 29 | context['xsd'] = "http://www.w3.org/2001/XMLSchema#" 30 | context['dig'] = "http://www.ics.forth.gr/isl/CRMdig/" 31 | context["la"] = "https://linked.art/ns/terms/" 32 | context["archaeo"] = "http://www.cidoc-crm.org/cidoc-crm/CRMarchaeo/" 33 | 34 | ## These are only aliases. The processing is defined by the spec. 
35 | context['id'] = "@id" 36 | context['type'] = "@type" 37 | 38 | extension = OrderedDict() 39 | extension['@version'] = 1.1 40 | extension['crm'] = "http://www.cidoc-crm.org/cidoc-crm/" 41 | 42 | vocab_properties = ["assigned_property"] 43 | 44 | parts = { 45 | "P9": ["crm:P9_consists_of", "crm:P9i_forms_part_of"], 46 | "P46": ["crm:P46_is_composed_of", "crm:P46i_forms_part_of"], 47 | "P106": ["crm:P106_is_composed_of", "crm:P106i_forms_part_of"], 48 | "P86": ["crm:P86i_contains", "crm:P86_falls_within"], 49 | "P89": ["crm:P89i_contains", "crm:P89_falls_within"], 50 | "P148": ["crm:P148_has_component", "crm:P148i_is_component_of"], 51 | "skos": ["skos:narrower", "skos:broader"], 52 | "set": ["la:has_member", "la:member_of"], 53 | "P107": ["crm:P107_has_current_or_former_member", "crm:P107i_is_current_or_former_member_of"] 54 | } 55 | 56 | p177_context = { 57 | "part": None, 58 | "temporal_part": "crm:P9_consists_of", 59 | "physical_part": "crm:P46_is_composed_of", 60 | "symbolic_part": "crm:P106_is_composed_of", 61 | "propositional_part": "crm:P148_has_component", 62 | "timespan_part": "crm:P86i_contains", 63 | "location_part": "crm:P89i_contains", 64 | "interest_part": "la:interest_part", 65 | "part_of": None, 66 | "temporal_part_of": "crm:P9i_forms_part_of", 67 | "physical_part_of": "crm:P46i_forms_part_of", 68 | "symbolic_part_of": "crm:P106i_forms_part_of", 69 | "propositional_part_of": "crm:P148i_is_component_of", 70 | "timespan_part_of": "crm:P86_falls_within", 71 | "location_part_of": "crm:P89_falls_within", 72 | } 73 | 74 | scoped_classes = { 75 | "Activity": "P9", 76 | "Acquisition": "P9", 77 | "TransferOfCustody": "P9", 78 | "Production": "P9", 79 | "AttributeAssignment": "P9", 80 | "HumanMadeObject": "P46", 81 | "LinguisticObject": "P106", 82 | "VisualItem": "P106", # XXX This is the symbolic partitioning, not the conceptual partitioning of P149 83 | "Identifier": "P106", 84 | "TimeSpan": "P86", 85 | "Place": "P89", 86 | "Type": "skos", 87 | 
"Language": "skos", 88 | "Material": "skos", 89 | "MeasurementUnit": "skos", 90 | "BeginningOfExistence": "P9", 91 | "EndOfExistence": "P9", 92 | "Creation": "P9", 93 | "Formation": "P9", 94 | "InformationObject": "P106", 95 | "Transformation": "P9", 96 | "Joining": "P9", 97 | "Leaving": "P9", 98 | "PropositionalObject": "P148", 99 | "Currency": "skos", 100 | "Payment": "P9", 101 | "Right": "P148", 102 | "Name": "P106", 103 | "Birth": "P9", 104 | "Death": "P9", 105 | "Event": "P9", 106 | "Destruction": "P9", 107 | "Move": "P9", 108 | "Modification": "P9", 109 | "Dissolution": "P9", 110 | "Period": "P9", 111 | "PhysicalThing": "P46", 112 | "PhysicalObject": "P46", 113 | "PhysicalFeature": "P46", 114 | "BiologicalObject": "P46", 115 | "Site": "P46", 116 | "PhysicalHumanMadeThing": "P46", 117 | "HumanMadeFeature": "P46", 118 | "Title": "P106", 119 | "Inscription": "P106", 120 | "Mark": "P106", 121 | "Appellation": "P106", 122 | "PartAddition": "P9", 123 | "PartRemoval": "P9", 124 | "SymbolicObject": "P106", 125 | "Purchase": "P9", 126 | "Set": "set", 127 | "Group": "P107", 128 | "Person": "P107" 129 | } 130 | 131 | other_scoped = { 132 | } 133 | 134 | # enforce these in the context 135 | literal_types = [ 136 | "xsd:dateTime" 137 | ] 138 | # Let these default 139 | empty_literal_types = [ 140 | "rdfs:Literal", 141 | "xsd:string" 142 | ] 143 | 144 | 145 | for l in lines: 146 | l = l[:-1] # chomp 147 | info= l.split('\t') 148 | name = info[0] 149 | if info[1] == "class": 150 | # map json key to ontology for @type:@vocab 151 | ctname = info[2] 152 | if name.startswith("E"): 153 | name = "crm:%s" % name 154 | context[ctname] = {"@id": name} 155 | if ctname in scoped_classes: 156 | part = parts[scoped_classes[ctname]][0] 157 | part_of = parts[scoped_classes[ctname]][1] 158 | 159 | # XXX member_of needs to be added to person and Group as Group one 160 | # and member_of_set for Set one 161 | # then member_of is Set for everything else 162 | 163 | if scoped_classes[ctname] in 
['set', 'P107']: 164 | context[ctname]['@context'] = { 165 | "member": {"@id": part, "@type": "@id", "@container": "@set"}, 166 | "member_of": {"@id": part_of, "@type": "@id", "@container": "@set"} 167 | } 168 | else: 169 | context[ctname]['@context'] = { 170 | "part": {"@id": part, "@type": "@id", "@container": "@set"}, 171 | "part_of": {"@id": part_of, "@type": "@id", "@container": "@set"}, 172 | "member_of": {"@id": parts["set"][1], "@type": "@id", "@container": "@set"} 173 | } 174 | # Add other scopes if needed 175 | if ctname in other_scoped: 176 | context[ctname]['@context'] = other_scoped[ctname] 177 | 178 | else: 179 | ctname = info[2] 180 | write = not ctname in ['part', 'part_of', 'member', 'member_of'] 181 | # These need to be added correctly to all parents in the ontology 182 | # ... as above 183 | 184 | dmn = info[6] 185 | rng = info[7] 186 | mult = info[11] or '1' 187 | if ctname in context: 188 | print("Already found: %s (%s vs %s)" % (ctname, context[ctname]['@id'], name)) 189 | else: 190 | 191 | if rng: 192 | if rng in empty_literal_types: 193 | typ = None 194 | elif rng in literal_types: 195 | typ = rng 196 | elif ctname in vocab_properties: 197 | typ = "@vocab" 198 | else: 199 | typ = "@id" 200 | else: 201 | typ = None 202 | 203 | if name.startswith("P"): 204 | name = "crm:%s" % name 205 | 206 | if write: 207 | if not typ: 208 | context[ctname] = {"@id": name} 209 | elif mult == '1': 210 | context[ctname] = {"@id": name, "@type": typ, "@container":"@set"} 211 | else: 212 | context[ctname] = {"@id": name, "@type": typ} 213 | 214 | if ctname == "assigned_property_type": 215 | context['assigned_property_type']['@context'] = p177_context 216 | 217 | # Otherwise, we're part / part_of, so ignore 218 | # print("scoped context: %s: %s on %s" % (ctname, name, dmn)) 219 | 220 | ctxt = {"@context": context} 221 | 222 | outstr = json.dumps(ctxt, indent=2) 223 | fh = open("../cromulent/data/linked-art.json", 'w') 224 | fh.write(outstr) 225 | fh.close() 226 | 
-------------------------------------------------------------------------------- /utils/process_ontologies.py: -------------------------------------------------------------------------------- 1 | from lxml import etree 2 | import codecs 3 | import json 4 | import sys 5 | 6 | PROFILE_ONLY = '--profile' in sys.argv 7 | default_key_order = 10000 8 | 9 | NS = {'rdf':"http://www.w3.org/1999/02/22-rdf-syntax-ns#", 10 | 'xsd':"http://www.w3.org/2001/XMLSchema#", 11 | 'rdfs':"http://www.w3.org/2000/01/rdf-schema#", 12 | 'dcterms':"http://purl.org/dc/terms/", 13 | 'owl':"http://www.w3.org/2002/07/owl#", 14 | 'crm':"http://www.cidoc-crm.org/cidoc-crm/", 15 | 'xml': "http://www.w3.org/XML/1998/namespace", 16 | 'ore': "http://www.openarchives.org/ore/terms/", 17 | 'la': "https://linked.art/ns/terms/", 18 | "skos": "http://www.w3.org/2004/02/skos/core#", 19 | "schema": "http://schema.org/", 20 | "dc": "http://purl.org/dc/elements/1.1/", 21 | "geo": "http://www.ics.forth.gr/isl/CRMgeo/", 22 | "dig": "http://www.ics.forth.gr/isl/CRMdig/", 23 | "sci": "http://www.ics.forth.gr/isl/CRMsci/", 24 | "archaeo": "http://www.cidoc-crm.org/cidoc-crm/CRMarchaeo/" 25 | } 26 | 27 | 28 | # Order imposed by the library 29 | # @context = 0, id = 1, rdf:type = 2 30 | # rdfs:label = 5, rdf:value = 6, dc:description = 7 31 | 32 | fh = open('../cromulent/data/key_order.json') 33 | data = fh.read() 34 | fh.close() 35 | key_order_hash = json.loads(data) 36 | 37 | # Allow configuration of overrides for the mapping of ontology to python/json 38 | fh = open('../cromulent/data/overrides.json') 39 | data = fh.read() 40 | fh.close() 41 | property_overrides = json.loads(data) 42 | 43 | # Allow subsetting of CRM into in-use / not-in-use to enable the library 44 | # to warn on instantiation of not-in-use properties or classes 45 | fh = open('../cromulent/data/crm-profile.json') 46 | data = fh.read() 47 | fh.close() 48 | profile_flags = json.loads(data) 49 | 50 | stuff = [] 51 | propXHash = {} 52 | classXHash = 
{} 53 | 54 | def process_classes(dom): 55 | classes = dom.xpath("//rdfs:Class", namespaces=NS) 56 | for c in classes: 57 | name = c.xpath('@rdf:about', namespaces=NS)[0] 58 | for (pref,ns) in NS.items(): 59 | if name.startswith(ns): 60 | name = name.replace(ns, "%s:" % pref) 61 | break 62 | 63 | if not name in profile_flags: 64 | print(" WARNING: %s not in profile" % name) 65 | useflag = str(profile_flags.get(name, 0)) 66 | if name in classXHash: 67 | classXHash[name][0] = c 68 | else: 69 | classXHash[name] = [c, useflag] 70 | 71 | label = c.xpath('./rdfs:label[@xml:lang="en"]/text()', namespaces=NS)[0] 72 | try: 73 | comment = c.xpath('./rdfs:comment/text()', namespaces=NS)[0] 74 | comment = comment.strip() 75 | comment = comment.replace('\n', '\\n').replace('\t', ' ') 76 | except: 77 | comment = "" 78 | 79 | subClsL = c.xpath('./rdfs:subClassOf/@rdf:resource', namespaces=NS) 80 | if subClsL: 81 | # could be multiples 82 | subCls = '|'.join(subClsL) 83 | for s in subClsL: 84 | try: 85 | classXHash[s][1] = 3 86 | except KeyError: 87 | classXHash[s] = [None, 3] 88 | else: 89 | subCls = "" 90 | 91 | # Hack extensions to be readable :( 92 | if name == "geo:SP4_Spatial_Coordinate_Reference_System": 93 | ccname = "CoordinateSystem" 94 | elif name == "geo:SP5_Geometric_Place_Expression": 95 | ccname = "Geometry" 96 | elif name == "geo:SP6_Declarative_Place": 97 | ccname = "DeclarativePlace" 98 | elif name == "E33_E41_Linguistic_Appellation": 99 | ccname = "Name" 100 | elif name == "dig:D1_Digital_Object": 101 | ccname = "DigitalObject" 102 | elif name == "sci:S19_Encounter_Event": 103 | ccname = "Encounter" 104 | else: 105 | # Assume that we've done our job okay and put in overrides for NSS 106 | cidx = name.find(":") 107 | if cidx > -1: 108 | ccname = name[cidx+1:] 109 | else: 110 | uc1 = name.find("_") 111 | ccname = name[uc1+1:] 112 | ccname = ccname.replace("_or_", "_Or_").replace("_of_", "_Of_") 113 | ccname = ccname.replace('-', '').replace('_', '') 114 | 115 | 
stuff.append([name, "class", ccname, label, comment, subCls, useflag]) 116 | 117 | def process_props(dom): 118 | props = dom.xpath("//rdf:Property",namespaces=NS) 119 | for p in props: 120 | name = p.xpath('@rdf:about', namespaces=NS)[0] 121 | 122 | 123 | # replace archaeo first, as a superstring of crm base :( 124 | if name.startswith("http://www.cidoc-crm.org/cidoc-crm/CRMarchaeo/"): 125 | name = name.replace("http://www.cidoc-crm.org/cidoc-crm/CRMarchaeo/", "archaeo:") 126 | 127 | for (pref,ns) in NS.items(): 128 | if name.startswith(ns): 129 | name = name.replace(ns, "%s:" % pref) 130 | break 131 | 132 | if not name in profile_flags: 133 | print(" WARNING: %s not in profile" % name) 134 | useflags = profile_flags.get(name, [0,0]) or [0,0] 135 | propXHash[name] = [p, useflags[0]] 136 | 137 | try: 138 | label = p.xpath('./rdfs:label[@xml:lang="en"]/text()', namespaces=NS)[0] 139 | except: 140 | print(p.xpath('./@rdf:about', namespaces=NS)) 141 | print(p.xpath('./rdfs:label/text()', namespaces=NS)) 142 | raise ValueError 143 | try: 144 | comment = p.xpath('./rdfs:comment/text()', namespaces=NS)[0] 145 | comment = comment.strip() 146 | comment = comment.replace('\n', '\\n').replace('\t', ' ') 147 | except: 148 | comment = "" 149 | 150 | domn = p.xpath('./rdfs:domain/@rdf:resource', namespaces=NS) 151 | if domn: 152 | domn = domn[0] 153 | for (k,v) in NS.items(): 154 | domn = domn.replace(v,"%s:" % k) 155 | else: 156 | domn = "" 157 | rang = p.xpath('./rdfs:range/@rdf:resource', namespaces=NS) 158 | if rang: 159 | rang = rang[0] 160 | for (k,v) in NS.items(): 161 | rang = rang.replace(v,"%s:" % k) 162 | else: 163 | rang = "" 164 | 165 | subProp = p.xpath('./rdfs:subPropertyOf/@rdf:resource', namespaces=NS) 166 | if subProp: 167 | subProp = subProp[0] 168 | else: 169 | subProp = "" 170 | 171 | inverse = p.xpath('./owl:inverseOf/@rdf:resource', namespaces=NS) 172 | if inverse: 173 | inverse = inverse[0] 174 | for (pref,ns) in NS.items(): 175 | if 
inverse.startswith(ns): 176 | inverse = inverse.replace(ns, "%s:" % pref) 177 | break 178 | else: 179 | inverse = "" 180 | 181 | cidx = name.find(":") 182 | if name in property_overrides: 183 | ccname = property_overrides[name] 184 | elif cidx > -1: 185 | ccname = name[cidx+1:] 186 | else: 187 | uc1 = name.find("_") 188 | pno = name[:uc1] 189 | if pno in property_overrides: 190 | ccname = property_overrides[pno] 191 | else: 192 | ccname = name[uc1+1:] 193 | ccname = ccname.replace("-", "") 194 | if ccname.startswith("is_"): 195 | ccname = ccname[3:] 196 | elif ccname.startswith("has_") or ccname.startswith("had_") or ccname.startswith("was_"): 197 | ccname = ccname[4:] 198 | 199 | koi = str(key_order_hash.get(ccname, default_key_order)) 200 | 201 | # [0/1/2, 0/1] for [no/okay/warn, single/multiple] 202 | stuff.append([name, "property", ccname, label, comment, subProp, domn, rang, inverse, koi, 203 | str(useflags[0]), str(useflags[1])]) 204 | 205 | 206 | # This order is important. 207 | # Need to process the class definition before the properties of the class 208 | # linkedart defines properties against the classes in the core and extensions 209 | # so needs to come last 210 | 211 | files = ['cidoc.xml', 'linkedart_crm_enhancements.xml', 'linkedart.xml'] 212 | 213 | for fn in files: 214 | print("processing: %s" % fn) 215 | fh = open('data/%s' % fn) 216 | data = fh.read() 217 | fh.close() 218 | try: 219 | dom = etree.XML(data.encode('utf-8')) 220 | except: 221 | dom = etree.XML(data) 222 | process_classes(dom) 223 | process_props(dom) 224 | 225 | 226 | 227 | headers = ["term", "term type", "json-ld key", "label", "scope note", "subPropertyOf", "domain", \ 228 | "range", "inverse", "key order", "okay to use?", "okay for multiple?"] 229 | 230 | # outdata = '\n'.join(['\t'.join(x) for x in stuff]) 231 | fh = codecs.open('../cromulent/data/crm_vocab.tsv', 'w', 'utf-8') 232 | # write header 233 | line = '\t'.join(headers) + '\n' 234 | fh.write(line) 235 | 236 | for l in 
stuff: 237 | name = l[0] 238 | line = '\t'.join(l) + "\n" 239 | if name in classXHash: 240 | okay = classXHash[name][1] 241 | elif name in propXHash: 242 | okay = propXHash[name][1] 243 | else: 244 | okay = 0 245 | print("Could not find %s" % name) 246 | if not PROFILE_ONLY or okay: 247 | fh.write(line) 248 | fh.close() 249 | -------------------------------------------------------------------------------- /tests/test_dimensions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import unittest 4 | try: 5 | from contextlib import suppress 6 | except: 7 | # Python 2.7 8 | suppress = None 9 | import pprint 10 | from datetime import datetime 11 | from cromulent.extract import Dimension, normalized_dimension_object 12 | import cromulent.extract 13 | 14 | class TestDimensionExtraction(unittest.TestCase): 15 | ''' 16 | Test the ability to extract various formats of dimensions. 17 | ''' 18 | def setUp(self): 19 | pass 20 | 21 | def tearDown(self): 22 | pass 23 | 24 | def test_parse_simple_dimensions(self): 25 | ''' 26 | Test the documented formats that `cromulent.extract.parse_simple_dimensions` can parse 27 | and ensure that it returns the expected data. 
28 | ''' 29 | tests = { 30 | "3'": [Dimension(3, 'feet', None)], 31 | '3 feet': [Dimension(3, 'feet', None)], 32 | '3 foot': [Dimension(3, 'feet', None)], 33 | '3 ft': [Dimension(3, 'feet', None)], 34 | '3 ft.': [Dimension(3, 'feet', None)], 35 | '2"': [Dimension(2, 'inches', None)], 36 | '2 in': [Dimension(2, 'inches', None)], 37 | '2 in.': [Dimension(2, 'inches', None)], 38 | '2 inch': [Dimension(2, 'inches', None)], 39 | '2 inches': [Dimension(2, 'inches', None)], 40 | '2 duymen': [Dimension(2, 'inches', None)], 41 | '2 d.': [Dimension(2, 'inches', None)], 42 | '2 d': [Dimension(2, 'inches', None)], 43 | '''2'8"''': [Dimension(2, 'feet', None), Dimension(8, 'inches', None)], 44 | '4cm': [Dimension(4, 'cm', None)], 45 | '2 pieds 3 pouces': [Dimension(2, 'fr_feet', None), Dimension(3, 'fr_inches', None)], 46 | '1 pied 7 pouces': [Dimension(1, 'fr_feet', None), Dimension(7, 'fr_inches', None)], 47 | '8 pouce': [Dimension(8, 'fr_inches', None)], 48 | '8 pouces': [Dimension(8, 'fr_inches', None)], 49 | '8 1/2 pouces': [Dimension(8.5, 'fr_inches', None)], 50 | '8 1/4 pouces': [Dimension(8.25, 'fr_inches', None)], 51 | '8 1/8 pouces': [Dimension(8.125, 'fr_inches', None)], 52 | '1': [Dimension(1, None, None)], 53 | 54 | # values without a unit that follow values with a unit stay in the same system but using the next-finer unit 55 | '2 pieds 3': [Dimension(2, 'fr_feet', None), Dimension(3, 'fr_inches', None)], 56 | "1' 3": [Dimension(1, 'feet', None), Dimension(3, 'inches', None)], 57 | } 58 | 59 | for value, expected in tests.items(): 60 | dims = cromulent.extract.parse_simple_dimensions(value) 61 | if expected is not None: 62 | self.assertIsInstance(dims, list) 63 | self.assertEqual(dims, expected, msg='dimensions: %r' % (value,)) 64 | else: 65 | self.assertIsNone(dims) 66 | 67 | def test_dimension_cleaner(self): 68 | ''' 69 | Test the documented formats that `cromulent.extract.dimensions_cleaner` can parse 70 | and ensure that it returns the expected data. 
71 | ''' 72 | tests = { 73 | '''2 in by 1 in''': ([Dimension(2, 'inches', None)], [Dimension(1, 'inches', None)]), 74 | '''2'2"h x 2'8"w''': ([Dimension(2, 'feet', 'height'), Dimension(2, 'inches', 'height')], [Dimension(2, 'feet', 'width'), Dimension(8, 'inches', 'width')]), 75 | '''1'3"x4cm h''': ([Dimension(1, 'feet', None), Dimension(3, 'inches', None)], [Dimension(4, 'cm', 'height')]), 76 | '''1'3" by 4"''': ([Dimension(1, 'feet', None), Dimension(3, 'inches', None)], [Dimension(4, 'inches', None)]), 77 | 'Haut 14 pouces, large 10 pouces': ([Dimension(14, 'fr_inches', 'height')], [Dimension(10, 'fr_inches', 'width')]), 78 | 'Haut. 48 pouces, large 68 pouces': ([Dimension(48, 'fr_inches', 'height')], [Dimension(68, 'fr_inches', 'width')]), 79 | '1 by 4': ([Dimension(1, None, None)], [Dimension(4, None, None)]), 80 | 'Hoog. 6 v., breed 3 v': ([Dimension(6, 'feet', 'height')], [Dimension(3, 'feet', 'width')]), 81 | 'Breedt 6 v., hoog 3 v': ([Dimension(6, 'feet', 'width')], [Dimension(3, 'feet', 'height')]), 82 | '20 cm x 24,5 cm': ([Dimension(20, 'cm', None)], [Dimension(24.5, 'cm', None)]), 83 | '2 w by 5 h': ([Dimension(2, None, 'width')], [Dimension(5, None, 'height')]), 84 | 'Hauteur 1 pied 4 pouces, largeur 1 pied 1/2 pouc.': ([Dimension(1, 'fr_feet', 'height'), Dimension(value=4, unit='fr_inches', which='height')], [Dimension(1, 'fr_feet', 'width'), Dimension(value=0.5, unit='fr_inches', which='width')]), 85 | 'h.73 pouces 4 lignes, l.50 pouces': ([Dimension(value=73, unit='fr_inches', which='height'), Dimension(value=4, unit='ligne', which='height')], [Dimension(value=50, unit='fr_inches', which='width')]), 86 | 'haut. 5 pouc. larg. 5 pouc. 4 linges': ([Dimension(value=5, unit='fr_inches', which='height')], [Dimension(value=5, unit='fr_inches', which='width'), Dimension(value=4, unit='ligne', which='width')]), 87 | 'haut. 9 pouc. 4 lignes larg. 10 pouc. 
4 linges': ([Dimension(value=9, unit='fr_inches', which='height'), Dimension(value=4, unit='ligne', which='height')], [Dimension(value=10, unit='fr_inches', which='width'), Dimension(value=4, unit='ligne', which='width')]), 88 | 'h 38 cm, w 27 cm': ([Dimension(38, 'cm', 'height')], [Dimension(27, 'cm', 'width')]), 89 | "hauteur 9 pouces, largeur 7": ([Dimension(value=9, unit='fr_inches', which='height')], [Dimension(value=7, unit=None, which='width')]), 90 | } 91 | 92 | for value, expected in tests.items(): 93 | dims = cromulent.extract.dimensions_cleaner(value) 94 | if expected is not None: 95 | self.assertIsInstance(dims, tuple) 96 | # print('===== got:') 97 | # pprint.pprint(dims) 98 | # print('----- expected:') 99 | # pprint.pprint(expected) 100 | # print('=====') 101 | self.assertEqual(dims, expected, msg='dimensions: %r' % (value,)) 102 | else: 103 | self.assertIsNone(dims) 104 | 105 | def test_extract_physical_dimensions(self): 106 | ''' 107 | Test the documented formats that `cromulent.extract.extract_physical_dimensions` 108 | can parse and ensure that it returns the expected data. 
109 | ''' 110 | tests = {} 111 | h9l7_height = cromulent.vocab.Height(ident='', content=9.0) 112 | h9l7_height.identified_by = cromulent.model.Name(ident='', content='9 French inches') 113 | h9l7_height.unit = cromulent.vocab.instances.get('fr_inches') 114 | h9l7_width = cromulent.vocab.Width(ident='', content=7.0) 115 | tests["hauteur 9 pouces, largeur 7"] = [h9l7_height, h9l7_width] 116 | 117 | for value, expected_dims in tests.items(): 118 | dims = list(cromulent.extract.extract_physical_dimensions(value)) 119 | for got, expected in zip(dims, expected_dims): 120 | self.assertEqual(got.value, expected.value) 121 | self.assertEqual(got.type, expected.type) 122 | 123 | if suppress is None: 124 | # Python 2.7 125 | if hasattr(expected, 'unit'): 126 | self.assertEqual(got.unit, expected.unit) 127 | if hasattr(expected, 'classified_as'): 128 | self.assertEqual(got.classified_as, expected.classified_as) 129 | if hasattr(expected, 'identified_by'): 130 | self.assertEqual(got.identified_by, expected.identified_by) 131 | else: 132 | with suppress(AttributeError): 133 | self.assertEqual(got.unit, expected.unit) 134 | with suppress(AttributeError): 135 | self.assertEqual(got.classified_as, expected.classified_as) 136 | with suppress(AttributeError): 137 | self.assertEqual(got.identified_by, expected.identified_by) 138 | 139 | def test_extract_physical_dimensions_with_default(self): 140 | ''' 141 | Test the documented formats that `cromulent.extract.extract_physical_dimensions` 142 | can parse, specifying a default unit, and ensure that it returns the expected data. 
143 | ''' 144 | tests = {} 145 | h9l7_height = cromulent.vocab.Height(ident='', content=9.0) 146 | h9l7_height.identified_by = cromulent.model.Name(ident='', content='9 French inches') 147 | h9l7_height.unit = cromulent.vocab.instances.get('fr_inches') 148 | h9l7_width = cromulent.vocab.Width(ident='', content=7.0) 149 | h9l7_width.unit = cromulent.vocab.instances.get('inches') 150 | tests["hauteur 9 pouces, largeur 7"] = [h9l7_height, h9l7_width] 151 | 152 | for value, expected_dims in tests.items(): 153 | dims = list(cromulent.extract.extract_physical_dimensions(value, default_unit='inches')) 154 | for got, expected in zip(dims, expected_dims): 155 | self.assertEqual(got.value, expected.value) 156 | self.assertEqual(got.type, expected.type) 157 | self.assertEqual(got.unit, expected.unit) 158 | 159 | def test_normalize_dimension(self): 160 | tests = { 161 | '1 ft, 2 in': ('1 foot, 2 inches', Dimension(value=14, unit='inches', which=None)), 162 | '8 1/2 pouces': ('8.5 French inches', Dimension(value=8.5, unit='fr_inches', which=None)), 163 | '1 pied 7 pouces': ('1 French foot, 7 French inches', Dimension(value=19, unit='fr_inches', which=None)), 164 | '2 pied 1/2 pouces': ('2 French feet, 0.5 French inches', Dimension(value=24.5, unit='fr_inches', which=None)), 165 | '1 pied 3 pouce. 
3 linges': ('1 French foot, 3 French inches, 3 lignes', Dimension(value=15.25, unit='fr_inches', which=None)), 166 | "4' 8": ('4 feet, 8 inches', Dimension(value=56, unit='inches', which=None)), 167 | "1 pied 2": ('1 French foot, 2 French inches', Dimension(value=14, unit='fr_inches', which=None)), 168 | } 169 | for value, expected in tests.items(): 170 | elabel, edim = expected 171 | dims = cromulent.extract.parse_simple_dimensions(value) 172 | dim, label = normalized_dimension_object(dims) 173 | self.assertEqual(label, elabel) 174 | self.assertEqual(dim, edim) 175 | 176 | if __name__ == '__main__': 177 | unittest.main() 178 | -------------------------------------------------------------------------------- /utils/data/linkedart_crm_enhancements.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Digital Object 7 | This class comprises identifiable immaterial items that can be represented as sets of bit sequences, such as data sets, e-texts, images, audio or video items, software, etc., and are documented as single units. Any aggregation of instances of D1 Digital Object into a whole treated as single unit is also regarded as an instance of D1 Digital Object. This means that for instance, the content of a DVD, an XML file on it, and an element of this file, are regarded as distinct instances of D1 Digital Object, mutually related by the P106 is composed of (forms part of) property. A D1 Digital Object does not depend on a specific physical carrier, and it can exist on one or more carriers simultaneously. 8 | 9 | 10 | 11 | 12 | triggers 13 | 14 | 15 | 16 | 17 | 18 | 19 | triggered by 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | Encounter 28 | 29 | This class comprises activities of S4 Observation (substance) where an E39 Actor encounters an instance of E18 Physical Thing of a kind relevant for the mission of the observation or regarded as potentially relevant for some community (identity). 
This observation produces knowledge about the existence of the respective thing at a particular place in or on surrounding matter. This knowledge may be new to the group of people the actor belongs to. In that case we would talk about a discovery. The observer may recognize or assign an individual identity of the thing encountered or regard only the type as noteworthy in the associated documentation or report. 30 | Note that this representation treats S19 as a subClass of only E7 Activity for ease of implementation, as we do not need the full set of relationships available via the complete hierarcy. In the full CRMsci, it is Activity -> Attribute Assignment -> Observation -> Encounter. 31 | 32 | 33 | 34 | 35 | encountered object 36 | This property associates an instance of S19 Encounter Event with an instance of E18 Physical 37 | Thing that has been found. e.g. The finding (S19) encountered (O19) the 18 arrowheads (E18) from Lerna in Argolis 38 | 39 | 40 | 41 | 42 | 43 | 44 | was encountered at 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | occurs during 54 | This property identifies a situation in which the entire instance of the E52 Time-Span of an instance of E2 Temporal Entity is within the instance of the E52 Time-Span of another instance of E2 Temporal Entity that starts before and ends after the included temporal entity. 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | label 68 | A human-readable name for the subject. 
69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | exactMatch 82 | Exact Match, not quite sameAs, good for most uses 83 | 84 | 85 | 86 | 87 | 88 | closeMatch 89 | Close Match, good for some uses 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | has narrower term 98 | Or is broader term of 99 | 100 | 101 | 102 | 103 | 104 | 105 | has broader term 106 | Or is narrower term of 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | has top concept 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | is top concept of 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | is in scheme 134 | Relates a resource (for example a concept) to a concept scheme in which it is included. 135 | A concept may be a member of more than one concept scheme. 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | seeAlso 145 | A related resource, that is machine readable and related to the current resource. 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | conforms to 155 | Some thing conforms to some standard 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | format 164 | The media type of the information object 165 | 166 | 167 | 168 | 169 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /examples/json-to-lod.py: -------------------------------------------------------------------------------- 1 | 2 | import json 3 | from cidoc_orm import factory, TimeSpan, ManMadeObject, Type, Identifier, \ 4 | Production, Person, Place, Group, Material, Type, Mark, Right, Document, \ 5 | Activity 6 | import re 7 | 8 | # Meta meta 9 | ext_classes = { 10 | "TMSNumber": {"parent": Identifier, "vocab": "aat", "id": "300404621"}, 11 | "AccessionNumber": {"parent": Identifier, "vocab": "aat", "id": "300312355"}, 12 | "Inscription": {"parent": Mark, "vocab": "aat", "id": "300028702"}, 13 | "Signature": {"parent": Mark, "vocab": "aat", "id": "300028705"}, 14 | "Exhibition": {"parent": Activity, "vocab": "aat", "id": "300054766"}, 15 | "Painting": {"parent": ManMadeObject, "vocab": "aat", "id": "300033618"}, 16 | "Sculpture": {"parent": ManMadeObject, "vocab": "aat", "id": "300047090"}, 17 | "Drawing": {"parent": ManMadeObject, "vocab": "aat", "id": "300033973"}, 18 | "Miniature": {"parent": ManMadeObject, "vocab": "aat", "id": "300033936"}, 19 | 
"Tapestry": {"parent": ManMadeObject, "vocab": "aat", "id": "300205002"}, 20 | "Furniture": {"parent": ManMadeObject, "vocab": "aat", "id": "300037680"}, 21 | "Mosaic": {"parent": ManMadeObject, "vocab": "aat", "id": "300015342"}, 22 | "Photograph": {"parent": ManMadeObject, "vocab": "aat", "id": "300046300"}, 23 | "Drawing": {"parent": ManMadeObject, "vocab": "aat", "id": "300033973"}, 24 | "Coin": {"parent": ManMadeObject, "vocab": "aat", "id": "300037222"}, 25 | "Vessel": {"parent": ManMadeObject, "vocab": "aat", "id": "300193015"}, 26 | "PhotographPrint": {"parent": ManMadeObject, "vocab": "aat", "id": "300127104"}, 27 | "PhotographAlbum": {"parent": ManMadeObject, "vocab": "aat", "id": "300026695"}, 28 | "PhotographBook": {"parent": ManMadeObject, "vocab": "aat", "id": "300265728"} 29 | } 30 | 31 | # Jewelry 32 | # Text Book Album 33 | # Implement 34 | 35 | # Note many sub types of Vessels, including 36 | # Bowl, Flask, Beaker, Cup, Jar, Amphora, 37 | 38 | for (name,v) in ext_classes.items(): 39 | c = type(name, (v['parent'],), {}) 40 | c._p2_has_type = "http://vocab.getty.edu/%s/%s" % (v['vocab'], v['id']) 41 | globals()[name] = c 42 | 43 | aat_type_mapping = { 44 | "Painting": Painting, 45 | "Paintings": Painting, 46 | "Drawing": Drawing, 47 | "Furniture": Furniture, 48 | "Coin": Coin, 49 | "Sculpture": Sculpture, 50 | "Vessels": Vessel 51 | } 52 | 53 | # "panel": "300014657" # A wooden support 54 | 55 | aat_part_mapping = { 56 | "supports": "300014844" # The thing that is painted on 57 | } 58 | 59 | aat_material_mapping = { 60 | "watercolor": "300015045", 61 | "oil": "300015050", 62 | "tempera": "300015062", 63 | "canvas": "300014078", 64 | "oak": "300012264", 65 | "gold leaf": "300264831", 66 | "paper": "300014109", 67 | "copper": "300011020", 68 | "terracotta": "300010669", 69 | "glass": "300010797", 70 | "chalk": "300011727", 71 | "bronze": "300010957", 72 | "marble": "300011443", 73 | "albumen silver print": "300127121", 74 | "gelatin silver print": 
"300128695", 75 | "silver": "300011029" 76 | } 77 | 78 | aat_culture_mapping = { 79 | "french": "300111188", 80 | "italian": "300111198", 81 | "german": "300111192", 82 | "dutch": "300020929" 83 | } 84 | 85 | dim_type_mapping = { 86 | "height": "300055644", 87 | "width": "300055647", 88 | "depth": "300072633", 89 | "diameter": "300055624", 90 | "weight": "300056240" 91 | } 92 | 93 | 94 | # Meta 95 | class CreditLine(Right): 96 | def __init__(self, *args, **kw): 97 | super(CreditLine, self).__init__(*args, **kw) 98 | # XXX Find a good Type for this 99 | self.has_type = Type("http://example.org/ns/creditline") 100 | CreditLine._properties['value'] = {"rdf": "rdfs:value", "range": str} 101 | 102 | class SourceCreditLine(CreditLine): 103 | def __init__(self, *args, **kw): 104 | super(SourceCreditLine, self).__init__(*args, **kw) 105 | # XXX Find a good Type for this 106 | self.has_type = [] 107 | self.has_type = Type("http://example.org/ns/sourcecreditline") 108 | 109 | class Department(Group): 110 | def __init__(self, *args, **kw): 111 | super(Department, self).__init__(*args, **kw) 112 | self.is_current_or_former_member_of = Museum 113 | 114 | 115 | # Monkey patch Type's _toJSON to only emit full data if not just URI+type 116 | def typeToJSON(self, top=False): 117 | props = self.__dict__.keys() 118 | if len(props) > 3: 119 | return super(Type, self)._toJSON() 120 | else: 121 | return self.id 122 | 123 | Type._toJSON = typeToJSON 124 | Person._properties['familyName'] = {"rdf": "schema:familyName", "range": str} 125 | Person._properties['givenName'] = {"rdf": "schema:givenName", "range": str} 126 | ManMadeObject._properties['culture'] = {"rdf": "schema:genre", "range": Type} 127 | 128 | 129 | factory.base_url = "http://data.getty.edu/museum/" 130 | factory.default_lang = "en" 131 | 132 | departments = {} 133 | locations = {} 134 | 135 | GettyTrust = Group("http://vocab.getty.edu/ulan/500115987") 136 | GettyTrust.label = "J. 
Paul Getty Trust" 137 | Museum = Group("http://vocab.getty.edu/ulan/500115988") 138 | Museum.is_current_or_former_member_of = GettyTrust 139 | Museum.label = "J Paul Getty Museum" 140 | 141 | painting_on_re = re.compile("^(.+?) on (.+?)$") 142 | painting_and_re = re.compile("^(.+?) and (.+?)$") 143 | 144 | def parse_materials(materials, typ): 145 | 146 | mats = [] 147 | if typ == Painting: 148 | # Test for X on Y 149 | mat = materials.lower() 150 | m = painting_on_re.match(mat) 151 | 152 | if m: 153 | paint = m.groups()[0] 154 | 155 | # x and y 156 | m2 = painting_and_re.match(paint) 157 | if m2: 158 | paints = m2.groups() 159 | else: 160 | paints = [paint] 161 | for p in paints: 162 | if aat_material_mapping.has_key(p): 163 | mats.append(Material("http://vocab.getty.edu/aat/%s" % aat_material_mapping[p])) 164 | else: 165 | pass 166 | # print "Paint: %s" % paint 167 | 168 | support = m.groups()[1] 169 | if aat_material_mapping.has_key(support): 170 | mats.append(Material("http://vocab.getty.edu/aat/%s" % aat_material_mapping[support])) 171 | else: 172 | # look for common adjectives, ()s 173 | swords = support.split(' ') 174 | for sw in swords: 175 | if aat_material_mapping.has_key(sw): 176 | mats.append(Material("http://vocab.getty.edu/aat/%s" % aat_material_mapping[sw])) 177 | 178 | return mats 179 | 180 | 181 | fh = file('record_cache.json') 182 | data = fh.read() 183 | fh.close() 184 | cache = json.loads(data) 185 | 186 | 187 | # Load up people 188 | fh = file('500_people.json') 189 | data = fh.read() 190 | fh.close() 191 | peoplel = json.loads(data) 192 | people = {} 193 | for who in peoplel: 194 | whoid = str(who['id']) 195 | if people.has_key(whoid): 196 | continue 197 | else: 198 | wrec = {'id': whoid} 199 | wrec['type'] = who['type'] 200 | wrec['date'] = who['display_date'] 201 | wrec['name'] = who['display_name'] 202 | wrec['nationality'] = who['display_nationality'] 203 | wrec['birthplace'] = who['display_birthplace'] 204 | wrec['deathplace'] = 
who['display_deathplace'] 205 | wrec['institution'] = who['display_institution'] 206 | wrec['image'] = who['display_image'] 207 | wrec['biography'] = who['display_biography'] 208 | people[whoid] = wrec 209 | 210 | print "Processing..." 211 | 212 | #recs = cache.values() 213 | recs = [cache['645']] 214 | 215 | ldrecs = [] 216 | for rec in recs: 217 | ident = str(rec['id']) 218 | 219 | # Build a Foo type of MMO 220 | clslabel = rec['classification']['name'] 221 | clsid = str(rec['classification']['id']) 222 | ot = rec['object_types'] # {'primary': {}, '???': {}} 223 | try: 224 | otid = str(ot['primary']['id']) 225 | otlabel = ot['primary']['display_value'] 226 | except: 227 | otid = "" 228 | otlabel = "" 229 | 230 | if clslabel == "Photographs": 231 | if otlabel == "Print": 232 | obj = PhotographPrint(ident) 233 | elif otlabel == "Album": 234 | obj = PhotographAlbum(ident) 235 | elif otlabel == "Book": 236 | obj = PhotographBook(ident) 237 | elif otlabel.lower() == "cased object": 238 | # Treat as print? 
239 | obj = PhotographPrint(ident) 240 | else: 241 | print "Unknown photograph subtype: %s" % otlabel 242 | elif aat_type_mapping.has_key(otlabel): 243 | obj = aat_type_mapping[otlabel](ident) 244 | elif aat_type_mapping.has_key(clslabel): 245 | obj = aat_type_mapping[clslabel](ident) 246 | else: 247 | obj = ManMadeObject(ident) 248 | # print "ot: '%s' ; cls: '%s'" % (otlabel, clslabel) 249 | t = Type(str(clsid)) 250 | t.label = clslabel 251 | obj.has_type = t 252 | 253 | tms = TMSNumber(ident) 254 | tms.value = ident 255 | obj.is_identified_by = tms 256 | 257 | recno = rec['number'] 258 | accno = AccessionNumber(recno) 259 | accno.value = recno 260 | obj.is_identified_by = accno 261 | 262 | obj.label = rec['title'] 263 | try: 264 | obj.description = rec['description']['display']['value'] 265 | except: 266 | pass 267 | 268 | production = Production(ident) 269 | obj.was_produced_by = production 270 | ts = TimeSpan(ident) 271 | ts.description = rec['date'] 272 | # XXX Parse date string for dates 273 | production.has_timespan = ts 274 | 275 | # XXX if there are multiple makers with different roles, 276 | # create a super Production with components, and each 277 | # role gets a separate component 278 | 279 | for mk in rec['makers']: 280 | mkid = str(mk['id']) 281 | role = mk['role'] 282 | 283 | who = Person(mkid) 284 | 285 | # Find in person db or deref 286 | first = mk['name_first'] 287 | last = mk['name_last'] 288 | 289 | try: 290 | person = people[mkid] 291 | who.label = person['name'] 292 | who.description = person['biography'] 293 | who.givenName = first 294 | who.familyName = last 295 | who.birthPlace = Place() 296 | who.deathPlace = Place() 297 | who.birthDate = "" 298 | who.deathDate = "" 299 | except: 300 | pass 301 | 302 | production.carried_out_by = who 303 | # XXX Link to ULAN 304 | 305 | # found, depicted, created 306 | 307 | if rec['places'] and rec['places'].has_key('place_created'): 308 | p = rec['places']['place_created'] 309 | pid = str(p['id']) 310 | 
where = Place(pid) 311 | where.label = p['display_value'] 312 | production.took_place_at = where 313 | 314 | # XXX Check for place_depicted (find out all possible keys) 315 | 316 | m = Material(ident) 317 | m.description = rec['medium'] 318 | obj.consists_of = m 319 | mats = parse_materials(rec['medium'], obj.__class__) 320 | if mats: 321 | for mat in mats: 322 | m.defines_typical_wholes_for = mat 323 | 324 | dpt = rec['department'] 325 | dptid = dpt['id'] 326 | try: 327 | dept = departments[dptid] 328 | except: 329 | dept = Department(str(dpt['id'])) 330 | dept.label = dpt['name'] 331 | departments[dptid] = dept 332 | obj.has_current_owner = dept 333 | 334 | if rec['location']: 335 | loc = rec['location'][0] 336 | locid = str(loc['id']) 337 | try: 338 | where = locations[locid] 339 | except: 340 | where = Place(locid) 341 | where.label = loc['name'] 342 | locations[locid] = where 343 | obj.has_current_location = where 344 | 345 | culture = Type("culture") 346 | culture.label = rec['culture'] 347 | obj.culture = culture 348 | # XXX Map to AAT 349 | 350 | if rec['markings']: 351 | markings = Mark() 352 | markings.description = rec['markings'] 353 | obj.shows_visual_item = markings 354 | if rec['signature']: 355 | sig = Signature() 356 | sig.description = rec['signature'] 357 | obj.shows_visual_item = sig 358 | if rec['inscription']: 359 | insc = Inscription() 360 | insc.description = rec['inscription'] 361 | obj.shows_visual_item = insc 362 | 363 | if rec.has_key('creditline'): 364 | credit = CreditLine() 365 | credit.value = rec['creditline'] 366 | obj.is_subject_to = credit 367 | if rec['source_creditline']: 368 | srcCredit = SourceCreditLine() 369 | srcCredit.value = rec['source_creditline'] 370 | obj.is_subject_to = srcCredit 371 | 372 | if rec['bibliography']: 373 | bx = 0 374 | for bib in rec['bibliography']: 375 | bt = bib['display_source_type'] 376 | bv = bib['display_value'] 377 | doc = Document("%s/%s" % (ident, str(bx))) 378 | bx += 1 379 | doc.label = bv 
380 | doc.has_type = Type(bt) 381 | # XXX extract actual bib data and map to something sensible 382 | obj.is_documented_in = doc 383 | 384 | if rec['provenance']: 385 | for prov in rec['provenance']: 386 | date = prov['display_date'] 387 | pid = str(prov['id']) 388 | who = prov['display_constituent'] 389 | # XXX Parse constituent and map to provenance patterns 390 | 391 | 392 | if rec['related_exhibitions']: 393 | for exh in rec['related_exhibitions']: 394 | exhid = str(exh['record_identifier']) 395 | ttl = exh['display_title'] 396 | dates = exh['display_dates'] 397 | 398 | exhibition = Exhibition(exhid) 399 | exhibition.label = ttl 400 | if dates: 401 | ts = TimeSpan(exhid) 402 | ts.description = dates 403 | exhibition.has_timespan = ts 404 | # XXX parse for begin, end dates 405 | 406 | vens = exh['display_venues'] 407 | for v in vens: 408 | name = v['display_name'] 409 | loc = v['display_location'] 410 | vid = str(v['record_identifier']) 411 | vdates = v['display_dates'] 412 | 413 | venue = Activity(vid) 414 | venue.label = name 415 | if vdates: 416 | vts = TimeSpan(vid) 417 | vts.description = vdates 418 | venue.has_timespan = vts 419 | # XXX Parse for begin, end dates 420 | if loc: 421 | place = Place(vid) 422 | place.description = loc 423 | # XXX Parse location 424 | venue.took_place_at = place 425 | exhibition.consists_of = venue 426 | 427 | # XXX Catalog Number is a Document that documents the Venue or Exhibition 428 | 429 | obj.was_present_at = exhibition 430 | 431 | ldrecs.append(obj) 432 | # print factory.toString(obj, compact=False) 433 | #break 434 | 435 | 436 | -------------------------------------------------------------------------------- /examples/sales-to-lod.py: -------------------------------------------------------------------------------- 1 | 2 | from lxml import etree 3 | import json 4 | import csv 5 | import codecs 6 | import re 7 | import os 8 | import sys 9 | from dateutil.parser import parse as dateparse 10 | 11 | # for cidoc_orm, see: 
https://github.com/azaroth42/Python-CIDOC-ORM 12 | from cidoc_orm import factory, TimeSpan, Identifier, LegalBody, \ 13 | Production, Actor, Place, Group, Material, Mark, \ 14 | Activity, InformationObject, Purchase, Acquisition, MonetaryAmount, \ 15 | Currency, MeasurementUnit, Dimension, PhysicalObject, VisualItem, Title 16 | 17 | from aat_mapping import ManMadeObject, Type, Person, materialTypes, register_aat_class, \ 18 | Painting, Sculpture, Drawing, Miniature, Graphic, Enamel, Tapestry, Mosaic, \ 19 | Embroidery, Furniture, LocalNumber, dimensionUnits 20 | 21 | 22 | PhysicalObject._properties['had_starting_price'] = {"rdf": "gri:had_starting_price", "range": MonetaryAmount} 23 | 24 | cmUnit = dimensionUnits['cm'] 25 | 26 | # Cache of repeated Objects 27 | catalogO = {} 28 | placeO = {} 29 | nationalityO = {} 30 | personO = {} 31 | 32 | materialO = {} 33 | 34 | bad_price = {} 35 | bad_dates = {} 36 | bad_types = {} 37 | bad_materials = {} 38 | 39 | # XXX -- Distinguish Local from Lugt 40 | register_aat_class("LugtNumber", Identifier, "300404621") 41 | 42 | factory.base_url = "http://data.getty.edu/provenance/" 43 | factory.default_lang = "en" 44 | 45 | objTypeMap = { 46 | u'gem\xe4lde': Painting, 47 | 'skulptur': Sculpture, 48 | 'zeichnung': Drawing, 49 | 'miniatur': Miniature, 50 | 'graphik': Graphic, 51 | 'painting': Painting, 52 | 'enamel': Enamel, 53 | 'miniature': Miniature, 54 | 'sculpture': Sculpture, 55 | 'drawing': Drawing, 56 | 'tapestry': Tapestry, 57 | 'embroidery': Embroidery, 58 | 'furniture': Furniture, 59 | 'mosaic': Mosaic, 60 | 'watercolor': Painting 61 | } 62 | 63 | 64 | r = "(je|l|h|d|b|durchm|durchmesser|dm[.]?)?[ ]*(ca.|h)?[ ]*([0-9,.]+)([ ]*(cm)?[ ]*x[ ]*([0-9,.]+)[ ]*(cm)?)?" 
65 | dimre = re.compile(r) 66 | # dim1 = groups()[2], dim2 = groups()[5] 67 | 68 | 69 | def process_record(rec): 70 | recData = {} 71 | for elm in rec.getchildren(): 72 | tag = elm.tag 73 | curr = recData.get(tag, None) 74 | 75 | if elm.getchildren(): 76 | value = elm 77 | else: 78 | value = elm.text 79 | 80 | if curr is None: 81 | recData[tag] = value 82 | elif type(recData[tag]) == list: 83 | recData[tag].append(value) 84 | else: 85 | recData[tag] = [curr, value] 86 | 87 | cno = recData['Catalogue_No'] 88 | try: 89 | catalog = catalogO[cno] 90 | auction = catalog.refers_to 91 | 92 | # Try and update end of timespan 93 | if not hasattr(auction.has_timespan, 'end_of_the_end'): 94 | sed = recData.get('Sale_End_Date', '') 95 | if sed: 96 | try: 97 | dt = dateparse(sed) 98 | span.end_of_the_end = "%s-%s-%sT00:00:00 CET" % (dt.year, dt.month, dt.day) 99 | except: 100 | try: 101 | bad_dates[sed] += 1 102 | except: 103 | bad_dates[sed] = 1 104 | 105 | except: 106 | catalog = InformationObject(cno) 107 | catalogO[cno] = catalog 108 | auction = Activity(cno) 109 | catalog.refers_to = auction 110 | catalog.has_representation = VisualItem(recData['GSC_link_to_pdf']) 111 | catalog.is_identified_by = LocalNumber(cno) 112 | 113 | # Auction date 114 | span = TimeSpan(cno) 115 | sbd = recData['Sale_Begin_Date'] 116 | sed = recData.get('Sale_End_Date', '') 117 | try: 118 | dt = dateparse(sbd) 119 | span.begin_of_the_begin = "%s-%s-%sT00:00:00 CET" % (dt.year, dt.month, dt.day) 120 | except: 121 | try: 122 | bad_dates[sbd] += 1 123 | except: 124 | bad_dates[sbd] = 1 125 | if sed: 126 | try: 127 | dt = dateparse(sed) 128 | span.end_of_the_end = "%s-%s-%sT00:00:00 CET" % (dt.year, dt.month, dt.day) 129 | except: 130 | try: 131 | bad_dates[sed] += 1 132 | except: 133 | bad_dates[sed] = 1 134 | auction.has_timespan = span 135 | 136 | # Auction location 137 | city = recData['City_of_Sale'] 138 | try: 139 | cityPlace = placeO[city] 140 | except: 141 | cityPlace = Place(city) 142 | 
cityPlace.label = city 143 | placeO[city] = cityPlace 144 | auction.took_place_at = cityPlace 145 | 146 | try: 147 | country = cityPlace.falls_within 148 | except: 149 | try: 150 | country = recData['Country_Auth'] 151 | countryPlace = Place(country) 152 | countryPlace.label = country 153 | cityPlace.falls_within = countryPlace 154 | except: 155 | # No country 156 | pass 157 | 158 | try: 159 | # Auction House 160 | house = recData['Auction_House'] 161 | try: 162 | ahouse = houseO[house] 163 | except: 164 | ahouse = LegalBody(house) 165 | ahouse.label = house 166 | auction.carried_out_by = ahouse 167 | except: 168 | # No auction house? :( 169 | pass 170 | 171 | try: 172 | lno = recData["Lot_Number"] 173 | except: 174 | print "No lot number for %s" % recData['recno'] 175 | return 176 | 177 | cnolot = cno +'/'+ lno 178 | 179 | # Build an aggregation of objects for the lot 180 | lotset = PhysicalObject(cnolot + "-set") 181 | # InfoObj for the entry 182 | entry = InformationObject(cnolot) 183 | entry.refers_to = lotset 184 | catalog.is_composed_of = entry 185 | 186 | if recData.has_key('Price'): 187 | pr = recData['Price'] 188 | # Process prinfo 189 | 190 | if type(pr) == list: 191 | pr = pr[0] 192 | if not type(pr) in [str, unicode]: 193 | try: 194 | pr = pr.text 195 | except: 196 | pr = "" 197 | 198 | pr = pr.replace("[?]", "") 199 | pr = pr.replace('?', '') 200 | pr = pr.strip() 201 | 202 | if pr: 203 | pr = pr.replace('1/2', '.5') 204 | pr = pr.replace('1/4', '.25') 205 | pr = pr.replace('3/4', '.75') 206 | pr = pr.replace(' .', '.') 207 | 208 | # Unknown: x-y-z x.y.z 209 | # x"y' x=y x:y x=y-z 210 | # 211 | 212 | if pr.find(" frs") > -1: 213 | curr = Currency("francs") 214 | curr.label = "francs" 215 | pr = pr.replace(' frs', '') 216 | elif pr.find(" fl") > -1: 217 | curr = Currency("fl.s") 218 | curr.label = "fl.s" 219 | pr = pr.replace(' fl', '') 220 | elif pr.find(" livres") > -1: 221 | curr = Currency("pounds") 222 | curr.label = "pounds" 223 | pr = 
pr.replace(' livres', '') 224 | else: 225 | curr = None 226 | 227 | pr = pr.replace('[or]', 'or') 228 | oidx = pr.find(' or ') 229 | if oidx > -1: 230 | pr = pr[:oidx] 231 | pr = pr.strip() 232 | 233 | fidx = pr.find(' for ') 234 | if fidx > -1: 235 | pr = pr[:fidx] 236 | pr = pr.strip() 237 | 238 | try: 239 | p = float(pr) 240 | except: 241 | p = -1 242 | try: 243 | bad_price[pr] += 1 244 | except: 245 | bad_price[pr] = 1 246 | if p >= 0: 247 | amnt = MonetaryAmount(cnolot + "-start") 248 | amnt.has_value = p 249 | if curr: 250 | amnt.has_currency = curr 251 | lotset.had_starting_price = amnt 252 | 253 | 254 | # Build the object 255 | 256 | try: 257 | typs = recData['Object_Types'].xpath('./Object_Type/text()') 258 | ot = typs[0] 259 | cls = objTypeMap[ot] 260 | except: 261 | cls = ManMadeObject 262 | try: 263 | bad_types[ot] += 1 264 | except: 265 | bad_types[ot] = 1 266 | 267 | obj = cls(cnolot) 268 | lotset.is_composed_of = obj 269 | 270 | title = Title(cnolot) 271 | obj.has_title = title 272 | try: 273 | title.value = recData['Title'] 274 | except: 275 | title.value = "[No Title Known]" 276 | if recData.has_key('Title_Modifier'): 277 | title.has_note = unicode(recData['Title_Modifier']) 278 | 279 | if recData.has_key('Materials'): 280 | for mat in recData['Materials'].xpath('./Material/text()'): 281 | ot = mat.lower() 282 | ot = ot.replace(',', '') 283 | ot = ot.replace('#', '') 284 | ot = ot.replace('.', '') 285 | ot = ot.replace('?', '') 286 | ot = ot.replace('auf', '') 287 | ot = ot.replace('und', '') 288 | ot = ot.replace("on", " ") 289 | ot = ot.replace("and", " ") 290 | ot = ot.replace(" ", ' ') 291 | 292 | words = ot.split(' ') 293 | obj.consists_of = [] 294 | for w in words: 295 | if w: 296 | try: 297 | material = materialO[w] 298 | except: 299 | material = Material(w) 300 | materialO[w] = material 301 | material.value = w 302 | obj.consists_of = material 303 | 304 | if recData.has_key("Dimensions"): 305 | for dimtext in 
recData['Dimensions'].xpath('./Dimension_Text/text()'): 306 | m = dimre.match(dimtext) 307 | if m: 308 | d1 = m.groups()[2] 309 | d2 = m.groups()[5] 310 | 311 | dim1 = Dimension(cnolot + "_d1") 312 | dim1.has_value = d1 313 | dim1.has_unit = cmUnit 314 | obj.has_dimension = dim1 315 | 316 | if d2: 317 | dim2 = Dimension(cnolot + "_d2") 318 | dim2.has_value = d2 319 | dim2.has_unit = cmUnit 320 | obj.has_dimension = dim2 321 | 322 | else: 323 | #print "Can't handle dimension data:" 324 | #print dimtext 325 | try: 326 | bad_materials[dimtext] += 1 327 | except: 328 | bad_materials[dimtext] = 1 329 | 330 | # Artist could be modified by Attrib_Mod 331 | # e.g. zugeschrieben --> attributed (2200) 332 | # Kopie von --> copy from (1) 333 | # stil --> style [of] (24) 334 | 335 | # Artist 336 | if recData.has_key('Artist'): 337 | arts = recData['Artist'] 338 | if type(arts) != list: 339 | arts = [arts] 340 | for artist in arts: 341 | va = artist.xpath('./Verb_Artist/text()') 342 | aa = artist.xpath('./Artist_Auth/text()') 343 | natl = artist.xpath('./Nationality/text()') 344 | mod = artist.xpath('./Attrib_Mod/text()') 345 | 346 | # first try to detect non names 347 | if aa: 348 | aa = unicode(aa[0]) 349 | if aa == "NEW": 350 | # treat as if not present 351 | pass 352 | elif aa.startswith('['): 353 | # anonymous artist with some known features 354 | pass 355 | aname = aa.lower() 356 | aname = aname.replace(" ", "_") 357 | try: 358 | who = personO[aname] 359 | except: 360 | who = Person(aname) 361 | personO[aname] = who 362 | # put verbatim name somewhere 363 | # and authority name in p131 is identified by 364 | who.label = aa 365 | 366 | if natl: 367 | natl = unicode(natl[0]) 368 | try: 369 | nat = nationalityO[natl] 370 | except: 371 | nat = Group(natl) 372 | nat.label = natl 373 | who.is_current_or_former_member_of = nat 374 | 375 | # one production event per artist? 
376 | prod = Production(cnolot + aname) 377 | prod.carried_out_by = who 378 | obj.was_produced_by = prod 379 | 380 | # seller if we know 381 | seller = None 382 | if recData.has_key("Seller"): 383 | sells = recData['Seller'] 384 | if type(sells) != list: 385 | sells = [sells] 386 | sx = 0 387 | for s in sells: 388 | try: 389 | lbl = unicode(s.xpath("./Seller_Auth/text()")[0]) 390 | except: 391 | try: 392 | lbl = unicode(s.xpath("./Verb_Seller/text()")[0]) 393 | except: 394 | # ???!!! 395 | continue 396 | end = "-seller" 397 | if sx: 398 | end += "-%s" % sx 399 | seller = Actor(cnolot + end) 400 | seller.label = lbl 401 | obj.has_former_or_current_owner = seller 402 | sx += 1 403 | 404 | try: 405 | txn = recData['Transaction'] 406 | txn = txn.lower() 407 | except: 408 | txn = "unknown" 409 | txn = txn.replace('[?]', '') 410 | txn = txn.replace("unknown or ", "") 411 | txn = txn.replace(" or unknown", "") 412 | txn = txn.strip() 413 | 414 | if txn.find(" or ") > -1: 415 | # Don't know what to do with X or Y 416 | print "Not processing or for txn" 417 | return 418 | 419 | if txn in ["sold", "bought in", "passed"]: 420 | lot = Activity(cnolot) 421 | auction.consists_of = lot 422 | lot.used_specific_object = lotset 423 | 424 | span = TimeSpan(cnolot) 425 | try: 426 | date = recData['Lot_Sale_Date'] 427 | # NB: this is going to end up strange due to UK vs EU timezones 428 | try: 429 | dt = dateparse(date) 430 | begin = "%s-%s-%sT00:00:00 CET" % (dt.year, dt.month, dt.day) 431 | end = "%s-%s-%sT23:59:59 CET" % (dt.year, dt.month, dt.day) 432 | span.begin_of_the_begin = begin 433 | span.end_of_the_end = end 434 | lot.has_timespan = span 435 | except: 436 | try: 437 | bad_dates[date] += 1 438 | except: 439 | bad_dates[date] = 1 440 | except: 441 | print "No Sale Date: %s" % recData['recno'] 442 | 443 | ln = recData.get('Lot_Notes', None) 444 | if ln is not None: 445 | lot.has_note = unicode(ln) 446 | 447 | entry.refers_to = lot 448 | 449 | if txn in ['sold', 'bought in']: 
450 | # Bidding activity exists 451 | bidding = Activity(cnolot + "-bidding") 452 | lot.consists_of = bidding 453 | 454 | if txn == "sold": 455 | txn = Purchase(cnolot+"-transaction") 456 | lot.consists_of = txn 457 | bidding.occurs_before = txn 458 | 459 | acq = Purchase(cnolot + "-acquisition") 460 | txn.consists_of = acq 461 | acq.transferred_title_of = obj 462 | 463 | if seller: 464 | sellers = obj.has_former_or_current_owner 465 | if type(sellers) == list: 466 | for s in sellers: 467 | acq.transferred_title_from = s 468 | else: 469 | acq.transferred_title_from = sellers 470 | 471 | bx = 0 472 | if recData.has_key("Buyer"): 473 | buys = recData['Buyer'] 474 | if type(buys) != list: 475 | buys = [buys] 476 | for b in buys: 477 | try: 478 | lbl = unicode(b.xpath("./Buyer_Auth/text()")[0]) 479 | except: 480 | try: 481 | lbl = unicode(b.xpath("./Verb_Buyer/text()")[0]) 482 | except: 483 | # ???!!! 484 | continue 485 | 486 | end = "-buyer" 487 | if bx: 488 | end += "-%s" % bx 489 | buyer = Actor(cnolot + end) 490 | buyer.label = lbl 491 | acq.transferred_title_to = buyer 492 | bx += 1 493 | 494 | 495 | return catalog 496 | 497 | 498 | 499 | recdir = '/Users/rsanderson/Box Sync/PI_Sales/' 500 | files = ['salesdb1.xml', 'salesdb2.xml', 'salesdb3.xml', 'salesdb4.xml'] 501 | files = ['salesdb1.xml'] 502 | 503 | for fn in files: 504 | rec = [] 505 | fh = file(os.path.join(recdir, fn)) 506 | #fh = codecs.open(os.path.join(recdir, fn), 'r', 'iso-8859-1') 507 | 508 | # chomp first three lines 509 | fh.readline() ; fh.readline() ; fh.readline() 510 | 511 | header = '\n' 512 | 513 | line = fh.readline() 514 | while line != "\n": 515 | while (line.find('') == -1): 516 | rec.append(line) 517 | line = fh.readline() 518 | if not line: 519 | break 520 | rec.append(line) 521 | 522 | data = ''.join(rec) 523 | data = header + data 524 | data = data.replace("Catalogue_No.", "Catalogue_No") 525 | data = data.replace("Country_Auth.", "Country_Auth") 526 | data = data.replace('\x04', '') 
527 | data = data.replace('\x1f', '') 528 | 529 | try: 530 | dom = etree.XML(data) 531 | except: 532 | print "Invalid record data: %s" % data[:200] 533 | 534 | top = process_record(dom) 535 | # break 536 | 537 | line = fh.readline() 538 | rec = [] 539 | 540 | fh.close() 541 | 542 | -------------------------------------------------------------------------------- /cromulent/data/crm-profile.json: -------------------------------------------------------------------------------- 1 | { 2 | "E10_Transfer_of_Custody": 1, 3 | "E11_Modification": 1, 4 | "E12_Production": 1, 5 | "E13_Attribute_Assignment": 1, 6 | "E14_Condition_Assessment": 0, 7 | "E15_Identifier_Assignment": 0, 8 | "E16_Measurement": 0, 9 | "E17_Type_Assignment": 0, 10 | "E18_Physical_Thing": 0, 11 | "E19_Physical_Object": 0, 12 | "E1_CRM_Entity": 0, 13 | "E20_Biological_Object": 0, 14 | "E21_Person": 1, 15 | "E22_Human-Made_Object": 1, 16 | "E24_Physical_Human-Made_Thing": 0, 17 | "E25_Human-Made_Feature": 0, 18 | "E26_Physical_Feature": 0, 19 | "E27_Site": 0, 20 | "E28_Conceptual_Object": 0, 21 | "E29_Design_or_Procedure": 0, 22 | "E2_Temporal_Entity": 0, 23 | "E30_Right": 1, 24 | "E31_Document": 0, 25 | "E32_Authority_Document": 1, 26 | "E33_Linguistic_Object": 1, 27 | "E34_Inscription": 0, 28 | "E35_Title": 0, 29 | "E36_Visual_Item": 1, 30 | "E37_Mark": 0, 31 | "E38_Image": 0, 32 | "E39_Actor": 1, 33 | "E3_Condition_State": 0, 34 | "E40_Legal_Body": 0, 35 | "E41_Appellation": 0, 36 | "E42_Identifier": 1, 37 | "E4_Period": 1, 38 | "E52_Time-Span": 1, 39 | "E53_Place": 1, 40 | "E54_Dimension": 1, 41 | "E55_Type": 1, 42 | "E56_Language": 1, 43 | "E57_Material": 1, 44 | "E58_Measurement_Unit": 1, 45 | "E5_Event": 1, 46 | "E63_Beginning_of_Existence": 0, 47 | "E64_End_of_Existence": 0, 48 | "E65_Creation": 1, 49 | "E66_Formation": 1, 50 | "E67_Birth": 1, 51 | "E68_Dissolution": 1, 52 | "E69_Death": 1, 53 | "E6_Destruction": 1, 54 | "E70_Thing": 0, 55 | "E71_Human-Made_Thing": 0, 56 | "E72_Legal_Object": 0, 57 
| "E73_Information_Object": 1, 58 | "E74_Group": 1, 59 | "E77_Persistent_Item": 0, 60 | "E78_Curated_Holding": 0, 61 | "E79_Part_Addition": 1, 62 | "E7_Activity": 1, 63 | "E80_Part_Removal": 1, 64 | "E81_Transformation": 1, 65 | "E83_Type_Creation": 0, 66 | "E84_Information_Carrier": 0, 67 | "E85_Joining": 1, 68 | "E86_Leaving": 1, 69 | "E87_Curation_Activity": 0, 70 | "E89_Propositional_Object": 1, 71 | "E8_Acquisition": 1, 72 | "E90_Symbolic_Object": 0, 73 | "E92_Spacetime_Volume": 0, 74 | "E93_Presence": 0, 75 | "E96_Purchase": 0, 76 | "E97_Monetary_Amount": 1, 77 | "E98_Currency": 1, 78 | "E99_Product_Type": 0, 79 | "E9_Move": 1, 80 | "E33_E41_Linguistic_Appellation": 1, 81 | "dig:D1_Digital_Object": 1, 82 | "geo:SP5_Geometric_Place_Expression": 1, 83 | "geo:SP4_Spatial_Coordinate_Reference_System": 1, 84 | "geo:SP6_Declarative_Place": 1, 85 | "sci:S19_Encounter_Event": 1, 86 | "la:Phase": 0, 87 | "la:RightAcquisition": 1, 88 | "la:Payment": 1, 89 | "la:Relationship": 1, 90 | "la:Set": 1, 91 | "la:Addition": 1, 92 | "la:Removal": 1, 93 | "la:DigitalService": 1, 94 | 95 | "P100_was_death_of": [1,0], 96 | "P100i_died_in": [1,0], 97 | "P101_had_as_general_use": [1,1], 98 | "P101i_was_use_of": [0,1], 99 | "P102_has_title": [0,1], 100 | "P102i_is_title_of": [0,0], 101 | "P103_was_intended_for": [0,1], 102 | "P103i_was_intention_of": [0,1], 103 | "P104_is_subject_to": [1,1], 104 | "P104i_applies_to": [1,1], 105 | "P105_right_held_by": [0,1], 106 | "P105i_has_right_on": [0,1], 107 | "P106_is_composed_of": [1,1], 108 | "P106i_forms_part_of": [1,1], 109 | "P107_has_current_or_former_member": [1,1], 110 | "P107i_is_current_or_former_member_of": [1,1], 111 | "P108_has_produced": [1,1], 112 | "P108i_was_produced_by": [1,0], 113 | "P109_has_current_or_former_curator": [0,1], 114 | "P109i_is_current_or_former_curator_of": [0,1], 115 | "P10_falls_within": [0,1], 116 | "P10i_contains": [0,1], 117 | "P110_augmented": [1,0], 118 | "P110i_was_augmented_by": [1,1], 119 | 
"P111_added": [1,0], 120 | "P111i_was_added_by": [1,1], 121 | "P112_diminished": [1,0], 122 | "P112i_was_diminished_by": [1,1], 123 | "P113_removed": [1,0], 124 | "P113i_was_removed_by": [1,1], 125 | "P11_had_participant": [1,1], 126 | "P11i_participated_in": [1,1], 127 | "P121_overlaps_with": [0,1], 128 | "P122_borders_with": [0,1], 129 | "P123_resulted_in": [1,1], 130 | "P123i_resulted_from": [1,1], 131 | "P124_transformed": [1,1], 132 | "P124i_was_transformed_by": [1,1], 133 | "P125_used_object_of_type": [0,1], 134 | "P125i_was_type_of_object_used_in": [0,1], 135 | "P126_employed": [1,1], 136 | "P126i_was_employed_in": [0,1], 137 | "P127_has_broader_term": [0,1], 138 | "P127i_has_narrower_term": [0,1], 139 | "P128_carries": [1,1], 140 | "P128i_is_carried_by": [1,1], 141 | "P129_is_about": [1,1], 142 | "P129i_is_subject_of": [1,1], 143 | "P12_occurred_in_the_presence_of": [1,1], 144 | "P12i_was_present_at": [1,1], 145 | "P130_shows_features_of": [0,1], 146 | "P130i_features_are_also_found_on": [0,1], 147 | "P131_is_identified_by": [0,1], 148 | "P131i_identifies": [0,1], 149 | "P132_overlaps_with": [0,1], 150 | "P133_is_separated_from": [0,1], 151 | "P134_continued": [1,1], 152 | "P134i_was_continued_by": [1,1], 153 | "P135_created_type": [0,1], 154 | "P135i_was_created_by": [0,1], 155 | "P136_was_based_on": [0,1], 156 | "P136i_supported_type_creation": [0,1], 157 | "P137_exemplifies": [0,1], 158 | "P137i_is_exemplified_by": [0,1], 159 | "P138_represents": [1,1], 160 | "P138i_has_representation": [1,1], 161 | "P139_has_alternative_form": [1,1], 162 | "P13_destroyed": [1,0], 163 | "P13i_was_destroyed_by": [1,0], 164 | "P140_assigned_attribute_to": [1,0], 165 | "P140i_was_attributed_by": [1,1], 166 | "P141_assigned": [1,1], 167 | "P141i_was_assigned_by": [1,1], 168 | "P142_used_constituent": [0,1], 169 | "P142i_was_used_in": [0,1], 170 | "P143_joined": [1,0], 171 | "P143i_was_joined_by": [1,1], 172 | "P144_joined_with": [1,0], 173 | "P144i_gained_member_by": [1,1], 
174 | "P145_separated": [1,0], 175 | "P145i_left_by": [1,1], 176 | "P146_separated_from": [1,0], 177 | "P146i_lost_member_by": [1,1], 178 | "P147_curated": [0,1], 179 | "P147i_was_curated_by": [0,1], 180 | "P148_has_component": [1,1], 181 | "P148i_is_component_of": [1,1], 182 | "P149_is_identified_by": [0,1], 183 | "P149i_identifies": [0,1], 184 | "P14_carried_out_by": [1,1], 185 | "P14i_performed": [1,1], 186 | "P150_defines_typical_parts_of": [0,1], 187 | "P150i_defines_typical_wholes_for": [0,1], 188 | "P151_was_formed_from": [0,1], 189 | "P151i_participated_in": [0,1], 190 | "P152_has_parent": [0,1], 191 | "P152i_is_parent_of": [0,1], 192 | "P156_occupies": [1,1], 193 | "P156i_is_occupied_by": [1,1], 194 | "P157_is_at_rest_relative_to": [0,1], 195 | "P157i_provides_reference_space_for": [0,1], 196 | "P15_was_influenced_by": [1,1], 197 | "P15i_influenced": [1,1], 198 | "P160_has_temporal_projection": [0,1], 199 | "P161_has_spatial_projection": [0,1], 200 | "P164_during": [0,1], 201 | "P164i_was_time-span_of": [0,1], 202 | "P165_incorporates": [0,1], 203 | "P165i_is_incorporated_in": [0,1], 204 | "P166_was_a_presence_of": [0,1], 205 | "P166i_had_presence": [0,1], 206 | "P167_at": [0,1], 207 | "P167i_was_place_of": [0,1], 208 | "P168_place_is_defined_by": [1,1], 209 | "P168i_defines_place": [0,1], 210 | "P16_used_specific_object": [1,1], 211 | "P16i_was_used_for": [1,1], 212 | "P177_assigned_property_of_type": [1,0], 213 | "P179_had_sales_price": [0,1], 214 | "P179i_was_sales_price_of": [0,1], 215 | "P17_was_motivated_by": [1,1], 216 | "P17i_motivated": [1,1], 217 | "P180_has_currency": [1,0], 218 | "P180i_was_currency_of": [1,1], 219 | "P181_has_amount": [0,0], 220 | "P182_ends_before_or_with_the_start_of": [1,1], 221 | "P182i_starts_after_or_with_the_end_of": [1,1], 222 | "P183_ends_before_the_start_of": [1,1], 223 | "P183i_starts_after_the_end_of": [1,1], 224 | "P184_ends_before_or_with_the_end_of": [1,1], 225 | "P184i_ends_with_or_after_the_end_of": [1,1], 226 
| "P185_ends_before_the_end_of": [1,1], 227 | "P185i_ends_after_the_end_of": [1,1], 228 | "P189_approximates": [1,1], 229 | "P189i_is_approximated_by": [1,1], 230 | "P19_was_intended_use_of": [0,1], 231 | "P19i_was_made_for": [0,1], 232 | "P190_has_symbolic_content": [1,0], 233 | "P191_had_duration": [1,0], 234 | "P191i_was_duration_of": [1,0], 235 | "P1_is_identified_by": [1, 1], 236 | "P1i_identifies": [1, 0], 237 | "P20_had_specific_purpose": [1,1], 238 | "P20i_was_purpose_of": [1,1], 239 | "P21_had_general_purpose": [1,1], 240 | "P21i_was_purpose_of": [0,1], 241 | "P22_transferred_title_to": [1,1], 242 | "P22i_acquired_title_through": [1,1], 243 | "P23_transferred_title_from": [1,1], 244 | "P23i_surrendered_title_through": [1,1], 245 | "P24_transferred_title_of": [1,1], 246 | "P24i_changed_ownership_through": [1,1], 247 | "P25_moved": [1,1], 248 | "P25i_moved_by": [1,1], 249 | "P26_moved_to": [1,0], 250 | "P26i_was_destination_of": [1,1], 251 | "P27_moved_from": [1,0], 252 | "P27i_was_origin_of": [1,1], 253 | "P28_custody_surrendered_by": [1,1], 254 | "P28i_surrendered_custody_through": [1,1], 255 | "P29_custody_received_by": [1,1], 256 | "P29i_received_custody_through": [1,1], 257 | "P2_has_type": [1,1], 258 | "P2i_is_type_of": [0,1], 259 | "P30_transferred_custody_of": [1,1], 260 | "P30i_custody_transferred_through": [1,1], 261 | "P31_has_modified": [1,1], 262 | "P31i_was_modified_by": [1,1], 263 | "P32_used_general_technique": [1,1], 264 | "P32i_was_technique_of": [2,1], 265 | "P33_used_specific_technique": [0,1], 266 | "P33i_was_used_by": [0,1], 267 | "P34_concerned": [0,1], 268 | "P34i_was_assessed_by": [0,1], 269 | "P35_has_identified": [0,1], 270 | "P35i_was_identified_by": [0,1], 271 | "P37_assigned": [0,1], 272 | "P37i_was_assigned_by": [0,1], 273 | "P38_deassigned": [0,1], 274 | "P38i_was_deassigned_by": [0,1], 275 | "P39_measured": [0,1], 276 | "P39i_was_measured_by": [0,1], 277 | "P3_has_note": [0,1], 278 | "P40_observed_dimension": [0,1], 279 | 
"P40i_was_observed_in": [0,1], 280 | "P41_classified": [0,1], 281 | "P41i_was_classified_by": [0,1], 282 | "P42_assigned": [0,1], 283 | "P42i_was_assigned_by": [0,1], 284 | "P43_has_dimension": [1,1], 285 | "P43i_is_dimension_of": [1,0], 286 | "P44_has_condition": [0,1], 287 | "P44i_is_condition_of": [0,1], 288 | "P45_consists_of": [1,1], 289 | "P45i_is_incorporated_in": [1,1], 290 | "P46_is_composed_of": [1,1], 291 | "P46i_forms_part_of": [1,1], 292 | "P48_has_preferred_identifier": [0,1], 293 | "P48i_is_preferred_identifier_of": [0,1], 294 | "P49_has_former_or_current_keeper": [0,1], 295 | "P49i_is_former_or_current_keeper_of": [0,1], 296 | "P4_has_time-span": [1,0], 297 | "P4i_is_time-span_of": [0,1], 298 | "P50_has_current_keeper": [1,1], 299 | "P50i_is_current_keeper_of": [1,1], 300 | "P51_has_former_or_current_owner": [0,1], 301 | "P51i_is_former_or_current_owner_of": [0,1], 302 | "P52_has_current_owner": [1,1], 303 | "P52i_is_current_owner_of": [1,1], 304 | "P53_has_former_or_current_location": [0,1], 305 | "P53i_is_former_or_current_location_of": [0,1], 306 | "P54_has_current_permanent_location": [0,1], 307 | "P54i_is_current_permanent_location_of": [0,1], 308 | "P55_has_current_location": [1,0], 309 | "P55i_currently_holds": [1,1], 310 | "P56_bears_feature": [1,1], 311 | "P56i_is_found_on": [1,0], 312 | "P57_has_number_of_parts": [0,1], 313 | "P58_has_section_definition": [0,1], 314 | "P58i_defines_section": [0,1], 315 | "P59_has_section": [0,1], 316 | "P59i_is_located_on_or_within": [0,1], 317 | "P5_consists_of": [0,1], 318 | "P5i_forms_part_of": [0,1], 319 | "P62_depicts": [1,1], 320 | "P62i_is_depicted_by": [1,1], 321 | "P65_shows_visual_item": [1,1], 322 | "P65i_is_shown_by": [1,1], 323 | "P67_refers_to": [1,1], 324 | "P67i_is_referred_to_by": [1,1], 325 | "P68_foresees_use_of": [0,1], 326 | "P68i_use_foreseen_by": [0,1], 327 | "P69_is_associated_with": [0,1], 328 | "P70_documents": [0,1], 329 | "P70i_is_documented_in": [0,1], 330 | "P71_lists": [1,1], 
331 | "P71i_is_listed_in": [1,1], 332 | "P72_has_language": [1,1], 333 | "P72i_is_language_of": [1,1], 334 | "P73_has_translation": [1,1], 335 | "P73i_is_translation_of": [1,1], 336 | "P74_has_current_or_former_residence": [1,1], 337 | "P74i_is_current_or_former_residence_of": [1,1], 338 | "P75_possesses": [1,1], 339 | "P75i_is_possessed_by": [1,1], 340 | "P76_has_contact_point": [1,1], 341 | "P76i_provides_access_to": [1,1], 342 | "P78_is_identified_by": [0,1], 343 | "P78i_identifies": [0,1], 344 | "P79_beginning_is_qualified_by": [0,1], 345 | "P7_took_place_at": [1,1], 346 | "P7i_witnessed": [0,1], 347 | "P80_end_is_qualified_by": [0,1], 348 | "P81_ongoing_throughout": [0,1], 349 | "P81a_end_of_the_begin": [1,0], 350 | "P81b_begin_of_the_end": [1,0], 351 | "P82_at_some_time_within": [0,1], 352 | "P82a_begin_of_the_begin": [1,0], 353 | "P82b_end_of_the_end": [1,0], 354 | "P86_falls_within": [0,1], 355 | "P86i_contains": [0,1], 356 | "P87_is_identified_by": [0,1], 357 | "P87i_identifies": [0,1], 358 | "P89_falls_within": [1,1], 359 | "P89i_contains": [1,1], 360 | "P8_took_place_on_or_within": [0,1], 361 | "P8i_witnessed": [0,1], 362 | "P90_has_value": [1,0], 363 | "P90a_has_lower_value_limit": [1,0], 364 | "P90b_has_upper_value_limit": [1,0], 365 | "P91_has_unit": [1,0], 366 | "P91i_is_unit_of": [0,1], 367 | "P92_brought_into_existence": [0,1], 368 | "P92i_was_brought_into_existence_by": [0,0], 369 | "P93_took_out_of_existence": [0,1], 370 | "P93i_was_taken_out_of_existence_by": [0,0], 371 | "P94_has_created": [1,1], 372 | "P94i_was_created_by": [1,0], 373 | "P95_has_formed": [1,1], 374 | "P95i_was_formed_by": [1,0], 375 | "P96_by_mother": [0,0], 376 | "P96i_gave_birth": [0,1], 377 | "P97_from_father": [0,1], 378 | "P97i_was_father_for": [0,1], 379 | "P98_brought_into_life": [1,0], 380 | "P98i_was_born": [1,0], 381 | "P99_dissolved": [1,0], 382 | "P99i_was_dissolved_by": [1,0], 383 | "P9_consists_of": [1,1], 384 | "P9i_forms_part_of": [1,1], 385 | 386 | 
"P169i_spacetime_volume_is_defined_by": [0,0], 387 | "P170i_time_is_defined_by": [0,0], 388 | "P171_at_some_place_within": [0,0], 389 | "P172_contains": [0,0], 390 | "P173_starts_before_or_with_the_end_of": [1,1], 391 | "P173i_ends_after_or_with_the_start_of": [1,1], 392 | "P174_starts_before_the_end_of": [1,1], 393 | "P174i_ends_after_the_start_of": [1,1], 394 | "P175_starts_before_or_with_the_start_of": [1,1], 395 | "P175i_starts_with_or_after_the_start_of": [1,1], 396 | "P176_starts_before_the_start_of": [1,1], 397 | "P176i_starts_after_the_start_of": [1,1], 398 | "P186_produced_thing_of_product_type": [0,0], 399 | "P186i_is_produced_by": [0,0], 400 | "P187_has_production_plan": [0,0], 401 | "P187i_is_production_plan_for": [0,0], 402 | "P188_requires_production_tool": [0,0], 403 | "P188i_is_production_tool_for": [0,0], 404 | "P195_was_a_presence_of": [0,1], 405 | "P195i_had_presence": [0,1], 406 | "P196_defines": [0,1], 407 | "P196i_is_defined_by": [0,1], 408 | "P197_covered_parts_of": [0,1], 409 | "P197i_was_partially_covered_by": [0,1], 410 | "P198_holds_or_supports": [1,1], 411 | "P198i_is_held_or_supported_by": [1,1], 412 | "P199_represents_instance_of_type": [1,1], 413 | "P199i_has_instance_represented_by":[1,1], 414 | 415 | "la:paid_amount": [1,0], 416 | "la:paid_from": [1,1], 417 | "la:paid_to": [1,1], 418 | "la:establishes": [1,0], 419 | "la:established_by": [1,0], 420 | "la:invalidates": [1,1], 421 | "la:invalidated_by": [1,0], 422 | "la:relates_to": [1,0], 423 | "la:relates_from": [1,0], 424 | "la:related_to_by": [1,1], 425 | "la:related_from_by": [1,1], 426 | "la:initiated": [0,1], 427 | "la:initiated_by": [0,1], 428 | "la:terminated": [0,1], 429 | "la:terminated_by": [0,1], 430 | "la:has_phase": [0,1], 431 | "la:phase_of": [0,0], 432 | "la:related_entity": [0,0], 433 | "la:related_entity_of": [0,1], 434 | "la:relationship": [0,0], 435 | "la:has_member": [1,1], 436 | "la:member_of": [1,1], 437 | "la:added_to": [1,0], 438 | "la:added_to_by": [1,1], 439 
| "la:added_member": [1,0], 440 | "la:added_member_by": [1,1], 441 | "la:removed_from": [1,0], 442 | "la:removed_from_by": [1,1], 443 | "la:removed_member": [1,0], 444 | "la:removed_member_by": [1,1], 445 | "la:digitally_carries": [1,1], 446 | "la:digitally_carried_by": [1,1], 447 | "la:digitally_shows": [1,1], 448 | "la:digitally_shown_by": [1,1], 449 | "la:digitally_available_via": [1,1], 450 | "la:digitally_makes_available": [1,1], 451 | "la:property_classified_as": [1,1], 452 | "la:represents_instance_of_type": [1,1], 453 | "la:instance_represented_by": [1,1], 454 | "la:current_permanent_custodian": [1,0], 455 | "la:current_permanent_custodian_of": [1,1], 456 | "la:equivalent": [1,1], 457 | "la:access_point": [1,1], 458 | 459 | "dc:format": [1,0], 460 | "dcterms:conformsTo": [1,1], 461 | "dcterms:subject": [1,1], 462 | "rdf:value": [1,0], 463 | "rdfs:seeAlso": [1,1], 464 | "rdfs:label": [1,0], 465 | "skos:exactMatch": [1,1], 466 | "skos:closeMatch": [1,1], 467 | "skos:narrower": [1,1], 468 | "skos:broader": [1,1], 469 | "skos:hasTopConcept": [1,1], 470 | "skos:topConceptOf": [1,1], 471 | "skos:inScheme": [1,1], 472 | "sci:O13_triggers": [1,1], 473 | "sci:O13i_is_triggered_by": [1,1], 474 | "sci:O19_encountered_object": [1,1], 475 | "sci:O19i_was_object_encountered_at": [1,1], 476 | "archaeo:AP25_occurs_during": [1,1] 477 | } -------------------------------------------------------------------------------- /utils/data/linkedart.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | Property Classified As 13 | Record dot one properties via Attribute Assignments 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | Current Permanent Custodian 23 | This property records the normal custodian of the object at the time when the assertion was made. 
The object may have a temporary custodian, for example when it is loaned to another organization for an exhibition or between departments for conservation or storage. 24 | 25 | 26 | 27 | 28 | 29 | 30 | Current Permanent Custodian Of 31 | Inverse of Current Permanent Custodian 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | has equivalent instance 41 | Similar to skos:exactMatch, the referenced entity is an equivalent instance to the referencing entity. This would not have the same inference issue as exactMatch, whereby the domain and range become skos:Concept, which is equivalent to E55_Type ... and thus everything becomes a Type. 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | Payment 51 | Payment of Money 52 | 53 | 54 | 55 | 56 | Paid Amount 57 | The amount paid. 58 | 59 | 60 | 61 | 62 | 63 | Paid From 64 | Who the payment came from 65 | 66 | 67 | 68 | 69 | 70 | Paid To 71 | Who the payment went to 72 | 73 | 74 | 75 | 76 | 77 | Right Acquisition 78 | The acquiring or establishment of a particular E30 Right over some entity 79 | 80 | 81 | 82 | 83 | establishes 84 | The right established by a RightAcquisition 85 | 86 | 87 | 88 | 89 | 90 | established by 91 | The RightAcquisition that established this Right 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | invalidates 100 | The right which is invalidated by a RightAcquisition 101 | 102 | 103 | 104 | 105 | 106 | invalidated by 107 | The RightAcquisition that invalidated this Right 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | Phase 120 | The period of time during which an entity is in a certain phase or state of its existence. The phase can be physical (the box is open, the painting is 14 ft wide) or social (the sculpture is owned by some Actor, the building is used as a castle). 
121 | 122 | 123 | 124 | 125 | initiated 126 | Events can start or initiate Phases, such as the Modification of a painting to trim it from 16 feet to 14 ft would initiate a 14 feet phase, or the Acquisition of a sculpture by some Actor would initiate that Actor's ownership phase. This relationship links the initiating Event to the Phase. 127 | 128 | 129 | 130 | 131 | 132 | 133 | initiated by 134 | The inverse of la:initiated. 135 | 136 | 137 | 138 | 139 | 140 | 141 | terminated 142 | Events can end or terminate Phases, such as the Modification of a painting to trim it from 16 feet to 14 ft would end the 16 feet phase, or the Acquisition of a sculpture by some Actor would end the seller's ownership. This relationship links the terminating Event to the Phase. 143 | 144 | 145 | 146 | 147 | 148 | 149 | terminated by 150 | The inverse of la:terminated. 151 | 152 | 153 | 154 | 155 | 156 | 157 | has phase 158 | The relationship between an E1 Entity and one of its Phases. A painting that had two sizes, 16 feet and 14 feet wide, would have two Phases, one for each width. 159 | 160 | 161 | 162 | 163 | 164 | 165 | phase of 166 | The inverse of la:has_phase. 167 | 168 | 169 | 170 | 171 | 172 | 173 | related entity 174 | An E1 Entity that defines the nature of the Phase. For an ownership phase, this would be the owning E39 Actor. For the size of painting phase, this would be the E54 Dimension that describes the size. 175 | 176 | 177 | 178 | 179 | 180 | 181 | related entity of 182 | The inverse of la:related_entity. 183 | 184 | 185 | 186 | 187 | 188 | 189 | related by 190 | The relationship between the Entity that the phase is of, and the defining entity. For an ownership phase, this would be P52 has current owner. For the size of painting phase, this would be P43 has dimension. 
191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | Set 206 | 207 | 208 | 209 | 210 | 211 | 212 | has member 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | member of 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | Addition 229 | The addition of some entity to a Set 230 | 231 | 232 | 233 | 234 | added to 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | added to by 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | added 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | added by 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | Removal 267 | The removal of some entity from a Set 268 | 269 | 270 | 271 | 272 | removed from 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | removed from by 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | removed 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | removed by 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | digitally carries 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | digitally carried by 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | digitally shows 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | digitally shown by 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | access point 340 | From an ur- digital object to a single concrete representation. 341 | A locator as opposed to an identifier, similar to the approximated_by for Place. 
# Meta meta: local subclasses of the CIDOC-CRM classes, each tagged with a
# Getty AAT classification URI.  FIX: the original literal listed "Drawing"
# twice with identical values; duplicate keys in a dict literal are silently
# collapsed, so the redundant entry is removed.
ext_classes = {
    "LocalNumber": {"parent": Identifier, "vocab": "aat", "id": "300404621"},
    "AccessionNumber": {"parent": Identifier, "vocab": "aat", "id": "300312355"},
    "Inscription": {"parent": Mark, "vocab": "aat", "id": "300028702"},
    "Signature": {"parent": Mark, "vocab": "aat", "id": "300028705"},
    "Painting": {"parent": ManMadeObject, "vocab": "aat", "id": "300033618"},
    "Sculpture": {"parent": ManMadeObject, "vocab": "aat", "id": "300047090"},
    "Drawing": {"parent": ManMadeObject, "vocab": "aat", "id": "300033973"},
    "Miniature": {"parent": ManMadeObject, "vocab": "aat", "id": "300033936"},
    "Tapestry": {"parent": ManMadeObject, "vocab": "aat", "id": "300205002"},
    "Furniture": {"parent": ManMadeObject, "vocab": "aat", "id": "300037680"},
    "Mosaic": {"parent": ManMadeObject, "vocab": "aat", "id": "300015342"},
    "Photograph": {"parent": ManMadeObject, "vocab": "aat", "id": "300046300"},
    "Coin": {"parent": ManMadeObject, "vocab": "aat", "id": "300037222"},
    "Vessel": {"parent": ManMadeObject, "vocab": "aat", "id": "300193015"},
    "PhotographPrint": {"parent": ManMadeObject, "vocab": "aat", "id": "300127104"},
    "PhotographAlbum": {"parent": ManMadeObject, "vocab": "aat", "id": "300026695"},
    "PhotographBook": {"parent": ManMadeObject, "vocab": "aat", "id": "300265728"}
}

factory.base_url = "http://data.getty.edu/provenance/"
factory.context_uri = "http://data.getty.edu/contexts/crm_context.jsonld"

# Materialize each ext_classes entry as a real subclass carrying its AAT type,
# and install it into module scope so it can be referenced by name below.
for (name, v) in ext_classes.items():
    c = type(name, (v['parent'],), {})
    c._p2_has_type = "http://vocab.getty.edu/%s/%s" % (v['vocab'], v['id'])
    globals()[name] = c


# At the moment it's just an activity, no subtype info
class TakeInventory(Activity):
    pass


class Payment(Activity):
    """Extension activity (pi: namespace) recording money changing hands."""
    _properties = {
        "paid_amount": {"rdf": "pi:paid_amount", "range": MonetaryAmount},
        "paid_to": {"rdf": "pi:paid_to", "range": Actor},
        "paid_from": {"rdf": "pi:paid_from", "range": Actor}
    }
    _uri_segment = "Payment"
    _type = "pi:Payment"

# Cache the MRO minus `object`, matching what the ORM expects on its
# built-in classes.
Payment._classhier = inspect.getmro(Payment)[:-1]

# Object Types
# {'Pastel': 265, 'Photograph': 4, 'Clocks': 1, 'Painting [?]': 8, 'Painting': 37313,
#  '[not identified]': 2, 'Clothing': 1, 'Playing Cards': 1, 'Watercolor': 547,
#  'Maps': 1, 'Book': 35, 'Decorative Art': 9, 'Painting; Sculpture': 1, 'Print': 21,
#  'Watercolor; Painting': 1, 'Sculpture': 1817, 'Drawing': 225, 'Tapestry': 61, 'Furniture': 22}

# Days per month for expanding partial dates.  February is always 28 --
# leap years are not handled; TODO confirm that is acceptable upstream.
endOfMonths = {'01': 31, '02': 28, '03': 31, '04': 30, '05': 31, '06': 30,
               '07': 31, '08': 31, '09': 30, '10': 31, '11': 30, '12': 31}

# Source object-type string -> generated ManMadeObject subclass.
aat_type_mapping = {
    "Painting": Painting,
    "Drawing": Drawing,
    "Furniture": Furniture,
    "Sculpture": Sculpture,
    "Tapestry": Tapestry,
    "Watercolor": Painting,
    "Pastel": Painting
}

# # A wooden support
aat_part_mapping = {
    "supports": "300014844"  # The thing that is painted on
}

# Material word -> AAT identifier.
aat_material_mapping = {
    "panel": "300014657",  # Is really a support
    "watercolor": "300015045",
    "oil": "300015050",
    "tempera": "300015062",
    "canvas": "300014078",
    "oak": "300012264",
    "gold leaf": "300264831",
    "paper": "300014109",
    "copper": "300011020",
    "terracotta": "300010669",
    "glass": "300010797",
    "chalk": "300011727",
    "bronze": "300010957",
    "marble": "300011443",
    "albumen silver print": "300127121",
    "gelatin silver print": "300128695",
    "silver": "300011029"
}

# pen, pencil, card, cardboard, porcelain, wax, ceramic, plaster
# crayon, millboard, gouache, brass, stone, lead, iron, clay,
# alabaster, limestone

# Pre-built, labelled Material instances keyed by material word.
materialTypes = {}
for (k, v) in aat_material_mapping.items():
    m = Material("http://vocab.getty.edu/aat/%s" % v)
    m.label = k
    materialTypes[k] = m

aat_culture_mapping = {
    "french": "300111188",
    "italian": "300111198",
    "german": "300111192",
    "dutch": "300020929"
}

aat_genre_mapping = {
    "Abstract": "300134134",  # maybe?
    "abstract": "300134134",  # maybe?
    "Genre": "300139140",  # maybe?
    "History": "300386045",
    "Landscape": "300015636",
    "Portrait": "300015637",
    "Still Life": "300015638"
}

dim_type_mapping = {
    "height": "300055644",
    "width": "300055647",
    "depth": "300072633",
    "diameter": "300055624",
    "weight": "300056240"
}

dim_unit_mapping = {
    "inches": "300379100",
    "feet": "300379101",
    "cm": "300379098"
}

# Single shared unit instance; inches is the default unit for dimensions.
inches = MeasurementUnit("http://vocab.getty.edu/aat/%s" % dim_unit_mapping['inches'])
inches.label = "inches"
# Monkey patch Type's _toJSON to only emit full data if not just URI+type
def typeToJSON(self, top=False):
    """Serialise a Type instance: emit the full record only when it carries
    more than the basic bookkeeping attributes, otherwise collapse it to
    just its URI string."""
    # More than three instance attributes means extra state worth emitting.
    if len(self.__dict__) > 3:
        return super(Type, self)._toJSON()
    return self.id


def process_money_value(value):
    """Strip editorial/transcription noise from a monetary value string.

    Removes bracket characters and stock annotation phrases that appear
    in the transcribed ledgers, then trims whitespace.
    """
    for junk in ('[', ']', '-', '?', 'not written', 'sold at auction',
                 'x', 'Lot Price', 'See sales book'):
        value = value.replace(junk, '')
    return value.strip()
def process_materials(what, materials):
    """Normalise a free-text materials field and resolve its words to
    Material terms from the module-level materialTypes mapping.

    `what` (the object being described) is currently unused; it is kept
    for interface compatibility with the caller.  Returns a list of
    Material instances; unrecognised words are skipped.
    """
    materials = materials.lower()
    materials = materials.replace("&", "and")
    # Expand the transcribers' "abbrev [meaning]" conventions.
    materials = materials.replace("card [cardboard]", "cardboard")
    materials = materials.replace("c [canvas]", "canvas")
    materials = materials.replace("w/c [watercolor]", "watercolor")
    materials = materials.replace("[bronze]", "bronze")
    materials = materials.replace("c on p [canvas on panel]", "canvas on panel")
    materials = materials.replace("[from sales book 9, 1907-1912, f. 361]", "")
    materials = materials.replace("p [panel]", "panel")
    materials = materials.replace('terra cotta', 'terracotta')
    materials = materials.replace(',', '')
    materials = materials.replace('-', '')
    materials = materials.replace('procelain', 'porcelain')
    materials = materials.strip()
    mats = []
    for mw in materials.split(' '):
        if mw in ['on', 'and']:
            continue
        try:
            mats.append(materialTypes[mw])
        except KeyError:  # FIX: was a bare except hiding real errors
            # Material word not in the AAT mapping; skip it.
            # print("Material not found: %s" % mw)
            pass
    return mats

# "25 3/4" style mixed fractions, and "12.5 high" style value+unit-word.
divre = re.compile('^([0-9]+) ([0-9]+)/([0-9]+)( |$)')
unitre = re.compile('^([0-9.]+) (high|height|h|long|length|l)( |$)')

def process_dimensions(dims):
    """Parse a free-text dimension string into [[value, which], ...] pairs,
    where `which` is 'h' (height) or 'w' (width).

    Handles '25 x 30', mixed fractions ('10 1/2'), decimals, and explicit
    unit words ('12.5 high').  Units are assumed to be inches.  The first
    unlabelled value is taken as height, subsequent ones as width.
    Unparseable segments are dropped.
    """
    dims = dims.lower()
    # assume default of inches
    dims = dims.replace('"', '')
    dims = dims.replace('in.', '')
    dims = dims.replace('inches', '')
    dims = dims.replace('//', '/')
    dims = dims.replace('[', '')
    dims = dims.replace(']', '')
    # (the old replace('X', 'x') was dead code: lower() already ran)

    dimensions = []
    seen = 0
    for d in dims.split('x'):
        d = d.strip()
        which = ""
        m = divre.match(d)
        if m:
            (main, numr, denom, _end) = m.groups()
            ttl = int(main) + (float(numr) / float(denom))
        else:
            try:
                ttl = int(d)
            except ValueError:
                try:
                    # FIX: plain decimals ('12.5') used to be dropped
                    # because only int() was attempted.
                    ttl = float(d)
                except ValueError:
                    m = unitre.match(d)
                    if not m:
                        # print("----- %s" % d)
                        continue
                    (ttl, which, _end) = m.groups()
                    # FIX: the regex yields a string; coerce so the result
                    # list is uniformly numeric.
                    ttl = float(ttl)
                    which = 'h' if which.startswith('h') else 'w'
        if not which:
            which = "w" if seen else "h"
        seen += 1
        dimensions.append([ttl, which])
    return dimensions


def print_rec_full(rec):
    """Debug helper: print every non-empty field of a record, sorted by key."""
    # FIX: sorted(rec.items()) works on both Python 2 and 3; the original
    # rec.items(); its.sort() breaks on dict views, and the print statement
    # is Python-2-only syntax.
    for (k, v) in sorted(rec.items()):
        if v:
            print("%s: %s" % (k, v))
activity that the entry describes 336 | # 'Sold':26598,'Unsold':11824,'Exchanged':103,'Presented':246,'Transferred':310 337 | # 'Returned':533,'Unknown': 629, 338 | # 'Lost': 7, 'Voided': 16, 'Disjointed': 16, 'Cancelled': 47, 'Removed': 6 339 | 340 | # The outbound activity 341 | txn = None 342 | inv = None 343 | 344 | txnType = rec['transaction'] 345 | if txnType == "Sold": 346 | txn = Purchase(oid) 347 | elif txnType in ['Exchanged', 'Presented', 'Transferred']: 348 | if rec['price_amount']: 349 | txn = Purchase(oid) 350 | else: 351 | txn = Acquisition(oid) 352 | elif txnType in ["Unsold", "Cancelled"]: 353 | # Stock taking, or never left inventory due to no sale 354 | inv = TakeInventory(oid) 355 | elif txnType in ["Lost", "Removed"]: 356 | # Leaves inventory, but not via a transfer of ownership 357 | # E8 can represent end of ownership. There's just no new owner. 358 | txn = Acquisition(oid) 359 | elif txnType == "Voided": 360 | # Bad data; voided should be skipped (per Kelly) 361 | continue 362 | elif txnType == "Returned": 363 | # Can't tell what this actually means yet 364 | # Could be entering or leaving Knoedler stock! 
365 | continue 366 | elif txnType == "Unknown": 367 | if rec['price_amount']: 368 | txn = Purchase(oid) 369 | else: 370 | inv = TakeInventory(oid) 371 | else: 372 | # I think this is only Disjointed 373 | # print_rec(rec) 374 | continue 375 | 376 | # The inbound activity that always happens 377 | # For consistency, always generate a Payment 378 | if rec['purchase_amount']: 379 | inTxn = Purchase("purch_%s" % oid) 380 | pay = Payment("purch_%s" % oid) 381 | inTxn.consists_of = pay 382 | 383 | amnt = MonetaryAmount("purch_price_%s" % oid) 384 | value = process_money_value(rec['purchase_amount']) 385 | if value: 386 | try: 387 | amnt.has_value = float(value) 388 | except: 389 | amnt.description = value 390 | if rec['purchase_currency']: 391 | curr = Currency(rec['purchase_currency']) 392 | curr.label = rec['purchase_currency'] 393 | amnt.has_currency = curr 394 | if rec['purchase_note']: 395 | amnt.description = rec['purchase_note'] 396 | pay.paid_amount = amnt 397 | pay.paid_from = knoedler 398 | inTxn.had_sales_price = amnt 399 | 400 | else: 401 | inTxn = Acquisition("purch_%s" % oid) 402 | 403 | inTxn.transferred_title_to = knoedler 404 | if rec['seller_name'] or rec['seller_name_auth']: 405 | # Look up in authority? 
406 | seller = Actor("seller_%s" % oid) 407 | seller.label = rec['seller_name_auth'] if rec['seller_name_auth'] else rec['seller_name'] 408 | if rec['seller_loc'] or rec['seller_loc_auth']: 409 | sellerPlace = Place("seller_place_%s" % oid) 410 | sellerPlace.label = rec['seller_loc_auth'] if rec['seller_loc_auth'] else rec['seller_loc'] 411 | seller.has_current_or_former_residence = sellerPlace 412 | inTxn.transferred_title_from = seller 413 | if rec['purchase_amount']: 414 | pay.paid_to = seller 415 | 416 | # CurationPeriod 417 | curated = Activity("curated_%s" % oid) 418 | curated.is_started_by = inTxn 419 | 420 | if txn: 421 | # from 422 | txn.transferred_title_from = knoedler 423 | # to 424 | if rec['buyer_name'] or rec['buyer_name_auth']: 425 | # Look up in authority? 426 | buyer = Actor("buyer_%s" % oid) 427 | buyer.label = rec['buyer_name_auth'] if rec['buyer_name_auth'] else rec['buyer_name'] 428 | if rec['buyer_loc'] or rec['buyer_loc_auth']: 429 | buyerPlace = Place("buyer_place_%s" % oid) 430 | buyerPlace.label = rec['buyer_loc_auth'] if rec['buyer_loc_auth'] else rec['buyer_loc'] 431 | buyer.has_current_or_former_residence = buyerPlace 432 | txn.transferred_title_to = buyer 433 | 434 | # when 435 | if rec['sale_date_year']: 436 | # if year, then all. 
blank is "00" 437 | yr = rec['sale_date_year'] 438 | mt = rec['sale_date_month'] 439 | dy = rec['sale_date_day'] 440 | if dy != "00": 441 | start = "%s-%s-%s" % (yr,mt,dy) 442 | end = start 443 | elif mt != "00": 444 | start = "%s-%s-01" % (yr,mt) 445 | end = "%s-%s-%s" % (yr,mt,endOfMonths[mt]) 446 | else: 447 | start = "%s-01-01" % yr 448 | end = "%s-12-31" % yr 449 | span = TimeSpan("sale_span_%s" % oid) 450 | span.begin_of_the_begin = start 451 | span.end_of_the_end = end 452 | txn.has_timespan = span 453 | 454 | value = process_money_value(rec['price_amount']) 455 | if value: 456 | amnt = MonetaryAmount("sale_price_%s" % oid) 457 | try: 458 | amnt.has_value = float(value) 459 | except: 460 | amnt.description = value 461 | if rec['price_currency']: 462 | curr = Currency(rec['price_currency']) 463 | curr.label = rec['price_currency'] 464 | amnt.has_currency = curr 465 | if rec['price_note']: 466 | amnt.description = rec['price_note'] 467 | txn.had_sales_price = amnt 468 | 469 | # Check knoedler_share 470 | if rec['knoedler_share_amount']: 471 | 472 | value = process_money_value(rec['knoedler_share_amount']) 473 | if value: 474 | amnt = MonetaryAmount("shared_price_%s" % oid) 475 | try: 476 | amnt.has_value = float(value) 477 | except: 478 | amnt.description = value 479 | if rec['knoedler_share_currency']: 480 | curr = Currency(rec['knoedler_share_currency']) 481 | curr.label = rec['knoedler_share_currency'] 482 | amnt.has_currency = curr 483 | if rec['knoedler_share_note']: 484 | amnt.description = rec['knoedler_share_note'] 485 | 486 | pay = Payment("kshare_%s" % oid) 487 | txn.consists_of = pay 488 | pay.paid_amount = amnt 489 | pay.paid_to = knoedler 490 | if rec['buyer_name'] or rec['buyer_name_auth']: 491 | pay.paid_from = buyer 492 | 493 | else: 494 | pay = Payment("sale_%s" % oid) 495 | txn.consists_of = pay 496 | pay.paid_amount = amnt 497 | pay.paid_to = knoedler 498 | if rec['buyer_name'] or rec['buyer_name_auth']: 499 | pay.paid_from = buyer 500 | 501 
| curated.is_finished_by = txn 502 | elif inv: 503 | # Taking of Inventory as part of the curation period 504 | curated.consists_of = inv 505 | 506 | # If taking inventory, then the entry date is for that 507 | if rec['entry_date_year']: 508 | # if year, then all. blank is "00" 509 | yr = rec['entry_date_year'] 510 | mt = rec['entry_date_month'] 511 | dy = rec['entry_date_day'] 512 | if dy != "00": 513 | start = "%s-%s-%s" % (yr,mt,dy) 514 | end = start 515 | elif mt != "00": 516 | start = "%s-%s-01" % (yr,mt) 517 | end = "%s-%s-%s" % (yr,mt,endOfMonths[mt]) 518 | else: 519 | start = "%s-01-01" % yr 520 | end = "%s-12-31" % yr 521 | span = TimeSpan("sale_span_%s" % oid) 522 | span.begin_of_the_begin = start 523 | span.end_of_the_end = end 524 | inv.has_timespan = span 525 | 526 | 527 | if not inv: 528 | # entry date is for purchase 529 | if rec['entry_date_year']: 530 | # if year, then all. blank is "00" 531 | yr = rec['entry_date_year'] 532 | mt = rec['entry_date_month'] 533 | dy = rec['entry_date_day'] 534 | if dy != "00": 535 | start = "%s-%s-%s" % (yr,mt,dy) 536 | end = start 537 | elif mt != "00": 538 | start = "%s-%s-01" % (yr,mt) 539 | end = "%s-%s-%s" % (yr,mt,endOfMonths[mt]) 540 | else: 541 | start = "%s-01-01" % yr 542 | end = "%s-12-31" % yr 543 | span = TimeSpan("purch_span_%s" % oid) 544 | span.begin_of_the_begin = start 545 | span.end_of_the_end = end 546 | inTxn.has_timespan = span 547 | 548 | # create the object of the transaction 549 | 550 | objectType = rec['object_type'] 551 | try: 552 | what = aat_type_mapping[objectType](oid) 553 | except: 554 | what = ManMadeObject(oid) 555 | 556 | curated.used_specific_object = what 557 | inTxn.transferred_title_of = what 558 | entry.refers_to = inTxn 559 | if txn: 560 | txn.transferred_title_of = what 561 | entry.refers_to = txn 562 | elif txnType == "Voided": 563 | entry.refers_to = what 564 | 565 | what.label = rec['title'] 566 | 567 | idnt = AccessionNumber("knoedler_%s" % oid) 568 | idnt.value = 
rec['knoedler_id'] 569 | # No way to say it's Knoedler's number? 570 | # Could have a Creation of the Identifier performed by Knoedler :( 571 | 572 | if rec['artist_name'] or rec['artist_name_auth']: 573 | artist = Person("artist_%s" % oid) 574 | artist.label = rec['artist_name_auth'] if rec['artist_name_auth'] else rec['artist_name'] 575 | if rec['nationality']: 576 | artist.nationality = Place("artist_natl_%s" % oid) 577 | artist.nationality.label = rec['nationality'] 578 | 579 | prodn = Production("production_%s" % oid) 580 | prodn.carried_out_by = artist 581 | what.was_produced_by = prodn 582 | 583 | if rec['artist_name_2'] or rec['artist_name_auth_2']: 584 | artist = Person("artist2_%s" % oid) 585 | artist.label = rec['artist_name_auth_2'] if rec['artist_name_auth_2'] else rec['artist_name_2'] 586 | if rec['nationality_2']: 587 | artist.nationality = Place('artist_2_natl_%s' % oid) 588 | artist.nationality.label = rec['nationality_2'] 589 | prodn.carried_out_by = artist 590 | 591 | 592 | # genre 593 | if rec['genre'] and not rec['genre'] == '[not identified]': 594 | if not aat_genre_mapping.has_key(rec['genre']): 595 | print "Not found: %s" % (rec['genre']) 596 | else: 597 | what.has_type = genreTypes[rec['genre']] 598 | 599 | # subject 600 | if rec['subject']: 601 | s = rec['subject'] 602 | if s.find(';'): 603 | ss = [x.strip() for x in s.split(';')] 604 | else: 605 | ss = [s] 606 | for s in ss: 607 | # s = s.replace('Int\xe9rieurs', 'Interiors') 608 | sid = s.replace(' ', '') 609 | sid = sid.replace('(', '') 610 | sid = sid.replace(')', '') 611 | t = Type(sid) 612 | t.label = s 613 | what.depicts = t 614 | 615 | # materials 616 | if rec['materials']: 617 | # XXX Finish this 618 | process_materials(what, rec['materials']) 619 | # what.made_of = material 620 | 621 | if rec['dimensions']: 622 | # XXX Finish this too 623 | dims = process_dimensions(rec['dimensions']) 624 | for d in dims: 625 | dim = Dimension("%s_%s" % (d[0], oid)) 626 | dim.has_value = d[0] 627 
| dim.has_unit = inches 628 | if d[1] == 'h': 629 | what.height = dim 630 | else: 631 | what.width = dim 632 | 633 | collection = InformationObject("collection") 634 | for s in stock_books.values(): 635 | collection.has_fragment = s 636 | 637 | 638 | factory.full_names = True 639 | outstr = factory.toString(collection, compact=False) 640 | 641 | fh = file('knoedler.jsonld', 'w') 642 | fh.write(outstr) 643 | fh.close() 644 | 645 | # Note that these entries are really one transaction 646 | # 64699 ... 64732 647 | 648 | -------------------------------------------------------------------------------- /tests/test_model.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import sys 3 | import os 4 | import shutil 5 | import json 6 | import pickle 7 | from collections import OrderedDict 8 | from cromulent import model, vocab 9 | from cromulent.model import override_okay 10 | 11 | 12 | class TestFactorySetup(unittest.TestCase): 13 | 14 | def setUp(self): 15 | model.factory.base_url = 'http://data.getty.edu/provenance/' 16 | model.factory.base_dir = 'tests/provenance_base_dir' 17 | model.factory.default_lang = 'en' 18 | #model.factory.context_uri = 'http://www.cidoc-crm.org/cidoc-crm/' 19 | 20 | def tearDown(self): 21 | model.factory.base_url = 'http://lod.example.org/museum/' 22 | model.factory.log_stream = sys.stderr 23 | model.factory.debug_level = 'warn' 24 | 25 | def test_base_url(self): 26 | self.assertEqual(model.factory.base_url, 'http://data.getty.edu/provenance/') 27 | 28 | def test_base_dir(self): 29 | self.assertEqual(model.factory.base_dir, 'tests/provenance_base_dir') 30 | 31 | def test_default_lang(self): 32 | self.assertEqual(model.factory.default_lang, 'en') 33 | 34 | def test_set_debug_stream(self): 35 | strm = open('err_output', 'w') 36 | model.factory.set_debug_stream(strm) 37 | self.assertEqual(model.factory.log_stream, strm) 38 | 39 | def test_set_debug(self): 40 | 
model.factory.set_debug('error_on_warning') 41 | self.assertEqual(model.factory.debug_level, 'error_on_warning') 42 | self.assertRaises(model.ConfigurationError, model.factory.set_debug, 'xxx') 43 | self.assertRaises(model.MetadataError, model.factory.maybe_warn, "test") 44 | 45 | def test_load_context(self): 46 | self.assertRaises(model.ConfigurationError, model.factory.load_context, 47 | "foo", {"foo":"does_not_exist.txt"}) 48 | model.factory.load_context("foo", {"foo":"tests/test_context.json"}) 49 | self.assertEqual(model.factory.context_json, {"@context":{"id":"@id"}}) 50 | self.assertRaises(model.ConfigurationError, model.factory.load_context, "", {}) 51 | 52 | def test_pickle(self): 53 | model.factory.log_stream = sys.stderr 54 | srlz = pickle.dumps(model.factory) 55 | newfac = pickle.loads(srlz) 56 | self.assertTrue(model.factory.log_stream is newfac.log_stream) 57 | 58 | 59 | 60 | class TestFactorySerialization(unittest.TestCase): 61 | 62 | def setUp(self): 63 | self.collection = model.InformationObject('collection') 64 | self.collection._label = "Test Object" 65 | 66 | def test_broken_unicode(self): 67 | model.factory.debug_level = "error_on_warning" 68 | try: 69 | badval = b"\xFF\xFE\x02" 70 | except: 71 | badval = "\xFF\xFE\x02" 72 | badjs = {"_label": badval} 73 | self.assertRaises(model.MetadataError, model.factory._buildString, 74 | js=badjs) 75 | 76 | def test_toJSON(self): 77 | # model.factory.context_uri = 'http://lod.getty.edu/context.json' 78 | expect = OrderedDict([ 79 | ('@context', model.factory.context_uri), 80 | ('id', u'http://lod.example.org/museum/InformationObject/collection'), 81 | ('type', 'InformationObject'), ('_label', 'Test Object')]) 82 | outj = model.factory.toJSON(self.collection) 83 | self.assertEqual(expect, outj) 84 | 85 | def test_toJSON_fast(self): 86 | model.factory.json_serializer = "fast" 87 | expect = {'@context': model.factory.context_uri, 88 | 'id': 'http://lod.example.org/museum/InformationObject/collection', 89 | 
'type': 'InformationObject', 90 | '_label': 'Test Object'} 91 | outj = model.factory.toJSON(self.collection) 92 | self.assertEqual(expect, outj) 93 | model.factory.json_serializer = "normal" 94 | 95 | def test_toJSON_normal(self): 96 | expect = OrderedDict([(u'@context', model.factory.context_uri), 97 | (u'@id', u'http://lod.example.org/museum/Person/1'), (u'@type', u'crm:E21_Person'), 98 | ('rdfs:label', 'Test Person')]) 99 | model.factory.full_names = True 100 | p = model.Person("1") 101 | p._label = "Test Person" 102 | outj = model.factory.toJSON(p) 103 | self.assertEqual(expect, outj) 104 | # reset 105 | model.factory.full_names = False 106 | 107 | def test_toString(self): 108 | expect = u'{"@context":"'+model.factory.context_uri+'","id":"http://lod.example.org/museum/InformationObject/collection","type":"InformationObject","_label":"Test Object"}' 109 | outs = model.factory.toString(self.collection) 110 | self.assertEqual(expect, outs) 111 | 112 | def test_toString_fast(self): 113 | # Should only be trusted in python 3 114 | if sys.version_info.major >= 3 and sys.version_info.minor >= 6: 115 | expect = u'{"@context":"'+model.factory.context_uri+'","id":"http://lod.example.org/museum/InformationObject/collection","type":"InformationObject","_label":"Test Object"}' 116 | model.factory.json_serializer = "fast" 117 | outs = model.factory.toString(self.collection) 118 | model.factory.json_serializer = "normal" 119 | self.assertEqual(expect, outs) 120 | else: 121 | print("Skipping toString_fast test in Python 2.x") 122 | 123 | def test_toFile(self): 124 | self.assertRaises(model.ConfigurationError, model.factory.toFile, self.collection) 125 | # Test auto filename determination 126 | model.factory.base_dir = 'tests' 127 | model.factory.toFile(self.collection) 128 | self.assertTrue(os.path.isfile('tests/InformationObject/collection.json')) 129 | # Test explicit filename setting 130 | model.factory.toFile(self.collection, filename='tests/fishbat.bar') 131 | 
self.assertTrue(os.path.isfile('tests/fishbat.bar')) 132 | # Tidy up 133 | shutil.rmtree('tests/InformationObject') 134 | 135 | def test_breadth(self): 136 | x = model.TransferOfCustody() 137 | e = model.Activity() 138 | fr = model.Group() 139 | to = model.Group() 140 | w = model.HumanMadeObject() 141 | fr._label = "From" 142 | to._label = "To" 143 | x.transferred_custody_of = w 144 | x.transferred_custody_from = fr 145 | x.transferred_custody_to = to 146 | e.used_specific_object = w 147 | e.carried_out_by = to 148 | w.current_owner = fr 149 | x.specific_purpose = e 150 | js = model.factory.toJSON(x) 151 | # Okay ... if we're breadth first, then custody_from is a resource 152 | # And now it's the first in the list 153 | self.assertTrue(isinstance(js['transferred_custody_from'][0], OrderedDict)) 154 | 155 | def test_string_list(self): 156 | x = model.Activity() 157 | x._label = ["Label 1", "Label 2"] 158 | js = model.factory.toJSON(x) 159 | self.assertTrue(js['_label'] == x._label) 160 | 161 | def test_external(self): 162 | x = model.ExternalResource(ident="1") 163 | model.factory.elasticsearch_compatible = 1 164 | js = x._toJSON(done=None) 165 | self.assertTrue(type(js) == dict) 166 | model.factory.elasticsearch_compatible = 0 167 | js = x._toJSON(done=None) 168 | # testing unicode in 2, str in 3 :( 169 | self.assertTrue(type(js) != dict) 170 | 171 | def test_recursion(self): 172 | x = model.Activity() 173 | x.part = x 174 | js = model.factory.toJSON(x) 175 | # If our recursion checks have regressed, this will barf right here 176 | self.assertTrue(1) 177 | 178 | def test_pipe_scoped(self): 179 | x = model.Activity() 180 | y = model.Activity() 181 | x.part = y 182 | model.factory.pipe_scoped_contexts = True 183 | js = model.factory.toJSON(x) 184 | self.assertTrue('part|crm:P9_consists_of' in js) 185 | model.factory.pipe_scoped_contexts = False 186 | js = model.factory.toJSON(x) 187 | self.assertTrue('part|crm:P9_consists_of' not in js) 188 | self.assertTrue('part' 
in js) 189 | 190 | def test_collapse_json(self): 191 | model.factory.auto_id_type = "uuid" 192 | model.factory.base_url = "http://lod.example.org/museum/" 193 | model.factory.context_uri = "https://linked.art/ns/v1/linked-art.json" 194 | p = model.Person() 195 | p.classified_as = model.Type(ident="http://example.org/Type", label="Test") 196 | res1 = model.factory.toString(p, compact=False, collapse=60) # all new lines 197 | res2 = model.factory.toString(p, compact=False, collapse=120) # compact list of type 198 | self.assertEqual(len(res1.splitlines()), 12) 199 | self.assertEqual(len(res2.splitlines()), 6) 200 | 201 | def test_production_mode(self): 202 | 203 | # model.factory.production_mode() 204 | # Can't unset the cached hierarchy 205 | # and it causes the test for the hierarchy to fail 206 | model.factory.validate_profile = False 207 | model.factory.validate_properties = False 208 | model.factory.validate_range = False 209 | model.factory.validate_multiplicity = False 210 | 211 | p = model.Person() 212 | p.identified_by = model.Name(value="abc") 213 | p.part = model.HumanMadeObject() 214 | js = model.factory.toJSON(p) 215 | 216 | model.factory.production_mode(state=False) 217 | 218 | 219 | def test_ordering(self): 220 | p = model.Person(label="Person") 221 | p.classified_as = model.Type(ident="type-uri") 222 | p.referred_to_by = model.LinguisticObject(content="text") 223 | p.dimension = model.Dimension(value=1) 224 | 225 | outstr = model.factory.toString(p) 226 | lbl = outstr.index("_label") 227 | clsf = outstr.index("classified_as") 228 | r2b = outstr.index("referred_to_by") 229 | dim = outstr.index("dimension") 230 | self.assertTrue(lbl < clsf) 231 | self.assertTrue(clsf < r2b) 232 | self.assertTrue(r2b < dim) 233 | 234 | 235 | class TestProcessTSV(unittest.TestCase): 236 | 237 | def test_process_tsv(self): 238 | expect = {u'subs': [u'E84_Information_Carrier'], u'label': u'Human-Made Object', u'className': u'HumanMadeObject', 239 | u'subOf': 
u'E19_Physical_Object|E24_Physical_Human-Made_Thing', u'props': [], u'class': None, u'okay': u'1'} 240 | fn = 'cromulent/data/crm_vocab.tsv' 241 | vocabData = model.process_tsv(fn) 242 | man_made = vocabData['E22_Human-Made_Object'] 243 | del man_made['desc'] # too long and volatile 244 | # check subs specifically - could be coming from an extension 245 | if man_made['subs'] != expect['subs']: 246 | del man_made['subs'] 247 | del expect['subs'] 248 | self.assertEqual(expect, man_made) 249 | 250 | 251 | class TestBuildClasses(unittest.TestCase): 252 | 253 | def test_build_classes(self): 254 | tsv = "\nClassName_full\tclass\tClassName_py\tClass Label\tClass Description\t\t1\t\n" 255 | fh = open('tests/temp.tsv', 'w') 256 | fh.write(tsv) 257 | fh.close() 258 | model.build_classes("tests/temp.tsv", "ClassName_full") 259 | from cromulent.model import ClassName_py 260 | self.assertEqual('Class Description', ClassName_py.__doc__) 261 | os.remove('tests/temp.tsv') 262 | 263 | class TestBuildClass(unittest.TestCase): 264 | 265 | def test_build_class(self): 266 | tsv = "\nClassName_full\tclass\tClassName_py2\tClass Label\tClass Description\t\t1\t\n" 267 | fh = open('tests/temp.tsv', 'w') 268 | fh.write(tsv) 269 | fh.close() 270 | vocabData = model.process_tsv('tests/temp.tsv') 271 | model.build_class('ClassName_full', model.BaseResource, vocabData) 272 | from cromulent.model import ClassName_py2 273 | self.assertEqual('Class Description', ClassName_py2.__doc__) 274 | os.remove('tests/temp.tsv') 275 | 276 | class TestAutoIdentifiers(unittest.TestCase): 277 | 278 | def test_bad_autoid(self): 279 | model.factory.auto_assign_id = True 280 | model.factory.auto_id_type = "broken" 281 | self.assertRaises(model.ConfigurationError, model.factory.generate_id, 282 | "irrelevant") 283 | 284 | def test_int(self): 285 | model.factory.auto_assign_id = True 286 | model.factory.auto_id_type = "int" 287 | p = model.Person() 288 | p2 = model.Activity() 289 | self.assertEqual(int(p.id[-1]), 
int(p2.id[-1])-1) 290 | 291 | def test_int_per_type(self): 292 | model.factory.auto_assign_id = True 293 | model.factory.auto_id_type = "int-per-type" 294 | p = model.Person() 295 | p2 = model.Person() 296 | self.assertEqual(int(p.id[-1]), int(p2.id[-1])-1) 297 | p3 = model.Activity() 298 | self.assertEqual(int(p.id[-1]), int(p3.id[-1])) 299 | 300 | def test_int_per_segment(self): 301 | model.factory.auto_assign_id = True 302 | model.factory._auto_id_segments = {} 303 | model.factory.auto_id_type = "int-per-segment" 304 | model.Activity._uri_segment = model.Person._uri_segment 305 | p = model.Person() 306 | p2 = model.Activity() 307 | self.assertEqual(int(p.id[-1]), int(p2.id[-1])-1) 308 | p3 = model.TimeSpan() 309 | self.assertEqual(int(p.id[-1]), int(p3.id[-1])) 310 | 311 | def test_uuid(self): 312 | model.factory.auto_assign_id = True 313 | model.factory.auto_id_type = "uuid" 314 | p = model.Person() 315 | self.assertTrue(p.id.startswith('urn:uuid:')) 316 | 317 | def test_prefixes(self): 318 | 319 | model.factory.prefixes = {'fish':'http://example.org/ns/'} 320 | p3 = model.Person('fish:3') 321 | self.assertEqual(p3.id, 'fish:3') 322 | self.assertEqual(p3._full_id, 'http://example.org/ns/3') 323 | 324 | model.factory.prefixes = {} 325 | p4 = model.Person('fish:4') 326 | self.assertTrue(p4.id.startswith(model.factory.base_url)) 327 | 328 | def test_other_uris(self): 329 | p1 = model.Person(ident="tag:some-info-about-person") 330 | self.assertEqual(p1.id, "tag:some-info-about-person") 331 | p2 = model.Person(ident="info:ulan/500012345") 332 | self.assertEqual(p2.id, "info:ulan/500012345") 333 | p3 = model.Person(ident="some:random:thing:with:colons") 334 | self.assertFalse(p3.id == "some:random:thing:with:colons") 335 | 336 | def test_no_ident(self): 337 | 338 | model.factory.auto_assign_id = True 339 | p1 = model.Person() # auto assigned 340 | p2 = model.Person(ident=None) # auto assigned 341 | p3 = model.Person(ident="") # bnode explicitly 342 | 343 | 
self.assertTrue(p1.id.startswith('http')) 344 | self.assertTrue(p2.id.startswith('http')) 345 | self.assertEqual(p3.id, '') 346 | 347 | model.factory.auto_assign_id = False 348 | p4 = model.Person() # bnode is default 349 | p5 = model.Person(ident=None) # bnode is default 350 | p6 = model.Person(ident="") # bnode explicitly 351 | 352 | self.assertEqual(p4.id, '') 353 | self.assertEqual(p5.id, '') 354 | self.assertEqual(p6.id, '') 355 | 356 | 357 | class TestBaseResource(unittest.TestCase): 358 | 359 | def setUp(self): 360 | override_okay(model.Person, 'parent_of') 361 | self.artist = model.Person('00001', 'Jane Doe') 362 | self.son = model.Person('00002', 'John Doe') 363 | 364 | def test_init(self): 365 | self.assertEqual(self.artist.id, 'http://lod.example.org/museum/Person/00001') 366 | self.assertEqual(self.artist._type, 'crm:E21_Person') 367 | self.assertEqual(self.artist.type, 'Person') 368 | self.assertEqual(self.artist._label, 'Jane Doe') 369 | self.assertFalse(hasattr(self.artist, 'value')) 370 | self.assertFalse(hasattr(self.artist, 'has_type')) 371 | 372 | def test_check_prop(self): 373 | desc = self.artist._check_prop('_label', 'Jane Doe\'s Bio') 374 | self.assertEqual(desc, 1) 375 | parent = self.artist._check_prop('parent_of', self.son) 376 | self.assertEqual(parent, 2) 377 | 378 | def test_list_all_props(self): 379 | props = self.artist.list_all_props() 380 | props.sort() 381 | self.assertEqual(props[-1], 'witnessed') 382 | self.assertTrue('_label' in props) 383 | self.assertTrue('identified_by' in props) 384 | 385 | def test_list_my_props(self): 386 | p1 = model.Person() 387 | p1.classified_as = model.Type() 388 | props = p1.list_my_props() 389 | self.assertEqual(set(props), set(['classified_as', 'id'])) 390 | props = p1.list_my_props(filter=model.Type) 391 | self.assertEqual(props, ['classified_as']) 392 | 393 | def test_allows_multiple(self): 394 | p = model.Person() 395 | self.assertTrue(p.allows_multiple('classified_as')) 396 | 
self.assertFalse(p.allows_multiple('born')) 397 | self.assertRaises(model.DataError, p.allows_multiple, 'fish') 398 | 399 | def test_check_reference(self): 400 | self.assertTrue(self.artist._check_reference('http')) 401 | self.assertFalse(self.artist._check_reference('xxx')) 402 | self.assertTrue(self.artist._check_reference({'id': 'xxx'})) 403 | self.assertFalse(self.artist._check_reference({'xxx': 'yyy'})) 404 | self.assertTrue(self.artist._check_reference(self.son)) 405 | self.assertTrue(self.artist._check_reference(['http'])) 406 | self.assertFalse(self.artist._check_reference(['xxx', 'yyy'])) 407 | self.assertTrue(self.artist._check_reference(model.Person)) 408 | 409 | def test_multiplicity(self): 410 | model.factory.process_multiplicity = True 411 | who = model.Actor() 412 | mmo = model.HumanMadeObject() 413 | prod = model.Production() 414 | mmo.produced_by = prod 415 | who.current_owner_of = mmo 416 | mmo.current_owner = who 417 | self.assertEqual(mmo.current_owner, [who]) 418 | self.assertEqual(who.current_owner_of, [mmo]) 419 | self.assertEqual(mmo.produced_by, prod) 420 | 421 | def test_init_params(self): 422 | p1 = model.Person(ident="urn:uuid:1234") 423 | self.assertEqual(p1.id, "urn:uuid:1234") 424 | p2 = model.Person(ident="http://schema.org/Foo") 425 | self.assertEqual(p2.id, "schema:Foo") 426 | p3 = model.Name(content="Test") 427 | self.assertEqual(p3.content, "Test") 428 | c = model.MonetaryAmount(value=10) 429 | self.assertEqual(c.value, 10) 430 | n = model.Name(value="Rob") 431 | self.assertEqual(n.content, "Rob") 432 | i = model.Identifier(content="xyz123") 433 | self.assertEqual(i.content, "xyz123") 434 | i2 = model.Identifier(value="abc") 435 | self.assertEqual(i2.content, "abc") 436 | 437 | def test_dir(self): 438 | props = dir(self.artist) 439 | self.assertTrue('identified_by' in props) 440 | 441 | 442 | class TestPropertyCache(unittest.TestCase): 443 | 444 | def test_cache_hierarchy(self): 445 | o = model.HumanMadeObject() 446 | 
self.assertEqual(o._all_properties, {}) 447 | model.factory.cache_hierarchy() 448 | self.assertTrue(len(o._all_properties) > 50) 449 | 450 | 451 | class TestMagicMethods(unittest.TestCase): 452 | 453 | def setUp(self): 454 | override_okay(model.Person, 'parent_of') 455 | # model.Person._properties['parent_of']['multiple'] = 1 456 | 457 | def test_set_magic_resource(self): 458 | artist = model.Person('00001', 'Jane Doe') 459 | son = model.Person('00002', 'John Doe') 460 | daughter = model.Person('00002', 'Jenny Doe') 461 | son2 = model.Person('00002', 'Jim Doe') 462 | artist._set_magic_resource('parent_of', son) 463 | self.assertEqual(artist.parent_of, [son]) 464 | artist._set_magic_resource('parent_of', daughter) 465 | try: 466 | self.assertIn(son, artist.parent_of) 467 | self.assertIn(daughter, artist.parent_of) 468 | except: 469 | # 2.6 doesn't have assertIn 470 | self.assertTrue(son in artist.parent_of) 471 | self.assertTrue(daughter in artist.parent_of) 472 | 473 | artist._set_magic_resource('parent_of', son2) 474 | try: 475 | self.assertIn(son, artist.parent_of) 476 | self.assertIn(daughter, artist.parent_of) 477 | self.assertIn(son2, artist.parent_of) 478 | except: 479 | self.assertTrue(son in artist.parent_of) 480 | self.assertTrue(daughter in artist.parent_of) 481 | self.assertTrue(son2 in artist.parent_of) 482 | 483 | def test_set_magic_resource_inverse(self): 484 | model.factory.materialize_inverses = True 485 | artist = model.Person('00001', 'Jane Doe') 486 | son = model.Person('00002', 'John Doe') 487 | artist._set_magic_resource('parent_of', son) 488 | self.assertEqual(son.parent, [artist]) 489 | model.factory.materialize_inverses = False 490 | 491 | def test_validate_profile_off(self): 492 | model.factory.validate_profile = False 493 | ia = model.IdentifierAssignment() 494 | # If it's not turned off this should raise 495 | model.factory.validate_profile = True 496 | self.assertRaises(model.ProfileError, model.IdentifierAssignment) 497 | p1 = 
model.Person() 498 | self.assertRaises(model.ProfileError, p1.__setattr__, 'documented_in', "foo") 499 | 500 | def test_validation_unknown(self): 501 | model.factory.validate_properties = True 502 | artist = model.Person('00001', 'Jane Doe') 503 | self.assertRaises(model.DataError, artist.__setattr__, 'unknown_property', 1) 504 | 505 | def test_validation_wrong_type(self): 506 | model.factory.validate_properties = True 507 | artist = model.Person('00001', 'Jane Doe') 508 | self.assertRaises(model.DataError, artist.__setattr__, 'parent_of', 'Bad Value') 509 | 510 | def test_validation_off(self): 511 | model.factory.validate_properties = False 512 | artist = model.Person('00001', 'Jane Doe') 513 | artist.unknown_property = 1 514 | self.assertEqual(artist.unknown_property, 1) 515 | model.factory.validate_properties = True 516 | 517 | def test_validate_multiplicity(self): 518 | model.factory.validate_multiplicity = True 519 | who = model.Person() 520 | b1 = model.Birth() 521 | who.born = b1 522 | b2 = model.Birth() 523 | self.assertRaises(model.ProfileError, who.__setattr__, 'born', b2) 524 | model.factory.validate_multiplicity = False 525 | who.born = b2 526 | self.assertEqual(who.born, [b1, b2]) 527 | 528 | def test_not_multiple_instance(self): 529 | who = model.Person() 530 | n = model.Name(content="Test") 531 | who.identified_by = n 532 | 533 | model.factory.multiple_instances_per_property = "error" 534 | self.assertRaises(model.DataError, who.__setattr__, 'identified_by', n) 535 | self.assertEqual(who.identified_by, [n]) 536 | 537 | model.factory.multiple_instances_per_property = "drop" 538 | who.identified_by = n 539 | self.assertEqual(who.identified_by, [n,n]) 540 | # and check that only serialized once 541 | js = model.factory.toJSON(who) 542 | self.assertEqual(len(js['identified_by']), 1) 543 | 544 | model.factory.multiple_instances_per_property = "allow" 545 | js = model.factory.toJSON(who) 546 | self.assertEqual(len(js['identified_by']), 2) 547 | 548 | 549 | 
class TestObjectEquality(unittest.TestCase): 550 | def setUp(self): 551 | self.artist = model.Person('00001', 'Jane Doe') 552 | self.son = model.Person('00002', 'John Doe') 553 | self.daughter = model.Person('00002', 'Jenny Doe') 554 | self.son2 = model.Person('00002', 'Jim Doe') 555 | 556 | def test_eq_ident(self): 557 | self.assertEqual(self.artist, self.artist) 558 | self.assertEqual(self.son, model.Person('00002', 'John Doe')) 559 | self.assertEqual(self.son2, model.Person('00002', 'Jim Doe')) 560 | self.assertEqual(self.daughter, model.Person('00002', 'Jenny Doe')) 561 | 562 | def test_eq_value(self): 563 | self.assertEqual(self.artist, model.Person('00001', 'Jane Doe')) 564 | self.assertEqual(self.son, self.son) 565 | self.assertEqual(self.son2, self.son2) 566 | self.assertEqual(self.daughter, self.daughter) 567 | 568 | def test_in_value(self): 569 | people = ( 570 | model.Person('00001', 'Jane Doe'), # artist 571 | model.Person('00002', 'Jim Doe') # son2 572 | ) 573 | self.assertIn(self.artist, people) 574 | self.assertNotIn(self.son, people) 575 | self.assertNotIn(self.daughter, people) 576 | self.assertIn(self.son2, people) 577 | 578 | def test_neq(self): 579 | self.assertNotEqual(self.artist, self.son) 580 | self.assertNotEqual(self.artist, model.Person('00001', 'Jane')) # label differs 581 | self.assertNotEqual(self.artist, self.daughter) 582 | self.assertNotEqual(self.artist, self.son2) 583 | self.assertNotEqual(self.son, self.daughter) 584 | self.assertNotEqual(self.son, self.son2) 585 | self.assertNotEqual(self.daughter, self.son2) 586 | 587 | def nation(self, name, ident): 588 | place = vocab.Place(ident='tag:getty.edu,2019:digital:pipeline:provenance:REPLACE-WITH-UUID#PLACE-COUNTRY-' + name, label=name) 589 | nation = model.Place(ident=ident) 590 | place.classified_as = vocab.instances['nation'] 591 | place.identified_by = model.Name(ident='', content=name) 592 | return place 593 | 594 | def test_equality(self): 595 | from cromulent.model import 
factory 596 | place1 = self.nation('Belgium', 'http://vocab.getty.edu/aat/300128207') 597 | place2 = self.nation('Belgium', 'http://vocab.getty.edu/aat/300128207') 598 | self.assertEqual(place1, place2) 599 | 600 | if __name__ == '__main__': 601 | unittest.main() 602 | 603 | --------------------------------------------------------------------------------