├── README
├── tests
├── __init__.py
├── test_context.json
├── test_add_classification.py
├── test_multiple_instantiation.py
├── test_reader.py
├── test_currency.py
├── test_vocab.py
├── test_dimensions.py
└── test_model.py
├── cromulent
├── __init__.py
├── data
│ ├── __init__.py
│ ├── key_order.json
│ ├── overrides.json
│ └── crm-profile.json
├── multiple_instantiation.py
└── reader.py
├── requirements.txt
├── setup.cfg
├── experimental
├── crm.py
├── bibframe.py
└── bibframe_reader.py
├── .travis.yml
├── setup.py
├── CHANGELOG.md
├── .gitignore
├── utils
├── old
│ ├── merge_inverses.py
│ └── make_inverses.py
├── info.py
├── make_jsonld_context.py
├── process_ontologies.py
└── data
│ ├── linkedart_crm_enhancements.xml
│ └── linkedart.xml
├── examples
├── example.py
├── json-to-lod.py
├── sales-to-lod.py
└── knoedler-to-lod.py
├── .circleci
└── config.yml
├── README.md
└── LICENSE
/README:
--------------------------------------------------------------------------------
1 | README.md
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/cromulent/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/cromulent/data/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | ordereddict
2 | rdflib
3 | PyLD
4 |
--------------------------------------------------------------------------------
/tests/test_context.json:
--------------------------------------------------------------------------------
1 | {"@context": {"id": "@id"}}
2 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [wheel]
2 | universal = 1
3 |
4 | [check-manifest]
5 | ignore =
6 | .travis.yml
7 | tox.ini
8 | .gitignore
9 |
10 |
--------------------------------------------------------------------------------
/experimental/crm.py:
--------------------------------------------------------------------------------
1 | import model
2 | from model import CromulentFactory, build_classes, \
3 | KEY_ORDER_HASH, KEY_ORDER_DEFAULT
4 |
5 | factory = CromulentFactory("http://lod.example.org/museum/", \
6 | context="http://linked.art/ns/context/1/full.jsonld")
7 | build_classes()
8 | model.factory = factory
--------------------------------------------------------------------------------
/experimental/bibframe.py:
--------------------------------------------------------------------------------
1 | import model
2 | from model import CromulentFactory, build_classes, \
3 | KEY_ORDER_HASH, KEY_ORDER_DEFAULT
4 |
5 | factory = CromulentFactory("http://lod.example.org/museum/", \
6 | load_context=False)
7 | build_classes("utils/bibframe_vocab.tsv", top="rdf:Resource")
8 | model.factory = factory
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | dist: xenial
3 |
4 | python:
5 | - '2.7'
6 | - '3.6'
7 | - '3.7'
8 | - '3.8'
9 | install:
10 | - pip install coveralls ordereddict
11 | - python setup.py install
12 | notifications:
13 | email:
14 | recipients:
15 | - azaroth42@gmail.com
16 | script:
17 | coverage run --source=cromulent setup.py test
18 | after_success:
19 | coveralls
20 |
--------------------------------------------------------------------------------
/tests/test_add_classification.py:
--------------------------------------------------------------------------------
1 |
2 | import unittest
3 |
4 | try:
5 | from collections import OrderedDict
6 | except:
7 | # 2.6
8 | from ordereddict import OrderedDict
9 |
10 | from cromulent import model, vocab
11 | from cromulent.model import factory
12 |
13 | class TestAddClassification(unittest.TestCase):
14 | def test_add_classification(self):
15 | amnt = model.MonetaryAmount(ident='')
16 | amnt.value = 7.0
17 | self.assertNotIn('Asking Price', factory.toString(amnt))
18 | vocab.add_classification(amnt, vocab.AskingPrice)
19 | self.assertIn('Asking Price', factory.toString(amnt))
20 |
21 | if __name__ == '__main__':
22 | unittest.main()
23 |
--------------------------------------------------------------------------------
/tests/test_multiple_instantiation.py:
--------------------------------------------------------------------------------
1 |
2 | import unittest
3 |
4 | try:
5 | from collections import OrderedDict
6 | except:
7 | # 2.6
8 | from ordereddict import OrderedDict
9 |
10 | from cromulent import multiple_instantiation as mi
11 | from cromulent.model import factory, Person, DataError, Dimension
12 |
13 |
14 | class TestMIClasses(unittest.TestCase):
15 |
16 | def test_destruction(self):
17 | expect = OrderedDict([('id', u'http://lod.example.org/museum/Activity/1'),
18 | ('type', ['Destruction', 'Activity']), ('_label', "Test Destruction")])
19 | mi.DestructionActivity._okayToUse = 1
20 | da = mi.DestructionActivity("1")
21 | da._label = "Test Destruction"
22 | factory.context_uri = ""
23 | dajs = factory.toJSON(da)
24 | self.assertEqual(dajs, expect)
25 |
26 |
--------------------------------------------------------------------------------
/cromulent/multiple_instantiation.py:
--------------------------------------------------------------------------------
1 |
2 | # This assumes the default CIDOC-CRM, even though the model code
3 | # can generate classes for any ontology
4 |
5 | import inspect
6 | from cromulent.model import Destruction, EndOfExistence, Activity, Appellation, LinguisticObject
7 |
8 | # DestructionActivity class as CRM has a Destruction Event and recommends multi-classing
9 | # WARNING: instantiating this class in the default profile will raise an error
10 |
11 | class DestructionActivity(Destruction, Activity):
12 | _uri_segment = "Activity"
13 | _type = ["crm:E6_Destruction", "crm:E7_Activity"]
14 |
15 | @property
16 | def type(self):
17 | return ["Destruction", "Activity"]
18 | DestructionActivity._classhier = inspect.getmro(DestructionActivity)[:-1]
19 |
20 | # And hence we make an EndOfExistence+Activity class
21 | # for all activities that end existences
22 | class EoEActivity(EndOfExistence, Activity):
23 | _uri_segment = "Activity"
24 | _type = ["crm:E64_End_of_Existence", "crm:E7_Activity"]
25 | _niceType = ["EndOfExistence", "Activity"]
26 |
27 | @property
28 | def type(self):
29 | return ["EndOfExistence", "Activity"]
30 |
31 | EoEActivity._classhier = inspect.getmro(EoEActivity)[:-1]
32 |
33 | # No need for Linguistic Appellation any more, as we have E33_E41_Linguistic_Appellation
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | import sys
3 |
4 | if (sys.version_info[0:2] < (2,7)):
5 | install_requires =['ordereddict', 'future', 'rdflib', 'PyLD']
6 | else:
7 | install_requires = ['rdflib', 'PyLD']
8 |
9 | setup(
10 | name = 'cromulent',
11 | packages = ['cromulent'],
12 | package_data = {
13 | 'cromulent': ['data/crm_vocab.tsv', 'data/overrides.json',
14 | 'data/key_order.json', 'data/linked-art.json',
15 | 'data/cidoc-extension.json', 'data/crm-profile.json']
16 | },
17 | test_suite="tests",
18 | version = '0.16.11',
19 | description = 'A library for mapping CIDOC-CRM (v7.1) classes to Python objects',
20 | author = 'Rob Sanderson',
21 | author_email = 'robert.sanderson@yale.edu',
22 | url = 'https://github.com/linked-art/crom',
23 | install_requires=install_requires,
24 | classifiers = [
25 | "Programming Language :: Python",
26 | "Programming Language :: Python :: 3",
27 | "Programming Language :: Python :: 2",
28 | "License :: OSI Approved :: Apache Software License",
29 | "Development Status :: 3 - Alpha",
30 | "Intended Audience :: Developers",
31 | "Operating System :: OS Independent",
32 | "Topic :: Software Development :: Libraries :: Python Modules",
33 | ]
34 | )
35 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Cromulent (CROM) Change Log
2 |
3 | Any notable changes to CROM that affect either functionality or output will be documented in this file (the format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)).
4 |
5 | ## [Unreleased] 2020-11-03
6 |
7 | ### Added
8 |
9 | * Added this change log [[DEV-6985](https://jira.getty.edu/browse/DEV-6985)].
10 |
11 | * Reinstated the `Relationship` entity and its associated properties `relates_to`, `relates_from`, `related_to_by`, and `related_from_by` as these are in production data modelling use, as their sudden removal led to runtime exceptions and prevented code reliant on CROM from operating [[DEV-6985](https://jira.getty.edu/browse/DEV-6985)].
12 |
13 | * Reinstated the `Geometry` and `CoordinateSystem` entities as these are in production data modelling use, as their sudden removal led to runtime exceptions and prevented code reliant on CROM from operating [[DEV-6985](https://jira.getty.edu/browse/DEV-6985)].
14 |
15 | * Reinstated the `current_keeper` and `current_keeper_of` properties as these are in production data modelling use, as their sudden removal led to runtime exceptions and prevented code reliant on CROM from operating [[DEV-6985](https://jira.getty.edu/browse/DEV-6985)].
16 |
17 | ### Changed
18 |
19 | * Imported the updated Getty-local `linked-art.json` context document from the `getty-contexts` repository to ensure consistency [[DEV-6984](https://jira.getty.edu/browse/DEV-6984)].
20 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 |
27 | # PyInstaller
28 | # Usually these files are written by a python script from a template
29 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 |
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 |
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *,cover
46 | .hypothesis/
47 |
48 | # Translations
49 | *.mo
50 | *.pot
51 |
52 | # Django stuff:
53 | *.log
54 | local_settings.py
55 |
56 | # Flask stuff:
57 | instance/
58 | .webassets-cache
59 |
60 | # Scrapy stuff:
61 | .scrapy
62 |
63 | # Sphinx documentation
64 | docs/_build/
65 |
66 | # PyBuilder
67 | target/
68 |
69 | # IPython Notebook
70 | .ipynb_checkpoints
71 |
72 | # pyenv
73 | .python-version
74 |
75 | # celery beat schedule file
76 | celerybeat-schedule
77 |
78 | # dotenv
79 | .env
80 |
81 | # virtualenv
82 | venv/
83 | ENV/
84 |
85 | # Spyder project settings
86 | .spyderproject
87 |
88 | # Rope project settings
89 | .ropeproject
90 |
91 | .DS_Store
92 | err_output
93 | tests/fishbat.bar
94 |
--------------------------------------------------------------------------------
/utils/old/merge_inverses.py:
--------------------------------------------------------------------------------
1 | from lxml import etree
2 | import codecs
3 |
4 | NS = {'rdf':"http://www.w3.org/1999/02/22-rdf-syntax-ns#",
5 | 'xsd':"http://www.w3.org/2001/XMLSchema#",
6 | 'rdfs':"http://www.w3.org/2000/01/rdf-schema#",
7 | 'dcterms':"http://purl.org/dc/terms/",
8 | 'owl':"http://www.w3.org/2002/07/owl#",
9 | 'crm':"http://www.cidoc-crm.org/cidoc-crm/",
10 | 'xml': "http://www.w3.org/XML/1998/namespace"
11 | }
12 |
13 | fh = file('data/inverses.xml')
14 | data = fh.read()
15 | fh.close()
16 | dom = etree.XML(data)
17 |
18 | inverses = {}
19 | props = dom.xpath("//rdf:Property",namespaces=NS)
20 | for p in props:
21 | name = p.xpath('@rdf:about', namespaces=NS)[0]
22 | try:
23 | inv = p.xpath('./owl:inverseOf/@rdf:resource', namespaces=NS)[0]
24 | inverses[name] = inv
25 | except:
26 | pass
27 |
28 | fh = file('data/cidoc.xml')
29 | data = fh.read()
30 | fh.close()
31 | dom = etree.XML(data)
32 |
33 | # Now insert them into the right blocks
34 |
35 | for (n,i) in inverses.items():
36 | try:
37 | elem = dom.xpath('//rdf:Property[@rdf:about="%s"]' % n, namespaces=NS)[0]
38 | if not elem.xpath('./owl:inverseOf', namespaces=NS):
39 | inv = etree.SubElement(elem, "{http://www.w3.org/2002/07/owl#}inverseOf")
40 | inv.set("{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource", i)
41 | inv.tail = "\n"
42 | except:
43 | print "Could not find property %s" % n
44 |
45 |
46 | # And rewrite the file
47 |
48 | fh = file('data/cidoc_inversed.xml', 'w')
49 | fh.write(etree.tostring(dom, pretty_print=True))
50 | fh.close()
51 |
--------------------------------------------------------------------------------
/cromulent/data/key_order.json:
--------------------------------------------------------------------------------
1 | {
2 | "@context": 0,
3 | "id": 1,
4 | "type": 2,
5 | "_label": 3,
6 | "classified_as": 4,
7 |
8 | "value": 5,
9 | "content": 5,
10 | "upper_value_limit": 6,
11 | "lower_value_limit": 7,
12 | "unit": 8,
13 |
14 | "identified_by": 10,
15 | "defined_by": 11,
16 | "referred_to_by" : 15,
17 | "about": 18,
18 | "technique": 19,
19 |
20 | "timespan": 20,
21 | "begin_of_the_begin": 21,
22 | "end_of_the_begin": 22,
23 | "begin_of_the_end": 23,
24 | "end_of_the_end": 24,
25 | "duration": 25,
26 |
27 | "started_by": 26,
28 | "continued": 26,
29 | "finished_by": 27,
30 | "continued_by": 27,
31 |
32 | "took_place_at": 30,
33 | "carried_out_by": 31,
34 | "used_specific_object": 33,
35 | "removed": 34,
36 | "diminished": 35,
37 | "added": 34,
38 | "augmented": 35,
39 | "transformed": 35,
40 | "produced": 38,
41 | "destroyed": 39,
42 | "born": 38,
43 | "died": 39,
44 | "formed": 38,
45 | "dissolved": 39,
46 | "created": 38,
47 |
48 | "assigned_by": 39,
49 |
50 | "carried_out": 40,
51 | "dimension": 41,
52 | "made_of": 42,
53 | "language": 42,
54 | "part_of": 43,
55 | "approximated_by": 44,
56 | "member_of": 45,
57 |
58 | "transferred_title_of": 50,
59 | "transferred_title_from": 51,
60 | "transferred_title_to": 52,
61 | "transferred_custody_of": 50,
62 | "transferred_custody_from": 51,
63 | "transferred_custody_to": 52,
64 | "paid_amount": 50,
65 | "paid_from": 51,
66 | "paid_to": 52,
67 | "moved": 50,
68 | "moved_from": 51,
69 | "moved_to": 52,
70 | "participant": 53,
71 |
72 | "shows": 60,
73 | "carries": 61,
74 |
75 | "consists_of": 10001,
76 | "composed_of": 10001,
77 | "part": 10001,
78 | "temporally_contains": 10001,
79 | "spatially_contains": 10001,
80 | "member": 10001
81 | }
--------------------------------------------------------------------------------
/examples/example.py:
--------------------------------------------------------------------------------
1 |
2 | from cidoc_orm import factory, Document, Activity, Event, TimeSpan, ManMadeObject, Acquisition, Type, Identifier
3 |
4 | # Locally "subclass" to create consistent patterns with E55 and AAT
5 | class Painting(ManMadeObject):
6 | def __init__(self, *args, **kw):
7 | super(Painting, self).__init__(*args, **kw)
8 | self.has_type = Type("http://vocab.getty.edu/aat/300033618")
9 |
10 | class LugtNumber(Identifier):
11 | def __init__(self, *args, **kw):
12 | super(LugtNumber, self).__init__(*args, **kw)
13 | # ???
14 | self.has_type = Type("http://vocab.getty.edu/aat/300033618")
15 |
16 | class TMSNumber(Identifier):
17 | def __init__(self, *args, **kw):
18 | super(TMSNumber, self).__init__(*args, **kw)
19 | # Repository Number
20 | self.has_type = Type("http://vocab.getty.edu/aat/300404621")
21 |
22 | class LotNumber(Identifier):
23 | def __init__(self, *args, **kw):
24 | super(LotNumber, self).__init__(*args, **kw)
25 | # Lot Number
26 | self.has_type = Type("http://vocab.getty.edu/aat/300404628")
27 |
28 |
29 | # Or actually subclass in an extension vocab
30 | class Mosaic(ManMadeObject):
31 | _type = "extension:Mosaic"
32 |
33 | factory.base_url = "http://data.getty.edu/provenance/"
34 | factory.default_lang = "en"
35 |
36 | catalog = Document("catalog")
37 | page = Document("catalog-entry")
38 | catalog.has_component = page
39 | auction = Activity("auction")
40 | catalog.documents = auction
41 | lot = Activity("lot")
42 | auction.consists_of = lot
43 | page.documents = lot
44 | txn = Acquisition("sale")
45 | lot.consists_of = txn
46 | what = Painting('my-painting')
47 | txn.transferred_title_of = what
48 | what.label = "My First Paint By Numbers"
49 | what.is_identified_by = TMSNumber("")
50 |
51 |
52 | print factory.toString(catalog, compact=False)
53 |
--------------------------------------------------------------------------------
/.circleci/config.yml:
--------------------------------------------------------------------------------
1 | # Python CircleCI 2.0 configuration file
2 | #
3 | # Check https://circleci.com/docs/2.0/language-python/ for more details
4 | #
5 | version: 2
6 | jobs:
7 | build:
8 | docker:
9 | # specify the version you desire here
10 | # use `-browsers` prefix for selenium tests, e.g. `3.6.1-browsers`
11 | - image: circleci/python:3.8.0
12 |
13 | # Specify service dependencies here if necessary
14 | # CircleCI maintains a library of pre-built images
15 | # documented at https://circleci.com/docs/2.0/circleci-images/
16 | # - image: circleci/postgres:9.4
17 |
18 | working_directory: ~/repo
19 |
20 | steps:
21 | - checkout
22 |
23 | # Download and cache dependencies
24 | - restore_cache:
25 | keys:
26 | - v1-dependencies-{{ checksum "requirements.txt" }}
27 | # fallback to using the latest cache if no exact match is found
28 | - v1-dependencies-
29 |
30 | - run:
31 | name: install dependencies
32 | command: |
33 | python3 -m venv venv
34 | . venv/bin/activate
35 | pip install -r requirements.txt
36 |
37 | - save_cache:
38 | paths:
39 | - ./venv
40 | key: v1-dependencies-{{ checksum "requirements.txt" }}
41 |
42 | # run tests!
43 | # this example uses Django's built-in test-runner
44 | # other common Python testing frameworks include pytest and nose
45 | # https://pytest.org
46 | # https://nose.readthedocs.io
47 | - run:
48 | name: run tests
49 | command: |
50 | . venv/bin/activate
51 | python setup.py test
52 |
53 | - store_artifacts:
54 | path: test-reports
55 | destination: test-reports
56 |
--------------------------------------------------------------------------------
/utils/old/make_inverses.py:
--------------------------------------------------------------------------------
1 | from lxml import etree
2 | import codecs
3 |
4 | fh = file('cidoc.xml')
5 | data = fh.read()
6 | fh.close()
7 |
8 | NS = {'rdf':"http://www.w3.org/1999/02/22-rdf-syntax-ns#",
9 | 'xsd':"http://www.w3.org/2001/XMLSchema#",
10 | 'rdfs':"http://www.w3.org/2000/01/rdf-schema#",
11 | 'dcterms':"http://purl.org/dc/terms/",
12 | 'owl':"http://www.w3.org/2002/07/owl#",
13 | 'crm':"http://www.cidoc-crm.org/cidoc-crm/",
14 | 'xml': "http://www.w3.org/XML/1998/namespace"
15 | }
16 |
17 | dom = etree.XML(data)
18 | names = []
19 | inverses = {}
20 |
21 | props = dom.xpath("//rdf:Property",namespaces=NS)
22 | for p in props:
23 | name = p.xpath('@rdf:about', namespaces=NS)[0]
24 | names.append(name)
25 |
26 | for p in props:
27 | name = p.xpath('@rdf:about', namespaces=NS)[0]
28 | fu = name.find('_')
29 | pid = name[:fu]
30 | if pid[-1] in ['a', 'b']:
31 | # No inverses for botb eote
32 | continue
33 | inverse = ""
34 | if pid[-1] == "i":
35 | pid = pid[:-1]
36 | else:
37 | pid = pid + "i"
38 | pid += "_"
39 |
40 | for i in names:
41 | if i.startswith(pid) and i != name:
42 | inverse = i
43 | break
44 | if inverse:
45 | inverses[name] = inverse
46 |
47 | # Now print ONLY the inverses
48 | outlines = [
49 | '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:owl="http://www.w3.org/2002/07/owl#">'
50 | ]
51 |
52 | for n in names:
53 | if n in inverses:
54 | outlines.append('<rdf:Property rdf:about="%s">' % n )
55 | outlines.append('<owl:inverseOf rdf:resource="%s"/>' % inverses[n])
56 | outlines.append('</rdf:Property>')
57 | outlines.append('</rdf:RDF>')
58 | outstr = '\n'.join(outlines)
59 |
60 | fh = file('data/inverses.xml', 'w')
61 | fh.write(outstr)
62 | fh.close()
63 |
--------------------------------------------------------------------------------
/cromulent/data/overrides.json:
--------------------------------------------------------------------------------
1 | {
2 | "P45": "made_of",
3 | "P7i": "location_of",
4 | "P5": "subState",
5 | "P5i": "subState_of",
6 | "P20i": "specific_purpose_of",
7 | "P42": "assigned_type",
8 | "P42i": "type_assigned_by",
9 | "P37": "assigned_identifier",
10 | "P37i": "identifier_assigned_by",
11 | "P35i": "condition_identified_by",
12 |
13 | "P28": "transferred_custody_from",
14 | "P29": "transferred_custody_to",
15 | "P29i": "acquired_custody_through",
16 | "P14i": "carried_out",
17 | "P140": "assigned_to",
18 | "P50": "current_custodian",
19 | "P50i": "current_custodian_of",
20 |
21 | "P9": "part",
22 | "P9i": "part_of",
23 | "P46": "part",
24 | "P46i": "part_of",
25 | "P86": "part_of",
26 | "P86i": "part",
27 | "P89": "part_of",
28 | "P89i": "part",
29 | "P106": "part",
30 | "P106i": "part_of",
31 | "P127i": "part",
32 | "P127": "part_of",
33 |
34 | "P148": "c_part",
35 | "P148i": "c_part_of",
36 |
37 | "P107": "member",
38 | "P107i": "member_of",
39 | "P56": "bears",
40 | "la:has_member": "member",
41 | "la:member_of": "member_of",
42 |
43 | "P32": "technique",
44 | "P33": "specific_technique",
45 | "P12": "involved",
46 | "P101": "general_use",
47 | "P100i": "died",
48 | "P74": "residence",
49 |
50 | "P65": "shows",
51 | "P2": "classified_as",
52 | "P190": "content",
53 | "P177": "assigned_property",
54 |
55 | "P133": "distinct_from",
56 | "P164i": "timespan_of_presence",
57 | "P151i": "participated_in_formation",
58 | "P165i": "incorporated_by",
59 | "P132": "volume_overlaps_with",
60 | "P135i": "type_created_by",
61 | "P139": "alternative",
62 |
63 | "P172": "spatially_contains",
64 | "P186i": "type_produced_by",
65 | "P168": "defined_by",
66 |
67 | "P165": "presence_of",
68 | "P195": "presence_of_thing",
69 | "P195i": "thing_presence",
70 | "P196i": "thing_defined_by",
71 |
72 | "skos:closeMatch": "close_match",
73 | "skos:exactMatch": "exact_match",
74 | "dcterms:conformsTo": "conforms_to",
75 | "dcterms:relation": "related",
76 | "schema:genre": "style",
77 | "rdfs:seeAlso": "see_also",
78 | "rdfs:label": "_label",
79 | "sci:O13_triggers": "caused",
80 | "sci:O13i_is_triggered_by": "caused_by",
81 | "sci:O19_encountered_object": "encountered",
82 | "sci:O19i_was_object_encountered_at": "encountered_by"
83 | }
84 |
--------------------------------------------------------------------------------
/utils/info.py:
--------------------------------------------------------------------------------
1 |
2 | import sys, argparse
3 | from cromulent import model, vocab
4 |
5 | parser = argparse.ArgumentParser()
6 | parser.add_argument('what')
7 | parser.add_argument('--okay', '--profile', dest="okay", type=bool)
8 | parser.add_argument('--filter', dest="filter")
9 | parser.add_argument('--self', dest="onlySelf", type=bool)
10 | args = parser.parse_args()
11 |
12 | def list_all_props(what, filter=None, okay=None):
13 | props = []
14 | ks = []
15 | for c in what._classhier:
16 | for k,v in c._all_properties.items():
17 | if not k in ks and \
18 | (not okay or (okay and v.profile_okay)) and \
19 | (filter is None or isinstance(filter, v.range) or \
20 | filter is v.range):
21 | props.append(v)
22 | ks.append(k)
23 | props.sort(key=lambda x: x.property)
24 | return props
25 |
26 | def list_my_props(what, filter=None, okay=None):
27 | props = []
28 | ks = []
29 | for k,v in what._all_properties.items():
30 | if not k in ks and \
31 | (not okay or (okay and v.profile_okay)) and \
32 | (filter is None or isinstance(filter, v.range) or \
33 | filter is v.range):
34 | props.append(v)
35 | ks.append(k)
36 | props.sort(key=lambda x: x.property)
37 | return props
38 |
39 | what = args.what
40 | try:
41 | c = getattr(model, what)
42 | except:
43 | try:
44 | c = getattr(vocab, what)
45 | except:
46 | print(f"Unknown model or vocab class: {what}")
47 | sys.exit(1)
48 |
49 | if args.filter:
50 | try:
51 | cf = getattr(model, args.filter)
52 | f = cf()
53 | except:
54 | f = None
55 | else:
56 | cf = None
57 | f = None
58 |
59 |
60 | print(f"Main Class: \033[95m{c.__name__}\033[0m")
61 | if cf:
62 | print(f"Filtered To: \033[95m{cf.__name__}\033[0m")
63 | else:
64 | print("Filtered To: None")
65 | print(f"Using Profile: {args.okay}")
66 |
67 |
68 |
69 | model.factory.validate_profile = False
70 | instance = c()
71 |
72 | if args.onlySelf:
73 | ap = list_my_props(instance, okay=args.okay, filter=f)
74 | else:
75 | ap = list_all_props(instance, okay=args.okay, filter=f)
76 |
77 | #ap2 = instance.list_all_props(okay=args.okay, filter=f)
78 |
79 |
80 | for pi in ap:
81 | if pi.property in ['close_match', 'exact_match']:
82 | continue
83 | out = f"{pi.property} ({pi.predicate})"
84 | if pi.inverse_property:
85 | inv = f"{pi.inverse_property} ({pi.inverse_predicate})"
86 | else:
87 | inv = ""
88 | if pi.range == str:
89 | rng = "\033[93mLiteral"
90 | else:
91 | rng = pi.range.__name__
92 | # old skool colorizing
93 | print(f"\033[95m{what:<15} \033[92m{out:<50} / {inv:<50} \033[95m{rng}\033[0m")
94 |
95 |
--------------------------------------------------------------------------------
/tests/test_reader.py:
--------------------------------------------------------------------------------
1 |
2 | import unittest
3 |
4 | try:
5 | from collections import OrderedDict
6 | except:
7 | # 2.6
8 | from ordereddict import OrderedDict
9 |
10 | from cromulent import reader
11 | from cromulent.model import factory, Person, DataError, BaseResource, \
12 | Dimension, override_okay, AttributeAssignment
13 |
14 | from cromulent import vocab
15 |
16 | class TestReader(unittest.TestCase):
17 |
18 | def setUp(self):
19 | self.reader = reader.Reader()
20 | # ensure we can use parent_of
21 | override_okay(Person, 'parent_of')
22 | # Person._properties['parent_of']['multiple'] = 1
23 |
24 | def test_read(self):
25 | self.assertRaises(DataError, self.reader.read, "")
26 | self.assertRaises(DataError, self.reader.read, "This is not JSON")
27 | self.assertRaises(DataError, self.reader.read, "{}")
28 |
29 | whostr = '{"type": "Person", "_label": "me"}'
30 | self.assertTrue(isinstance(self.reader.read(whostr), Person))
31 |
32 | whostr = '{"@context": "fishbat", "type": "Person", "_label": "me"}'
33 | self.assertTrue(isinstance(self.reader.read(whostr), Person))
34 |
35 | levelstr = '{"type": "Person", "parent_of": {"type": "Person", "_label": "child"}}'
36 | self.assertTrue(isinstance(self.reader.read(levelstr).parent_of[0], Person))
37 |
38 | basestr = '{"_label": "base"}'
39 | self.assertTrue(isinstance(self.reader.read(basestr), BaseResource))
40 |
41 | unknown = '{"type":"FishBat"}'
42 | self.assertRaises(DataError, self.reader.read, unknown)
43 |
44 | unknown2 = '{"type":"Person", "fishbat": "bob"}'
45 | self.assertRaises(DataError, self.reader.read, unknown2)
46 |
47 | def test_attrib_assign(self):
48 | vocab.add_attribute_assignment_check()
49 |
50 | data = """
51 | {
52 | "id": "https://linked.art/example/activity/12",
53 | "type": "AttributeAssignment",
54 | "assigned": {
55 | "id": "https://linked.art/example/name/10",
56 | "type": "Name",
57 | "content": "Exhibition Specific Name"
58 | },
59 | "assigned_property": "identified_by",
60 | "assigned_to": {
61 | "id": "https://linked.art/example/object/12",
62 | "type": "HumanMadeObject",
63 | "_label": "Real Painting Name"
64 | }
65 | }
66 | """
67 | d = self.reader.read(data)
68 | self.assertTrue(isinstance(d, AttributeAssignment))
69 |
70 |
71 | def test_vocab_collision(self):
72 | # Test that the algorithm picks the right vocab instance
73 | # if multiple have the same AAT term but different base class
74 |
75 | data = """
76 | {
77 | "type": "LinguisticObject",
78 | "_label": "Sale recorded in catalog: B-267 0003 (1817) (record number 22947)",
79 | "part_of": [
80 | {
81 | "type": "LinguisticObject",
82 | "_label": "Sale Catalog B-267",
83 | "classified_as": [
84 | {
85 | "id": "http://vocab.getty.edu/aat/300026068",
86 | "type": "Type",
87 | "_label": "Auction Catalog"
88 | }
89 | ]
90 | }
91 | ]
92 | }
93 | """
94 | d = self.reader.read(data)
95 | self.assertTrue(isinstance(d.part_of[0], vocab.AuctionCatalogText))
96 |
97 |
--------------------------------------------------------------------------------
/experimental/bibframe_reader.py:
--------------------------------------------------------------------------------
1 | from lxml import etree
2 | import codecs
3 | import json
4 |
5 | default_key_order = 10000
6 |
7 | NS = {'rdf':"http://www.w3.org/1999/02/22-rdf-syntax-ns#",
8 | 'xsd':"http://www.w3.org/2001/XMLSchema#",
9 | 'rdfs':"http://www.w3.org/2000/01/rdf-schema#",
10 | 'dcterms':"http://purl.org/dc/terms/",
11 | 'owl':"http://www.w3.org/2002/07/owl#",
12 | 'crm':"http://www.cidoc-crm.org/cidoc-crm/",
13 | 'skos':"http://www.w3.org/2004/02/skos/core#",
14 | 'xml': "http://www.w3.org/XML/1998/namespace"
15 | }
16 |
17 | fh = file('bibframe.rdf')
18 | data = fh.read()
19 | fh.close()
20 | dom = etree.XML(data)
21 | stuff = []
22 |
23 | property_overrides = {}
24 |
25 | classes = dom.xpath("//rdfs:Class", namespaces=NS)
26 |
27 | if not classes:
28 | classes = dom.xpath('//owl:Class', namespaces=NS)
29 |
30 | for c in classes:
31 | label = c.xpath('./rdfs:label/text()', namespaces=NS)[0]
32 | try:
33 | comment = c.xpath('./rdfs:comment/text()', namespaces=NS)
34 | if not comment:
35 | comment = c.xpath('./skos:definition/text()', namespaces=NS)
36 | if comment:
37 | comment = comment[0]
38 | comment = comment.strip()
39 | comment = comment.replace('\n', '\\n').replace('\t', ' ')
40 | except:
41 | comment = ""
42 | name = c.xpath('@rdf:about', namespaces=NS)[0]
43 |
44 | subCls = c.xpath('./rdfs:subClassOf/@rdf:resource', namespaces=NS)
45 | if subCls:
46 | # could be multiples
47 | subCls = '|'.join(subCls)
48 | else:
49 | subCls = ""
50 |
51 | uc1 = name.rfind("/")
52 | ccname = name[uc1+1:]
53 | ccname = ccname.replace("_or_", "_Or_").replace("_of_", "_Of_")
54 | ccname = ccname.replace('-', '').replace('_', '')
55 |
56 | stuff.append([name, "class", ccname, label, comment, subCls])
57 |
58 | props = dom.xpath("//rdf:Property",namespaces=NS)
59 | if not props:
60 | props = dom.xpath('//owl:DatatypeProperty', namespaces=NS)
61 | props.extend(dom.xpath('//owl:ObjectProperty', namespaces=NS))
62 |
63 | for p in props:
64 | label = p.xpath('./rdfs:label/text()', namespaces=NS)[0]
65 | try:
66 | comment = p.xpath('./rdfs:comment/text()', namespaces=NS)
67 | if not comment:
68 | comment = p.xpath('./skos:definition/text()', namespaces=NS)
69 | if comment:
70 | comment = comment[0]
71 | comment = comment.strip()
72 | comment = comment.replace('\n', '\\n').replace('\t', ' ')
73 | except:
74 | comment = ""
75 |
76 | name = p.xpath('@rdf:about', namespaces=NS)[0]
77 | domn = p.xpath('./rdfs:domain/@rdf:resource', namespaces=NS)
78 | if domn:
79 | domn = domn[0]
80 | for (k,v) in NS.items():
81 | domn = domn.replace(v,"%s:" % k)
82 | else:
83 | domn = ""
84 | rang = p.xpath('./rdfs:range/@rdf:resource', namespaces=NS)
85 | if rang:
86 | rang = rang[0]
87 | for (k,v) in NS.items():
88 | rang = rang.replace(v,"%s:" % k)
89 | else:
90 | rang = ""
91 | subProp = p.xpath('./rdfs:subPropertyOf/@rdf:resource', namespaces=NS)
92 | if subProp:
93 | subProp = subProp[0]
94 | else:
95 | subProp = ""
96 |
97 | inverse = p.xpath('./owl:inverseOf/@rdf:resource', namespaces=NS)
98 | if inverse:
99 | inverse = inverse[0]
100 | else:
101 | inverse = ""
102 |
103 | uc1 = name.find("_")
104 | pno = name[:uc1]
105 | if property_overrides.has_key(pno):
106 | ccname = property_overrides[pno]
107 | else:
108 | ccname = name[uc1+1:]
109 | ccname = ccname.replace("-", "")
110 | if ccname.startswith("is_"):
111 | ccname = ccname[3:]
112 | elif ccname.startswith("has_") or ccname.startswith("had_") or ccname.startswith("was_"):
113 | ccname = ccname[4:]
114 |
115 | # koi = str(key_order_hash.get(ccname, default_key_order))
116 | koi = "10000"
117 | stuff.append([name, "property", ccname, label, comment, subProp, domn, rang, inverse, koi])
118 |
# Serialize all accumulated rows as tab-separated lines and write the file;
# the context manager guarantees the handle is closed even on error
outdata = '\n'.join(['\t'.join(x) for x in stuff])
with codecs.open('bibframe_vocab.tsv', 'w', 'utf-8') as fh:
	fh.write(outdata)
123 |
--------------------------------------------------------------------------------
/tests/test_currency.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import unittest
try:
    from contextlib import suppress
except ImportError:
    # Python 2.7 has no contextlib.suppress; callers must check for None.
    # Narrowed from a bare except: only a missing import is expected here.
    suppress = None
9 | import pprint
10 | from datetime import datetime
11 | from cromulent import model, vocab
12 | from cromulent.extract import extract_monetary_amount
13 | import cromulent.extract
14 |
# Maps input currency strings either to a registered vocab instance ('xxx')
# or to the name of a currency the extractor already knows ('zzz'); used to
# exercise the currency_mapping argument of extract_monetary_amount below.
CUSTOM_MAPPING = {
    'xxx': vocab.register_instance('xxx custom currency', {'parent': model.Currency, 'id': '999999999', 'label': 'My Dollars'}),
    'zzz': 'us dollars'
}
19 |
class TestCurrencyExtraction(unittest.TestCase):
    '''
    Test the ability to extract currency data.
    '''
    def setUp(self):
        pass

    def tearDown(self):
        pass

    def test_extract_simple(self):
        # A plain price + currency becomes a MonetaryAmount whose label is
        # truncated to two decimal places by default
        e = extract_monetary_amount({
            'price': '10.0',
            'currency': 'pounds'
        })
        self.assertEqual(e.type, 'MonetaryAmount')
        self.assertEqual(e._label, '10.00 pounds')
        self.assertEqual(e.value, 10)
        c = e.currency
        self.assertEqual(c.type, 'Currency')
        self.assertEqual(c._label, 'British Pounds')

    def test_extract_comma_separated(self):
        # Thousands separators are kept in the label but parsed out of the value
        e = extract_monetary_amount({
            'price': '1,280.5',
            'currency': 'pounds'
        })
        self.assertEqual(e.type, 'MonetaryAmount')
        self.assertEqual(e._label, '1,280.50 pounds')
        self.assertEqual(e.value, 1280.50)
        c = e.currency
        self.assertEqual(c.type, 'Currency')
        self.assertEqual(c._label, 'British Pounds')

    def test_extract_label_digits(self):
        # truncate_label_digits controls the number of decimal places shown
        e = extract_monetary_amount({
            'price': '1,280.5',
            'currency': 'pounds'
        }, truncate_label_digits=4)
        self.assertEqual(e.type, 'MonetaryAmount')
        self.assertEqual(e._label, '1,280.5000 pounds')
        self.assertEqual(e.value, 1280.50)
        c = e.currency
        self.assertEqual(c.type, 'Currency')
        self.assertEqual(c._label, 'British Pounds')

    def test_extract_multiple_comma_separated(self):
        e = extract_monetary_amount({
            'price': '1,310,720.5',
            'currency': 'pounds'
        })
        self.assertEqual(e.type, 'MonetaryAmount')
        self.assertEqual(e._label, '1,310,720.50 pounds')
        self.assertEqual(e.value, 1310720.5)
        c = e.currency
        self.assertEqual(c.type, 'Currency')
        self.assertEqual(c._label, 'British Pounds')

    def test_extract_est(self):
        # est_price classifies the amount as an estimated price
        e = extract_monetary_amount({
            'est_price': '12.0',
            'currency': 'pounds'
        })
        self.assertEqual(e.value, 12)
        self.assertEqual(e.classified_as[0]._label, 'Estimated Price')
        self.assertEqual(e.currency._label, 'British Pounds')

    def test_extract_start(self):
        # start_price classifies the amount as a starting price
        e = extract_monetary_amount({
            'start_price': '8.5',
            'currency': 'pounds'
        })
        self.assertEqual(e.value, 8.5)
        self.assertEqual(e.classified_as[0]._label, 'Starting Price')
        self.assertEqual(e.currency._label, 'British Pounds')

    def test_extract_custom_currency_key(self):
        d = {
            'price': '7',
            'currency': 'zzz'
        }
        # Without a mapping, the unknown currency key raises
        with self.assertRaises(AttributeError):
            e = extract_monetary_amount(d)
            # NOTE(review): this assertion is unreachable -- the call above
            # raises first; kept from the original to document the intent
            self.assertEqual(e.currency._label, 'Custom Currency')

        # With the mapping, 'zzz' resolves to a named known currency
        e = extract_monetary_amount(d, currency_mapping=CUSTOM_MAPPING)
        self.assertEqual(e.value, 7)
        self.assertEqual(e.currency._label, 'US Dollars')

    def test_extract_custom_currency_instance(self):
        d = {
            'price': '7',
            'currency': 'xxx'
        }
        with self.assertRaises(AttributeError):
            e = extract_monetary_amount(d)
            # NOTE(review): unreachable -- see test_extract_custom_currency_key
            self.assertEqual(e.currency._label, 'Custom Currency')

        # With the mapping, 'xxx' resolves to the registered vocab instance
        e = extract_monetary_amount(d, currency_mapping=CUSTOM_MAPPING)
        self.assertEqual(e.value, 7)
        self.assertEqual(e.currency._label, 'My Dollars')

    def test_extract_price_with_citation(self):
        d = {
            'price': '7',
            'currency': 'pounds',
            'citation': 'crom test suite'
        }
        # add_citations attaches the citation as a referred_to_by statement
        e = extract_monetary_amount(d, add_citations=True)
        self.assertEqual(e.value, 7)
        self.assertEqual(e.currency._label, 'British Pounds')
        self.assertEqual(e.referred_to_by[0].content, 'crom test suite')
134 |
135 |
# Allow running this test file directly as a script
if __name__ == '__main__':
    unittest.main()
138 |
--------------------------------------------------------------------------------
/cromulent/reader.py:
--------------------------------------------------------------------------------
1 | from cromulent import model, vocab
2 | from cromulent.model import factory, DataError, OrderedDict, BaseResource
3 | from cromulent.model import STR_TYPES
4 |
5 | import json
6 |
class Reader(object):
	"""Deserialize JSON-LD produced by the factory back into model objects."""

	def __init__(self, validate_props=True, validate_profile=True):
		# URI -> already constructed object, for resolving references
		self.uri_object_map = {}
		# [object, property, uri] triples to resolve once all objects exist
		self.forward_refs = []
		# Properties whose string values are vocabulary terms, not URIs
		self.vocab_props = ['assigned_property']
		# (base class name, classification URI) -> refined vocab class
		self.vocab_classes = {}
		self.validate_profile = validate_profile
		self.validate_props = validate_props

		for cx in dir(vocab):
			what = getattr(vocab, cx)
			# crying cat face -- type as a @property returns the function, not the value
			# when calling it on a class rather than an instance
			try:
				mytype = what._classhier[0].__name__
			except AttributeError:
				continue
			# find vocab-specific classes and index them by base class name
			# plus the URI of their first classification
			if cx[0].isupper() and not hasattr(model, cx) and type(what) == type:
				self.vocab_classes[(mytype, what._classification[0].id)] = what

	def read(self, data):
		"""Parse ``data`` (a JSON string or already-parsed dict) into an object.

		Raises DataError for empty input or invalid JSON.
		"""
		if not data:
			raise DataError("No data provided: %r" % data)
		elif type(data) in STR_TYPES:  # STR_TYPES may be a list, so no isinstance
			try:
				data = json.loads(data)
			except ValueError:
				# json raises ValueError (JSONDecodeError) on bad input;
				# narrowed from a bare except
				raise DataError("Data is not valid JSON")
			if not data:
				raise DataError("No Data provided")
		# Reset per-read state, build, then resolve any forward references
		self.uri_object_map = {}
		self.forward_refs = []
		what = self.construct(data)
		self.process_forward_refs()
		self.uri_object_map = {}
		self.forward_refs = []
		return what

	def process_forward_refs(self):
		"""Resolve references to URIs that were only defined later in the data."""
		for (what, prop, uri) in self.forward_refs:
			if uri in self.uri_object_map:
				setattr(what, prop, self.uri_object_map[uri])
			else:
				raise NotImplementedError("No class information for %s.%s = %s" % (what, prop, uri))

	def construct(self, js):
		"""Recursively build a model object from parsed JSON-LD ``js``."""
		if '@context' in js:
			del js['@context']

		ident = js.get('id', '')
		typ = js.get('type', None)

		if typ is None:
			clx = BaseResource
		else:
			# Get class based on name
			try:
				clx = getattr(model, typ)
			except AttributeError:
				# No such class
				raise DataError("Resource %s has unknown class %s" % (ident, typ))

		# now check vocab classes to try and refine to a more specific class
		trash = None
		if 'classified_as' in js:
			for c in js['classified_as']:
				i = c.get('id', '')
				clx2 = self.vocab_classes.get((typ, i), None)
				if clx2 is not None:
					clx = clx2
					# remember the classification that the refined class
					# already implies, so it isn't set a second time below
					trash = c
					break

		what = clx(ident=ident)
		what._validate_profile = self.validate_profile
		self.uri_object_map[ident] = what

		if self.validate_props:
			propList = what.list_all_props()

		# sort data by KOH to minimize chance of bad backrefs
		itms = list(js.items())
		itms.sort(key=lambda x: factory.key_order_hash.get(x[0], 10000))

		for (prop, value) in itms:
			if prop in ['id', 'type']:
				continue

			if self.validate_props and prop not in propList:
				raise DataError("Unknown property %s on %s" % (prop, clx.__name__))

			# Climb the class hierarchy looking for the property's range.
			# BUG FIX: rng is initialized so an unknown property (possible
			# when validate_props is off) no longer raises NameError below.
			rng = None
			for c in what._classhier:
				if prop in c._all_properties:
					rng = c._all_properties[prop].range
					break

			if not isinstance(value, list):
				value = [value]
			for subvalue in value:
				if trash is not None and prop == 'classified_as' and subvalue == trash:
					continue
				if rng == str:
					setattr(what, prop, subvalue)
				elif type(subvalue) == dict or isinstance(subvalue, OrderedDict):
					# recurse ...
					val = self.construct(subvalue)
					setattr(what, prop, val)
				elif type(subvalue) in STR_TYPES and prop in self.vocab_props:
					# keep as string
					setattr(what, prop, subvalue)
				elif type(subvalue) in STR_TYPES:
					# raw URI to be made into a class of type rng
					# or back reference
					if subvalue in self.uri_object_map:
						setattr(what, prop, self.uri_object_map[subvalue])
					elif rng in [model.Type, BaseResource]:
						# Always a X, often no more info
						setattr(what, prop, rng(ident=subvalue))
					else:
						self.forward_refs.append([what, prop, subvalue])
				else:
					# No idea!!
					raise DataError("Value %r is not expected for %s" % (subvalue, prop))

		return what
140 |
141 |
142 |
--------------------------------------------------------------------------------
/tests/test_vocab.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import sys
3 | import os
4 |
5 | from cromulent import vocab, model
6 | from cromulent.model import factory
7 |
class TestClassBuilder(unittest.TestCase):
	"""Tests for vocab class registration and vocab-installed behaviors."""

	def setUp(self):
		pass

	def tearDown(self):
		pass

	def test_class(self):
		# Registering an AAT class makes it importable from cromulent.vocab
		vocab.register_aat_class("TestObject1", {"parent": model.HumanMadeObject, "id": "1", "label": "example 1"})
		from cromulent.vocab import TestObject1
		self.assertEqual(TestObject1._classification[0].id, 'http://vocab.getty.edu/aat/1')

	def test_instance(self):
		# Registered instances are stored in vocab.instances with an AAT URI
		vocab.register_instance("TestMaterial2", {"parent": model.Material, "id": "2", "label": "example 2"})
		self.assertIn('TestMaterial2', vocab.instances)
		tm2 = vocab.instances['TestMaterial2']
		self.assertEqual(tm2.id, "http://vocab.getty.edu/aat/2")

	def test_metatype(self):
		# A metatype classifies the class's classification
		vocab.register_instance("example", {"parent": model.Type, "id": "3", "label": "example type"})
		vocab.register_aat_class("TestObject2",
			{"parent": model.HumanMadeObject, "id": "4", "label": "example typed object", "metatype": "example"})
		from cromulent.vocab import TestObject2
		self.assertEqual(TestObject2._classification[0].classified_as[0].id, 'http://vocab.getty.edu/aat/3')

	def test_multitype(self):
		from cromulent.vocab import make_multitype_obj, Painting, Drawing
		inst = make_multitype_obj(Painting, Drawing)
		self.assertIsInstance(inst, Painting)
		self.assertEqual(len(inst.classified_as), 2)
		self.assertEqual(inst.classified_as[1].id, "http://vocab.getty.edu/aat/300033973")

		from cromulent.model import HumanMadeObject

		# A plain model class as the first argument contributes no classification
		inst = make_multitype_obj(HumanMadeObject, Painting)
		self.assertEqual(len(inst.classified_as), 1)
		self.assertEqual(inst.classified_as[0].id, "http://vocab.getty.edu/aat/300033618")

	def test_conceptual_parts(self):
		r = model.Right()
		r2 = model.Right()
		# Plain 'part' is rejected for conceptual resources by default
		self.assertRaises(model.DataError, r.__setattr__, 'part', r2)
		r.c_part = r2
		self.assertIn(r2, r.c_part)

		# After conceptual_only_parts(), 'part' is accepted and aliased
		vocab.conceptual_only_parts()
		r3 = model.Right()
		r4 = model.Right()
		r3.part = r4
		self.assertIn(r4, r3.c_part)
		self.assertIn("part", model.factory.toJSON(r3))
		self.assertIn(r4, r3.part)

	def test_art_setter(self):
		p = model.HumanMadeObject("a", art=1)
		p._label = "a"
		pj = p._toJSON(done={})
		# Without the setter installed, art=1 adds no classification
		self.assertFalse(pj.get('classified_as', None))
		vocab.add_art_setter()
		p2 = vocab.Painting("b", art=1)
		# Serialization should succeed with the setter installed
		p2j = p2._toJSON(done={})

	def test_aa_check(self):
		# Make sure that some other test hasn't set it
		try:
			del model.AttributeAssignment.set_assigned_property
		except AttributeError:
			# narrowed from a bare except: "not set" is the only expected case
			pass

		t = model.Type()
		aa = model.AttributeAssignment()
		# First check that aa accepts a type
		aa.assigned_property = t
		# And will not accept a string
		self.assertRaises(model.DataError, aa.__setattr__, "assigned_property", "classified_as")

		# Check we can set anything to assigned / assigned_to
		aa.assigned_property = None
		aa.assigned = aa
		aa.assigned_to = aa
		self.assertEqual(aa.assigned, aa)
		self.assertEqual(aa.assigned_to, aa)

		vocab.add_attribute_assignment_check()

		# This should fail right now as can't classify as an AA
		self.assertRaises(model.DataError, aa.__setattr__, "assigned_property", "classified_as")
		aa.assigned = None
		aa.assigned_to = None
		aa.assigned = t
		aa.assigned_to = t
		aa.assigned_property = "classified_as"
		self.assertEqual(aa.assigned_property, 'classified_as')

	def test_boundary_setter(self):
		vocab.add_linked_art_boundary_check()
		p = model.Person()
		p2 = model.Person()
		n = model.Name()
		n.content = "Test"
		p2.identified_by = n
		p.exact_match = p2
		# Now, Test should not appear in the resulting JSON of p
		factory.linked_art_boundaries = True
		js = factory.toJSON(p)
		self.assertNotIn('identified_by', js['exact_match'][0])
		factory.linked_art_boundaries = False
		js = factory.toJSON(p)
		self.assertIn('identified_by', js['exact_match'][0])

	def test_procurement_boundary(self):
		vocab.add_linked_art_boundary_check()
		a = model.Activity()
		p = vocab.ProvenanceEntry()
		a.caused = p
		js = factory.toJSON(a)
		self.assertNotIn('classified_as', js['caused'][0])

	def test_linguistic_object_boundary(self):
		vocab.add_linked_art_boundary_check()
		jrnl = vocab.JournalText(label="journal")
		issue = vocab.IssueText(label="issue")
		issue.part_of = jrnl
		issue.referred_to_by = vocab.MaterialStatement(content="Statement")

		js = factory.toJSON(issue)
		# Have not embedded journal in issue
		self.assertNotIn('classified_as', js['part_of'][0])
		# Have embedded statement in issue
		self.assertIn('content', js['referred_to_by'][0])
		self.assertIn('type', js['referred_to_by'][0]['classified_as'][0]['classified_as'][0])
142 |
143 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [](https://travis-ci.org/thegetty/crom) [](https://coveralls.io/github/thegetty/crom?branch=master)
2 |
3 | # Cromulent
4 |
5 | A Python library to make creation of CIDOC CRM easier by mapping classes/predicates to python objects/properties, thereby making the CRM "CRoMulent", a Simpsons neologism for "acceptable" or "fine".
6 |
7 | ## Status: Beta
8 |
9 | The core vocabulary loading functionality is reasonably stable. The vocabulary section is expanding as we find new, useful terms to include and will likely constantly change.
10 |
The code is actively being developed and compatibility-breaking changes are thus to be expected as we use it in various projects across The J Paul Getty Trust, and beyond.
12 |
13 | ## How to Use It
14 |
15 | ### Basic Usage
16 |
17 | Import the classes from the model module. As the classes are dynamically generated, they're not in the code but will be there once the `build_classes` function has been called.
18 |
19 | ```python
20 | from cromulent.model import factory, Group
21 | g1 = Group(ident="Organization")
22 | g2 = Group(ident="Department")
23 | g1.member = g2
print(factory.toString(g1, compact=False))
25 | ```
26 |
27 | The constructor for the classes takes the following parameters:
28 |
29 | * `ident` - an identifier to use for this instance. If specified, it should be a URI represented as a string. If it is the empty string, it will result in no identifier. If not specified, or specified as `None`, then it will be auto-generated by the factory if `auto_assign_id` is true, or if `auto_assign_id` is false, then it will result in no identifier.
30 | * `label` - a human readable label for the resource, to act as internal documentation for the data
31 | * `value` or `content` - a data value for the class. Dimensions and MonetaryAmounts use `value` which must be a number, and Name, Identifier, LinguisticObject and similar use `content` which must be a string.
32 | * Additional keywords may be passed in, and will be sent to class-specific initialization code.
33 |
34 |
35 | ### Vocabulary
36 |
37 | ```python
38 | from cromulent.model import factory
39 | from cromulent.vocab import Height
40 | h = Height()
41 | h.value = 6
print(factory.toString(h, compact=False))
43 | ```
44 |
45 | ### Tricks and Gotchas
46 |
47 | * Assigning to the same property repeatedly does NOT overwrite the value, instead it appends. To overwrite a value, instead set it to a false value first.
48 |
49 |
50 | ### Factory settings
51 |
52 | There are quite a few settings for how the module works, which are managed by a `factory` object.
53 |
54 | URI and File System Configuration:
55 | * `base_url` The base url on to which to append any slug given when an object is created
56 | * `base_dir` The base directory into which to write files, via factory.toFile()
57 | * `filename_extension` The extension to use on files written via toFile(), defaults to ".json"
58 | * `default_lang` The code for the default language to use on text values
59 | * `context_uri` The URI to use for `@context` in the JSON-LD serialization
60 | * `context_json` The parsed JSON object of the context from which the prefixes are derived
61 | * `full_names` Should the serialization use the full CRM names for classes and properties instead of the more readable ones defined in the mapping, defaults to False
62 | * `prefixes` A dictionary of prefix to URI for URIs to compress down to `prefix:slug` format
63 | * `prefixes_rev` The reverse of the prefixes dictionary
64 | * `pipe_scoped_contexts` A convenience setting for generating documentation, where properties that map to the same JSON output are represented as `short_name|full_name` to be post-processed.
65 | * `json_indent` How many spaces should each level of indentation be when serializing to a human readable form, defaults to 2
66 | * `id_type_label` Should the id, type and label properties all be used when serializing resources that have already been processed, defaults to True
67 | * `elasticsearch_compatible` Despite JSON-LD 1.0 compaction rules, should a single URI be represented as {"@id": "URI"} rather than just "URI", to make the resulting JSON compatible with elasticsearch and similar JSON processing engines. Defaults to False.
68 | * `serialize_all_resources` NOT YET IMPLEMENTED. If true, then all resources will be serialized separately, not just the top level resource.
69 |
70 | Model Validation and Generation:
71 | * `materialize_inverses` Should the inverse relationships be set automatically, defaults to False
72 | * `validate_properties` Should the model be validated at run time when setting properties, defaults to True (this allows you to save processing time once you're certain your code does the right thing)
73 | * `validate_properties` Should the properties be validated as being part of the model at all
74 | * `validate_profile` Should the profile of which terms should be used be validated
75 | * `process_multiplicity` Should properties that allow multiple values always be an array
76 | * `validate_range` Should the object be validated that it is legal to be the value of the property
77 | * `auto_assign_id` Should a URI be autogenerated and assigned, defaults to True
78 | * `auto_id_type` The method by which the URI is generated, taken from the following values:
79 | * "int" (just increment an integer in a single value space)
80 | * "int-per-type" (increment an integer, with a separate value space per class)
81 | * "int-per-segment" (increment an integer, with a separate value space per URI segment associated with a class)
82 | * "uuid" (just use UUIDs everywhere)
83 |
84 | Internal:
85 | * `debug_level` Settings for debugging errors and warnings, defaults to "warn"
86 | * `log_stream` An object implementing the stream API to write log messages to, defaults to sys.stderr
87 |
88 |
89 |
90 | ## How it Works
91 |
92 | At import time, the library parses the vocabulary data file (data/crm_vocab.tsv) and creates Python classes in the module's global scope from each of the defined RDF classes. The names of the classes are intended to be easy to use and remember, not necessarily identical to the CRM ontology's names. It also records the properties that can be used with that class, and at run time checks whether the property is defined and that the value fits the defined range.
93 |
94 | ## Hacking
95 |
96 | You can change the mapping by tweaking `utils/vocab_reader.py` and rerunning it to build a new TSV input file. See also the experimental code for loading completely different ontologies.
97 |
98 |
--------------------------------------------------------------------------------
/utils/make_jsonld_context.py:
--------------------------------------------------------------------------------
1 |
2 | import codecs
3 | import json
4 |
try:
    from collections import OrderedDict
except ImportError:
    # Narrowed from bare excepts: only missing modules are expected here.
    # Pre-2.7 pythons need the third-party backport.
    try:
        from ordereddict import OrderedDict
    except ImportError:
        raise Exception("To run with old pythons you must: easy_install ordereddict")
12 |
# Read the generated vocabulary TSV; the context manager guarantees the
# handle is closed even if reading fails
fn = '../cromulent/data/crm_vocab.tsv'
with codecs.open(fn, 'r', 'utf-8') as fh:
    lines = fh.readlines()[1:]  # Chomp header line
17 |
# Top-level JSON-LD context; OrderedDict keeps the serialized key order
# stable across runs. First the namespace prefixes:
context = OrderedDict()
context['@version'] = 1.1
context['crm'] = "http://www.cidoc-crm.org/cidoc-crm/"
context['sci'] = "http://www.ics.forth.gr/isl/CRMsci/"
context['rdf'] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
context['rdfs'] = "http://www.w3.org/2000/01/rdf-schema#"
context['dc'] = "http://purl.org/dc/elements/1.1/"
context['dcterms'] = "http://purl.org/dc/terms/"
context['schema'] = "http://schema.org/"
context['skos'] = "http://www.w3.org/2004/02/skos/core#"
context['foaf'] = 'http://xmlns.com/foaf/0.1/'
context['xsd'] = "http://www.w3.org/2001/XMLSchema#"
context['dig'] = "http://www.ics.forth.gr/isl/CRMdig/"
context["la"] = "https://linked.art/ns/terms/"
context["archaeo"] = "http://www.cidoc-crm.org/cidoc-crm/CRMarchaeo/"

## These are only aliases. The processing is defined by the spec.
context['id'] = "@id"
context['type'] = "@type"

# A (currently minimal) extension context, kept separate from the main one
extension = OrderedDict()
extension['@version'] = 1.1
extension['crm'] = "http://www.cidoc-crm.org/cidoc-crm/"

# Properties whose values are treated as vocabulary terms (@type: @vocab)
vocab_properties = ["assigned_property"]

# Partitioning property pairs: key -> [part/member term, part_of/member_of term]
parts = {
    "P9": ["crm:P9_consists_of", "crm:P9i_forms_part_of"],
    "P46": ["crm:P46_is_composed_of", "crm:P46i_forms_part_of"],
    "P106": ["crm:P106_is_composed_of", "crm:P106i_forms_part_of"],
    "P86": ["crm:P86i_contains", "crm:P86_falls_within"],
    "P89": ["crm:P89i_contains", "crm:P89_falls_within"],
    "P148": ["crm:P148_has_component", "crm:P148i_is_component_of"],
    "skos": ["skos:narrower", "skos:broader"],
    "set": ["la:has_member", "la:member_of"],
    "P107": ["crm:P107_has_current_or_former_member", "crm:P107i_is_current_or_former_member_of"]
}

# Scoped context attached to the assigned_property_type term (P177):
# maps readable part names onto the concrete CRM partitioning properties
p177_context = {
    "part": None,
    "temporal_part": "crm:P9_consists_of",
    "physical_part": "crm:P46_is_composed_of",
    "symbolic_part": "crm:P106_is_composed_of",
    "propositional_part": "crm:P148_has_component",
    "timespan_part": "crm:P86i_contains",
    "location_part": "crm:P89i_contains",
    "interest_part": "la:interest_part",
    "part_of": None,
    "temporal_part_of": "crm:P9i_forms_part_of",
    "physical_part_of": "crm:P46i_forms_part_of",
    "symbolic_part_of": "crm:P106i_forms_part_of",
    "propositional_part_of": "crm:P148i_is_component_of",
    "timespan_part_of": "crm:P86_falls_within",
    "location_part_of": "crm:P89_falls_within",
}
73 |
# Class name -> key into `parts`, selecting which partitioning pair the
# class's scoped @context should use
scoped_classes = {
    "Activity": "P9",
    "Acquisition": "P9",
    "TransferOfCustody": "P9",
    "Production": "P9",
    "AttributeAssignment": "P9",
    "HumanMadeObject": "P46",
    "LinguisticObject": "P106",
    "VisualItem": "P106", # XXX This is the symbolic partitioning, not the conceptual partitioning of P149
    "Identifier": "P106",
    "TimeSpan": "P86",
    "Place": "P89",
    "Type": "skos",
    "Language": "skos",
    "Material": "skos",
    "MeasurementUnit": "skos",
    "BeginningOfExistence": "P9",
    "EndOfExistence": "P9",
    "Creation": "P9",
    "Formation": "P9",
    "InformationObject": "P106",
    "Transformation": "P9",
    "Joining": "P9",
    "Leaving": "P9",
    "PropositionalObject": "P148",
    "Currency": "skos",
    "Payment": "P9",
    "Right": "P148",
    "Name": "P106",
    "Birth": "P9",
    "Death": "P9",
    "Event": "P9",
    "Destruction": "P9",
    "Move": "P9",
    "Modification": "P9",
    "Dissolution": "P9",
    "Period": "P9",
    "PhysicalThing": "P46",
    "PhysicalObject": "P46",
    "PhysicalFeature": "P46",
    "BiologicalObject": "P46",
    "Site": "P46",
    "PhysicalHumanMadeThing": "P46",
    "HumanMadeFeature": "P46",
    "Title": "P106",
    "Inscription": "P106",
    "Mark": "P106",
    "Appellation": "P106",
    "PartAddition": "P9",
    "PartRemoval": "P9",
    "SymbolicObject": "P106",
    "Purchase": "P9",
    "Set": "set",
    "Group": "P107",
    "Person": "P107"
}

# Per-class hand-written scoped contexts; currently empty but consulted
# by the generation loop below
other_scoped = {
}

# enforce these in the context
literal_types = [
    "xsd:dateTime"
]
# Let these default
empty_literal_types = [
    "rdfs:Literal",
    "xsd:string"
]
143 |
144 |
# Walk each TSV row and emit a context entry.  Rows are either classes
# (which may receive a scoped @context for their partitioning properties)
# or properties (which get @type/@container hints based on their range).
for l in lines:
	l = l[:-1] # chomp
	info= l.split('\t')
	name = info[0]
	if info[1] == "class":
		# map json key to ontology for @type:@vocab
		ctname = info[2]
		if name.startswith("E"):
			# Bare CRM entity numbers get the crm: prefix
			name = "crm:%s" % name
		context[ctname] = {"@id": name}
		if ctname in scoped_classes:
			# Look up the part / part_of term pair for this class
			part = parts[scoped_classes[ctname]][0]
			part_of = parts[scoped_classes[ctname]][1]

			# XXX member_of needs to be added to person and Group as Group one
			# and member_of_set for Set one
			# then member_of is Set for everything else

			if scoped_classes[ctname] in ['set', 'P107']:
				# Membership-style classes use member / member_of terms
				context[ctname]['@context'] = {
					"member": {"@id": part, "@type": "@id", "@container": "@set"},
					"member_of": {"@id": part_of, "@type": "@id", "@container": "@set"}
				}
			else:
				context[ctname]['@context'] = {
					"part": {"@id": part, "@type": "@id", "@container": "@set"},
					"part_of": {"@id": part_of, "@type": "@id", "@container": "@set"},
					"member_of": {"@id": parts["set"][1], "@type": "@id", "@container": "@set"}
				}
		# Add other scopes if needed
		if ctname in other_scoped:
			context[ctname]['@context'] = other_scoped[ctname]

	else:
		# Property row
		ctname = info[2]
		# part/part_of/member/member_of are only emitted via the scoped
		# class contexts above, never as top-level terms
		write = not ctname in ['part', 'part_of', 'member', 'member_of']
		# These need to be added correctly to all parents in the ontology
		# ... as above

		dmn = info[6]
		rng = info[7]
		# assumes the TSV row has at least 12 columns -- TODO confirm
		mult = info[11] or '1'
		if ctname in context:
			print("Already found: %s (%s vs %s)" % (ctname, context[ctname]['@id'], name))
		else:

			# Pick the @type hint from the property's declared range
			if rng:
				if rng in empty_literal_types:
					typ = None
				elif rng in literal_types:
					typ = rng
				elif ctname in vocab_properties:
					typ = "@vocab"
				else:
					typ = "@id"
			else:
				typ = None

			if name.startswith("P"):
				name = "crm:%s" % name

			if write:
				if not typ:
					context[ctname] = {"@id": name}
				elif mult == '1':
					# NOTE(review): mult flag '1' appears to mean "allows
					# multiple values" and so adds @container @set -- confirm
					# against the TSV generator
					context[ctname] = {"@id": name, "@type": typ, "@container":"@set"}
				else:
					context[ctname] = {"@id": name, "@type": typ}

				if ctname == "assigned_property_type":
					# Attach the P177 scoped context defined above
					context['assigned_property_type']['@context'] = p177_context

			# Otherwise, we're part / part_of, so ignore
			# print("scoped context: %s: %s on %s" % (ctname, name, dmn))
219 |
# Wrap the accumulated terms and write the context next to the packaged
# data files; the context manager guarantees the handle is closed
ctxt = {"@context": context}

outstr = json.dumps(ctxt, indent=2)
with open("../cromulent/data/linked-art.json", 'w') as fh:
	fh.write(outstr)
226 |
--------------------------------------------------------------------------------
/utils/process_ontologies.py:
--------------------------------------------------------------------------------
1 | from lxml import etree
2 | import codecs
3 | import json
4 | import sys
5 |
# When run with --profile, only (re)generate profile-related output
PROFILE_ONLY = '--profile' in sys.argv
# Sort position assigned to keys that have no explicit ordering
default_key_order = 10000

# Prefix -> namespace URI map used for all XPath queries and URI compression
NS = {'rdf':"http://www.w3.org/1999/02/22-rdf-syntax-ns#",
	'xsd':"http://www.w3.org/2001/XMLSchema#",
	'rdfs':"http://www.w3.org/2000/01/rdf-schema#",
	'dcterms':"http://purl.org/dc/terms/",
	'owl':"http://www.w3.org/2002/07/owl#",
	'crm':"http://www.cidoc-crm.org/cidoc-crm/",
	'xml': "http://www.w3.org/XML/1998/namespace",
	'ore': "http://www.openarchives.org/ore/terms/",
	'la': "https://linked.art/ns/terms/",
	"skos": "http://www.w3.org/2004/02/skos/core#",
	"schema": "http://schema.org/",
	"dc": "http://purl.org/dc/elements/1.1/",
	"geo": "http://www.ics.forth.gr/isl/CRMgeo/",
	"dig": "http://www.ics.forth.gr/isl/CRMdig/",
	"sci": "http://www.ics.forth.gr/isl/CRMsci/",
	"archaeo": "http://www.cidoc-crm.org/cidoc-crm/CRMarchaeo/"
	}
26 |
27 |
28 | # Order imposed by the library
29 | # @context = 0, id = 1, rdf:type = 2
30 | # rdfs:label = 5, rdf:value = 6, dc:description = 7
31 |
def _load_json(path):
	"""Read and parse a JSON data file, closing the handle deterministically."""
	with open(path) as fh:
		return json.load(fh)

# Preferred serialization order for JSON keys
key_order_hash = _load_json('../cromulent/data/key_order.json')

# Allow configuration of overrides for the mapping of ontology to python/json
property_overrides = _load_json('../cromulent/data/overrides.json')

# Allow subsetting of CRM into in-use / not-in-use to enable the library
# to warn on instantiation of not-in-use properties or classes
profile_flags = _load_json('../cromulent/data/crm-profile.json')
49 |
# Accumulators shared by the process_* functions below:
# stuff: TSV rows; propXHash/classXHash: name -> [element, useflag] indexes
stuff = []
propXHash = {}
classXHash = {}
53 |
def process_classes(dom):
	"""Extract every rdfs:Class from ``dom`` and append a TSV row to ``stuff``.

	Also records each class (and the targets of its subClassOf links) in the
	module-level classXHash so later passes can patch in use-flags.
	"""
	classes = dom.xpath("//rdfs:Class", namespaces=NS)
	for c in classes:
		name = c.xpath('@rdf:about', namespaces=NS)[0]
		# Compress the full URI down to prefix:LocalName form
		for (pref, ns) in NS.items():
			if name.startswith(ns):
				name = name.replace(ns, "%s:" % pref)
				break

		if not name in profile_flags:
			print(" WARNING: %s not in profile" % name)
		useflag = str(profile_flags.get(name, 0))
		if name in classXHash:
			classXHash[name][0] = c
		else:
			classXHash[name] = [c, useflag]

		# assumes an English rdfs:label is present -- IndexError otherwise
		label = c.xpath('./rdfs:label[@xml:lang="en"]/text()', namespaces=NS)[0]
		try:
			comment = c.xpath('./rdfs:comment/text()', namespaces=NS)[0]
			comment = comment.strip()
			# Escape newlines/tabs so the comment stays on a single TSV line
			comment = comment.replace('\n', '\\n').replace('\t', ' ')
		except IndexError:
			# narrowed from a bare except: only "no comment present" is expected
			comment = ""

		subClsL = c.xpath('./rdfs:subClassOf/@rdf:resource', namespaces=NS)
		if subClsL:
			# could be multiples
			subCls = '|'.join(subClsL)
			# Mark every superclass as needed (flag 3), even if not seen yet
			for s in subClsL:
				try:
					classXHash[s][1] = 3
				except KeyError:
					classXHash[s] = [None, 3]
		else:
			subCls = ""

		# Hack extensions to be readable :(
		if name == "geo:SP4_Spatial_Coordinate_Reference_System":
			ccname = "CoordinateSystem"
		elif name == "geo:SP5_Geometric_Place_Expression":
			ccname = "Geometry"
		elif name == "geo:SP6_Declarative_Place":
			ccname = "DeclarativePlace"
		elif name == "E33_E41_Linguistic_Appellation":
			ccname = "Name"
		elif name == "dig:D1_Digital_Object":
			ccname = "DigitalObject"
		elif name == "sci:S19_Encounter_Event":
			ccname = "Encounter"
		else:
			# Assume that we've done our job okay and put in overrides for NSS
			cidx = name.find(":")
			if cidx > -1:
				ccname = name[cidx+1:]
			else:
				uc1 = name.find("_")
				ccname = name[uc1+1:]
			ccname = ccname.replace("_or_", "_Or_").replace("_of_", "_Of_")
			ccname = ccname.replace('-', '').replace('_', '')

		stuff.append([name, "class", ccname, label, comment, subCls, useflag])
116 |
def process_props(dom):
	"""Collect every rdf:Property in `dom` into the module-level propXHash
	and append a TSV row for each to the module-level `stuff` list.

	Raises ValueError if a property lacks an English rdfs:label.
	"""
	props = dom.xpath("//rdf:Property",namespaces=NS)
	for p in props:
		name = p.xpath('@rdf:about', namespaces=NS)[0]


		# replace archaeo first, as a superstring of crm base :(
		if name.startswith("http://www.cidoc-crm.org/cidoc-crm/CRMarchaeo/"):
			name = name.replace("http://www.cidoc-crm.org/cidoc-crm/CRMarchaeo/", "archaeo:")

		# Compact the full URI to a prefixed name; first matching namespace wins
		for (pref,ns) in NS.items():
			if name.startswith(ns):
				name = name.replace(ns, "%s:" % pref)
				break

		if not name in profile_flags:
			print(" WARNING: %s not in profile" % name)
		# Properties carry a pair of flags: [okay-to-use, okay-for-multiple]
		useflags = profile_flags.get(name, [0,0]) or [0,0]
		propXHash[name] = [p, useflags[0]]

		try:
			label = p.xpath('./rdfs:label[@xml:lang="en"]/text()', namespaces=NS)[0]
		except:
			# An English label is mandatory; dump what we found before failing
			print(p.xpath('./@rdf:about', namespaces=NS))
			print(p.xpath('./rdfs:label/text()', namespaces=NS))
			raise ValueError
		try:
			comment = p.xpath('./rdfs:comment/text()', namespaces=NS)[0]
			comment = comment.strip()
			comment = comment.replace('\n', '\\n').replace('\t', ' ')
		except:
			# Scope note is optional
			comment = ""

		domn = p.xpath('./rdfs:domain/@rdf:resource', namespaces=NS)
		if domn:
			domn = domn[0]
			for (k,v) in NS.items():
				domn = domn.replace(v,"%s:" % k)
		else:
			domn = ""
		rang = p.xpath('./rdfs:range/@rdf:resource', namespaces=NS)
		if rang:
			rang = rang[0]
			for (k,v) in NS.items():
				rang = rang.replace(v,"%s:" % k)
		else:
			rang = ""

		subProp = p.xpath('./rdfs:subPropertyOf/@rdf:resource', namespaces=NS)
		if subProp:
			subProp = subProp[0]
		else:
			subProp = ""

		inverse = p.xpath('./owl:inverseOf/@rdf:resource', namespaces=NS)
		if inverse:
			inverse = inverse[0]
			for (pref,ns) in NS.items():
				if inverse.startswith(ns):
					inverse = inverse.replace(ns, "%s:" % pref)
					break
		else:
			inverse = ""

		# Derive the JSON-LD key name, honouring explicit overrides first
		cidx = name.find(":")
		if name in property_overrides:
			ccname = property_overrides[name]
		elif cidx > -1:
			ccname = name[cidx+1:]
		else:
			uc1 = name.find("_")
			pno = name[:uc1]
			if pno in property_overrides:
				ccname = property_overrides[pno]
			else:
				ccname = name[uc1+1:]
		# Normalise: drop hyphens and leading is_/has_/had_/was_ verb prefixes
		ccname = ccname.replace("-", "")
		if ccname.startswith("is_"):
			ccname = ccname[3:]
		elif ccname.startswith("has_") or ccname.startswith("had_") or ccname.startswith("was_"):
			ccname = ccname[4:]

		koi = str(key_order_hash.get(ccname, default_key_order))

		# [0/1/2, 0/1] for [no/okay/warn, single/multiple]
		stuff.append([name, "property", ccname, label, comment, subProp, domn, rang, inverse, koi,
			str(useflags[0]), str(useflags[1])])
204 |
205 |
# This order is important.
# Need to process the class definition before the properties of the class
# linkedart defines properties against the classes in the core and extensions
# so needs to come last

files = ['cidoc.xml', 'linkedart_crm_enhancements.xml', 'linkedart.xml']

for fn in files:
	print("processing: %s" % fn)
	with open('data/%s' % fn) as fh:
		data = fh.read()
	try:
		# lxml rejects unicode strings that carry an XML encoding
		# declaration, so hand it utf-8 bytes first
		dom = etree.XML(data.encode('utf-8'))
	except Exception:
		dom = etree.XML(data)
	process_classes(dom)
	process_props(dom)


headers = ["term", "term type", "json-ld key", "label", "scope note", "subPropertyOf", "domain", \
	"range", "inverse", "key order", "okay to use?", "okay for multiple?"]

# Write the vocabulary TSV; the context manager guarantees the handle is
# closed even if a row fails to serialize.
with codecs.open('../cromulent/data/crm_vocab.tsv', 'w', 'utf-8') as fh:
	# write header
	fh.write('\t'.join(headers) + '\n')
	for l in stuff:
		name = l[0]
		line = '\t'.join(l) + "\n"
		if name in classXHash:
			okay = classXHash[name][1]
		elif name in propXHash:
			okay = propXHash[name][1]
		else:
			okay = 0
			print("Could not find %s" % name)
		# Emit every row unless restricted to the in-profile subset
		if not PROFILE_ONLY or okay:
			fh.write(line)
249 |
--------------------------------------------------------------------------------
/tests/test_dimensions.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import unittest
4 | try:
5 | from contextlib import suppress
6 | except:
7 | # Python 2.7
8 | suppress = None
9 | import pprint
10 | from datetime import datetime
11 | from cromulent.extract import Dimension, normalized_dimension_object
12 | import cromulent.extract
13 |
class TestDimensionExtraction(unittest.TestCase):
	'''
	Test the ability to extract various formats of dimensions.
	'''
	def setUp(self):
		pass

	def tearDown(self):
		pass

	def test_parse_simple_dimensions(self):
		'''
		Test the documented formats that `cromulent.extract.parse_simple_dimensions` can parse
		and ensure that it returns the expected data.
		'''
		tests = {
			"3'": [Dimension(3, 'feet', None)],
			'3 feet': [Dimension(3, 'feet', None)],
			'3 foot': [Dimension(3, 'feet', None)],
			'3 ft': [Dimension(3, 'feet', None)],
			'3 ft.': [Dimension(3, 'feet', None)],
			'2"': [Dimension(2, 'inches', None)],
			'2 in': [Dimension(2, 'inches', None)],
			'2 in.': [Dimension(2, 'inches', None)],
			'2 inch': [Dimension(2, 'inches', None)],
			'2 inches': [Dimension(2, 'inches', None)],
			'2 duymen': [Dimension(2, 'inches', None)],
			'2 d.': [Dimension(2, 'inches', None)],
			'2 d': [Dimension(2, 'inches', None)],
			'''2'8"''': [Dimension(2, 'feet', None), Dimension(8, 'inches', None)],
			'4cm': [Dimension(4, 'cm', None)],
			'2 pieds 3 pouces': [Dimension(2, 'fr_feet', None), Dimension(3, 'fr_inches', None)],
			'1 pied 7 pouces': [Dimension(1, 'fr_feet', None), Dimension(7, 'fr_inches', None)],
			'8 pouce': [Dimension(8, 'fr_inches', None)],
			'8 pouces': [Dimension(8, 'fr_inches', None)],
			'8 1/2 pouces': [Dimension(8.5, 'fr_inches', None)],
			'8 1/4 pouces': [Dimension(8.25, 'fr_inches', None)],
			'8 1/8 pouces': [Dimension(8.125, 'fr_inches', None)],
			'1': [Dimension(1, None, None)],

			# values without a unit that follow values with a unit stay in the same system but using the next-finer unit
			'2 pieds 3': [Dimension(2, 'fr_feet', None), Dimension(3, 'fr_inches', None)],
			"1' 3": [Dimension(1, 'feet', None), Dimension(3, 'inches', None)],
		}

		for value, expected in tests.items():
			dims = cromulent.extract.parse_simple_dimensions(value)
			if expected is not None:
				self.assertIsInstance(dims, list)
				self.assertEqual(dims, expected, msg='dimensions: %r' % (value,))
			else:
				self.assertIsNone(dims)

	def test_dimension_cleaner(self):
		'''
		Test the documented formats that `cromulent.extract.dimensions_cleaner` can parse
		and ensure that it returns the expected data.
		'''
		tests = {
			'''2 in by 1 in''': ([Dimension(2, 'inches', None)], [Dimension(1, 'inches', None)]),
			'''2'2"h x 2'8"w''': ([Dimension(2, 'feet', 'height'), Dimension(2, 'inches', 'height')], [Dimension(2, 'feet', 'width'), Dimension(8, 'inches', 'width')]),
			'''1'3"x4cm h''': ([Dimension(1, 'feet', None), Dimension(3, 'inches', None)], [Dimension(4, 'cm', 'height')]),
			'''1'3" by 4"''': ([Dimension(1, 'feet', None), Dimension(3, 'inches', None)], [Dimension(4, 'inches', None)]),
			'Haut 14 pouces, large 10 pouces': ([Dimension(14, 'fr_inches', 'height')], [Dimension(10, 'fr_inches', 'width')]),
			'Haut. 48 pouces, large 68 pouces': ([Dimension(48, 'fr_inches', 'height')], [Dimension(68, 'fr_inches', 'width')]),
			'1 by 4': ([Dimension(1, None, None)], [Dimension(4, None, None)]),
			'Hoog. 6 v., breed 3 v': ([Dimension(6, 'feet', 'height')], [Dimension(3, 'feet', 'width')]),
			'Breedt 6 v., hoog 3 v': ([Dimension(6, 'feet', 'width')], [Dimension(3, 'feet', 'height')]),
			'20 cm x 24,5 cm': ([Dimension(20, 'cm', None)], [Dimension(24.5, 'cm', None)]),
			'2 w by 5 h': ([Dimension(2, None, 'width')], [Dimension(5, None, 'height')]),
			'Hauteur 1 pied 4 pouces, largeur 1 pied 1/2 pouc.': ([Dimension(1, 'fr_feet', 'height'), Dimension(value=4, unit='fr_inches', which='height')], [Dimension(1, 'fr_feet', 'width'), Dimension(value=0.5, unit='fr_inches', which='width')]),
			'h.73 pouces 4 lignes, l.50 pouces': ([Dimension(value=73, unit='fr_inches', which='height'), Dimension(value=4, unit='ligne', which='height')], [Dimension(value=50, unit='fr_inches', which='width')]),
			'haut. 5 pouc. larg. 5 pouc. 4 linges': ([Dimension(value=5, unit='fr_inches', which='height')], [Dimension(value=5, unit='fr_inches', which='width'), Dimension(value=4, unit='ligne', which='width')]),
			'haut. 9 pouc. 4 lignes larg. 10 pouc. 4 linges': ([Dimension(value=9, unit='fr_inches', which='height'), Dimension(value=4, unit='ligne', which='height')], [Dimension(value=10, unit='fr_inches', which='width'), Dimension(value=4, unit='ligne', which='width')]),
			'h 38 cm, w 27 cm': ([Dimension(38, 'cm', 'height')], [Dimension(27, 'cm', 'width')]),
			"hauteur 9 pouces, largeur 7": ([Dimension(value=9, unit='fr_inches', which='height')], [Dimension(value=7, unit=None, which='width')]),
		}

		for value, expected in tests.items():
			dims = cromulent.extract.dimensions_cleaner(value)
			if expected is not None:
				self.assertIsInstance(dims, tuple)
				self.assertEqual(dims, expected, msg='dimensions: %r' % (value,))
			else:
				self.assertIsNone(dims)

	def test_extract_physical_dimensions(self):
		'''
		Test the documented formats that `cromulent.extract.extract_physical_dimensions`
		can parse and ensure that it returns the expected data.
		'''
		# Fix: cromulent.vocab and cromulent.model are used below but the
		# module header only imports cromulent.extract; import them here so
		# the attribute lookups resolve regardless of what extract imports.
		import cromulent.vocab
		import cromulent.model

		tests = {}
		h9l7_height = cromulent.vocab.Height(ident='', content=9.0)
		h9l7_height.identified_by = cromulent.model.Name(ident='', content='9 French inches')
		h9l7_height.unit = cromulent.vocab.instances.get('fr_inches')
		h9l7_width = cromulent.vocab.Width(ident='', content=7.0)
		tests["hauteur 9 pouces, largeur 7"] = [h9l7_height, h9l7_width]

		for value, expected_dims in tests.items():
			dims = list(cromulent.extract.extract_physical_dimensions(value))
			for got, expected in zip(dims, expected_dims):
				self.assertEqual(got.value, expected.value)
				self.assertEqual(got.type, expected.type)

				# Optional attributes may be absent on either side
				if suppress is None:
					# Python 2.7 (no contextlib.suppress)
					if hasattr(expected, 'unit'):
						self.assertEqual(got.unit, expected.unit)
					if hasattr(expected, 'classified_as'):
						self.assertEqual(got.classified_as, expected.classified_as)
					if hasattr(expected, 'identified_by'):
						self.assertEqual(got.identified_by, expected.identified_by)
				else:
					with suppress(AttributeError):
						self.assertEqual(got.unit, expected.unit)
					with suppress(AttributeError):
						self.assertEqual(got.classified_as, expected.classified_as)
					with suppress(AttributeError):
						self.assertEqual(got.identified_by, expected.identified_by)

	def test_extract_physical_dimensions_with_default(self):
		'''
		Test the documented formats that `cromulent.extract.extract_physical_dimensions`
		can parse, specifying a default unit, and ensure that it returns the expected data.
		'''
		# Fix: see note in test_extract_physical_dimensions — these modules
		# are not imported at the top of the file.
		import cromulent.vocab
		import cromulent.model

		tests = {}
		h9l7_height = cromulent.vocab.Height(ident='', content=9.0)
		h9l7_height.identified_by = cromulent.model.Name(ident='', content='9 French inches')
		h9l7_height.unit = cromulent.vocab.instances.get('fr_inches')
		h9l7_width = cromulent.vocab.Width(ident='', content=7.0)
		h9l7_width.unit = cromulent.vocab.instances.get('inches')
		tests["hauteur 9 pouces, largeur 7"] = [h9l7_height, h9l7_width]

		for value, expected_dims in tests.items():
			dims = list(cromulent.extract.extract_physical_dimensions(value, default_unit='inches'))
			for got, expected in zip(dims, expected_dims):
				self.assertEqual(got.value, expected.value)
				self.assertEqual(got.type, expected.type)
				self.assertEqual(got.unit, expected.unit)

	def test_normalize_dimension(self):
		'''
		Ensure parsed dimension lists normalize to a single Dimension in the
		finest unit, with a human-readable label.
		'''
		tests = {
			'1 ft, 2 in': ('1 foot, 2 inches', Dimension(value=14, unit='inches', which=None)),
			'8 1/2 pouces': ('8.5 French inches', Dimension(value=8.5, unit='fr_inches', which=None)),
			'1 pied 7 pouces': ('1 French foot, 7 French inches', Dimension(value=19, unit='fr_inches', which=None)),
			'2 pied 1/2 pouces': ('2 French feet, 0.5 French inches', Dimension(value=24.5, unit='fr_inches', which=None)),
			'1 pied 3 pouce. 3 linges': ('1 French foot, 3 French inches, 3 lignes', Dimension(value=15.25, unit='fr_inches', which=None)),
			"4' 8": ('4 feet, 8 inches', Dimension(value=56, unit='inches', which=None)),
			"1 pied 2": ('1 French foot, 2 French inches', Dimension(value=14, unit='fr_inches', which=None)),
		}
		for value, expected in tests.items():
			elabel, edim = expected
			dims = cromulent.extract.parse_simple_dimensions(value)
			dim, label = normalized_dimension_object(dims)
			self.assertEqual(label, elabel)
			self.assertEqual(dim, edim)
175 |
# Allow running this test module directly: python test_dimensions.py
if __name__ == '__main__':
	unittest.main()
178 |
--------------------------------------------------------------------------------
/utils/data/linkedart_crm_enhancements.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | Digital Object
7 | This class comprises identifiable immaterial items that can be represented as sets of bit sequences, such as data sets, e-texts, images, audio or video items, software, etc., and are documented as single units. Any aggregation of instances of D1 Digital Object into a whole treated as single unit is also regarded as an instance of D1 Digital Object. This means that for instance, the content of a DVD, an XML file on it, and an element of this file, are regarded as distinct instances of D1 Digital Object, mutually related by the P106 is composed of (forms part of) property. A D1 Digital Object does not depend on a specific physical carrier, and it can exist on one or more carriers simultaneously.
8 |
9 |
10 |
11 |
12 | triggers
13 |
14 |
15 |
16 |
17 |
18 |
19 | triggered by
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 | Encounter
28 |
29 | This class comprises activities of S4 Observation (substance) where an E39 Actor encounters an instance of E18 Physical Thing of a kind relevant for the mission of the observation or regarded as potentially relevant for some community (identity). This observation produces knowledge about the existence of the respective thing at a particular place in or on surrounding matter. This knowledge may be new to the group of people the actor belongs to. In that case we would talk about a discovery. The observer may recognize or assign an individual identity of the thing encountered or regard only the type as noteworthy in the associated documentation or report.
Note that this representation treats S19 as a subClass of only E7 Activity for ease of implementation, as we do not need the full set of relationships available via the complete hierarchy. In the full CRMsci, it is Activity -> Attribute Assignment -> Observation -> Encounter.
31 |
32 |
33 |
34 |
35 | encountered object
36 | This property associates an instance of S19 Encounter Event with an instance of E18 Physical
37 | Thing that has been found. e.g. The finding (S19) encountered (O19) the 18 arrowheads (E18) from Lerna in Argolis
38 |
39 |
40 |
41 |
42 |
43 |
44 | was encountered at
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 | occurs during
54 | This property identifies a situation in which the entire instance of the E52 Time-Span of an instance of E2 Temporal Entity is within the instance of the E52 Time-Span of another instance of E2 Temporal Entity that starts before and ends after the included temporal entity.
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 | label
68 | A human-readable name for the subject.
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 | exactMatch
82 | Exact Match, not quite sameAs, good for most uses
83 |
84 |
85 |
86 |
87 |
88 | closeMatch
89 | Close Match, good for some uses
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 | has narrower term
98 | Or is broader term of
99 |
100 |
101 |
102 |
103 |
104 |
105 | has broader term
106 | Or is narrower term of
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 | has top concept
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 | is top concept of
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 | is in scheme
134 | Relates a resource (for example a concept) to a concept scheme in which it is included.
135 | A concept may be a member of more than one concept scheme.
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 | seeAlso
145 | A related resource, that is machine readable and related to the current resource.
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 | conforms to
155 | Some thing conforms to some standard
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 | format
164 | The media type of the information object
165 |
166 |
167 |
168 |
169 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright {yyyy} {name of copyright owner}
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/examples/json-to-lod.py:
--------------------------------------------------------------------------------
1 |
2 | import json
3 | from cidoc_orm import factory, TimeSpan, ManMadeObject, Type, Identifier, \
4 | Production, Person, Place, Group, Material, Type, Mark, Right, Document, \
5 | Activity
6 | import re
7 |
# Meta meta
# Extension-class table: each entry becomes a subclass of `parent` whose
# _p2_has_type is the given Getty vocabulary term (see the type() loop below).
ext_classes = {
    "TMSNumber": {"parent": Identifier, "vocab": "aat", "id": "300404621"},
    "AccessionNumber": {"parent": Identifier, "vocab": "aat", "id": "300312355"},
    "Inscription": {"parent": Mark, "vocab": "aat", "id": "300028702"},
    "Signature": {"parent": Mark, "vocab": "aat", "id": "300028705"},
    "Exhibition": {"parent": Activity, "vocab": "aat", "id": "300054766"},
    "Painting": {"parent": ManMadeObject, "vocab": "aat", "id": "300033618"},
    "Sculpture": {"parent": ManMadeObject, "vocab": "aat", "id": "300047090"},
    # Fix: "Drawing" was listed twice with identical values; the duplicate
    # key silently overwrote the first entry and has been removed.
    "Drawing": {"parent": ManMadeObject, "vocab": "aat", "id": "300033973"},
    "Miniature": {"parent": ManMadeObject, "vocab": "aat", "id": "300033936"},
    "Tapestry": {"parent": ManMadeObject, "vocab": "aat", "id": "300205002"},
    "Furniture": {"parent": ManMadeObject, "vocab": "aat", "id": "300037680"},
    "Mosaic": {"parent": ManMadeObject, "vocab": "aat", "id": "300015342"},
    "Photograph": {"parent": ManMadeObject, "vocab": "aat", "id": "300046300"},
    "Coin": {"parent": ManMadeObject, "vocab": "aat", "id": "300037222"},
    "Vessel": {"parent": ManMadeObject, "vocab": "aat", "id": "300193015"},
    "PhotographPrint": {"parent": ManMadeObject, "vocab": "aat", "id": "300127104"},
    "PhotographAlbum": {"parent": ManMadeObject, "vocab": "aat", "id": "300026695"},
    "PhotographBook": {"parent": ManMadeObject, "vocab": "aat", "id": "300265728"}
}
30 |
# Jewelry
# Text Book Album
# Implement

# Note many sub types of Vessels, including
# Bowl, Flask, Beaker, Cup, Jar, Amphora,

# Build one subclass per ext_classes entry.  Each generated class carries
# its Getty vocabulary URI in _p2_has_type and is installed in this
# module's namespace under its declared name, so later code can refer to
# it directly (e.g. Painting, TMSNumber).
for cls_name, spec in ext_classes.items():
	new_cls = type(cls_name, (spec['parent'],), {})
	new_cls._p2_has_type = "http://vocab.getty.edu/%s/%s" % (spec['vocab'], spec['id'])
	globals()[cls_name] = new_cls
42 |
# Map TMS classification / object-type labels to the ORM classes generated
# from ext_classes above.
aat_type_mapping = {
	"Painting": Painting,
	"Paintings": Painting,
	"Drawing": Drawing,
	"Furniture": Furniture,
	"Coin": Coin,
	"Sculpture": Sculpture,
	"Vessels": Vessel
}

# "panel": "300014657" # A wooden support

# AAT ids for object parts mentioned in medium descriptions.
aat_part_mapping = {
	"supports": "300014844" # The thing that is painted on
}

# AAT ids for material terms found in free-text medium descriptions
# (consumed by parse_materials below).
aat_material_mapping = {
	"watercolor": "300015045",
	"oil": "300015050",
	"tempera": "300015062",
	"canvas": "300014078",
	"oak": "300012264",
	"gold leaf": "300264831",
	"paper": "300014109",
	"copper": "300011020",
	"terracotta": "300010669",
	"glass": "300010797",
	"chalk": "300011727",
	"bronze": "300010957",
	"marble": "300011443",
	"albumen silver print": "300127121",
	"gelatin silver print": "300128695",
	"silver": "300011029"
}

# AAT ids for culture / nationality labels.
aat_culture_mapping = {
	"french": "300111188",
	"italian": "300111198",
	"german": "300111192",
	"dutch": "300020929"
}

# AAT ids for dimension types.
dim_type_mapping = {
	"height": "300055644",
	"width": "300055647",
	"depth": "300072633",
	"diameter": "300055624",
	"weight": "300056240"
}
92 |
93 |
# Meta
class CreditLine(Right):
	"""Right subtype whose `value` carries the museum's credit line text."""
	def __init__(self, *args, **kw):
		super(CreditLine, self).__init__(*args, **kw)
		# XXX Find a good Type for this
		self.has_type = Type("http://example.org/ns/creditline")
# Register a literal-valued `value` property so the credit-line string
# can be attached directly to the instance.
CreditLine._properties['value'] = {"rdf": "rdfs:value", "range": str}
101 |
class SourceCreditLine(CreditLine):
	"""CreditLine subtype for the source (as opposed to museum) credit line."""
	def __init__(self, *args, **kw):
		super(SourceCreditLine, self).__init__(*args, **kw)
		# XXX Find a good Type for this
		# Reset has_type to [] first: the CreditLine base __init__ already
		# assigned the generic creditline Type — presumably the ORM
		# accumulates repeated property sets, so this clears the inherited
		# value before assigning the specific one.  TODO confirm against
		# cromulent's property semantics.
		self.has_type = []
		self.has_type = Type("http://example.org/ns/sourcecreditline")
108 |
class Department(Group):
	"""A curatorial department, automatically made a member of the Museum."""
	def __init__(self, *args, **kw):
		super(Department, self).__init__(*args, **kw)
		# Museum is a module-level global assigned further down the file;
		# it exists by the time any Department is instantiated in the
		# processing loop.
		self.is_current_or_former_member_of = Museum
113 |
114 |
# Monkey patch Type's _toJSON to only emit full data if not just URI+type
def typeToJSON(self, top=False):
	"""Serialize a Type as a full object only when it carries extra data,
	otherwise emit just its URI string.

	NOTE(review): `top` is accepted but ignored and is not forwarded to
	the parent _toJSON — confirm whether that matters for top-level
	serialization.
	"""
	props = self.__dict__.keys()
	# > 3 assumes three baseline instance attributes on every Type —
	# presumably id/type/label bookkeeping; verify against the ORM before
	# changing this threshold.
	if len(props) > 3:
		return super(Type, self)._toJSON()
	else:
		return self.id

Type._toJSON = typeToJSON
# Extra, non-CRM properties borrowed from schema.org for people and objects.
Person._properties['familyName'] = {"rdf": "schema:familyName", "range": str}
Person._properties['givenName'] = {"rdf": "schema:givenName", "range": str}
ManMadeObject._properties['culture'] = {"rdf": "schema:genre", "range": Type}


# Serialization settings for the cidoc_orm factory.
factory.base_url = "http://data.getty.edu/museum/"
factory.default_lang = "en"

# Caches of Department / Place objects keyed by their source ids, so the
# main loop reuses one object per department/location.
departments = {}
locations = {}

# Fixed institutional hierarchy shared by every record.
GettyTrust = Group("http://vocab.getty.edu/ulan/500115987")
GettyTrust.label = "J. Paul Getty Trust"
Museum = Group("http://vocab.getty.edu/ulan/500115988")
Museum.is_current_or_former_member_of = GettyTrust
Museum.label = "J Paul Getty Museum"
140 |
# "<paint> on <support>" and "<a> and <b>" patterns in medium descriptions.
painting_on_re = re.compile("^(.+?) on (.+?)$")
painting_and_re = re.compile("^(.+?) and (.+?)$")

def parse_materials(materials, typ):
	"""Extract Material resources from a free-text medium description.

	Only Painting media are currently parsed (e.g. "oil on canvas",
	"Oil and tempera on oak panel"); any other class yields an empty
	list.  Each recognized term becomes a Material whose id points at
	its AAT entry via aat_material_mapping; unrecognized terms are
	silently skipped.

	FIX: dict.has_key() (removed in Python 3) replaced with the `in`
	operator, which behaves identically on Python 2 and 3.
	"""
	mats = []
	if typ == Painting:
		# Test for X on Y
		mat = materials.lower()
		m = painting_on_re.match(mat)

		if m:
			paint = m.groups()[0]

			# x and y
			m2 = painting_and_re.match(paint)
			if m2:
				paints = m2.groups()
			else:
				paints = [paint]
			for p in paints:
				if p in aat_material_mapping:
					mats.append(Material("http://vocab.getty.edu/aat/%s" % aat_material_mapping[p]))

			support = m.groups()[1]
			if support in aat_material_mapping:
				mats.append(Material("http://vocab.getty.edu/aat/%s" % aat_material_mapping[support]))
			else:
				# look for common adjectives, ()s: try each word of the
				# support phrase individually (e.g. "oak panel" -> "oak")
				for sw in support.split(' '):
					if sw in aat_material_mapping:
						mats.append(Material("http://vocab.getty.edu/aat/%s" % aat_material_mapping[sw]))

	return mats
179 |
180 |
# Load the cached TMS object records (a dict keyed by record id).
# FIX: the Python-2-only file() builtin and manual read/close replaced
# with `with open(...)` context managers (no leaked handles), and
# read()+json.loads collapsed into json.load.
with open('record_cache.json') as fh:
	cache = json.load(fh)


# Load up people, de-duplicating on constituent id (first record wins).
with open('500_people.json') as fh:
	peoplel = json.load(fh)
people = {}
for who in peoplel:
	whoid = str(who['id'])
	if whoid in people:
		continue
	wrec = {'id': whoid}
	wrec['type'] = who['type']
	wrec['date'] = who['display_date']
	wrec['name'] = who['display_name']
	wrec['nationality'] = who['display_nationality']
	wrec['birthplace'] = who['display_birthplace']
	wrec['deathplace'] = who['display_deathplace']
	wrec['institution'] = who['display_institution']
	wrec['image'] = who['display_image']
	wrec['biography'] = who['display_biography']
	people[whoid] = wrec
209 |
# --- Main conversion loop: builds one ManMadeObject (plus its linked
# identifiers, production, materials, exhibitions, etc.) per TMS record.
print "Processing..."

#recs = cache.values()
recs = [cache['645']]   # single test record; use cache.values() for a full run

ldrecs = []
for rec in recs:
	ident = str(rec['id'])

	# Build a Foo type of MMO
	clslabel = rec['classification']['name']
	clsid = str(rec['classification']['id'])
	ot = rec['object_types'] # {'primary': {}, '???': {}}
	try:
		otid = str(ot['primary']['id'])
		otlabel = ot['primary']['display_value']
	except:
		# record has no usable primary object type
		otid = ""
		otlabel = ""

	# Choose the most specific generated class: photograph subtypes first,
	# then the object-type label, then the classification label, otherwise
	# the generic ManMadeObject.
	if clslabel == "Photographs":
		if otlabel == "Print":
			obj = PhotographPrint(ident)
		elif otlabel == "Album":
			obj = PhotographAlbum(ident)
		elif otlabel == "Book":
			obj = PhotographBook(ident)
		elif otlabel.lower() == "cased object":
			# Treat as print?
			obj = PhotographPrint(ident)
		else:
			# NOTE(review): this branch leaves `obj` unassigned (or stale
			# from the previous iteration) — confirm intended behavior.
			print "Unknown photograph subtype: %s" % otlabel
	elif aat_type_mapping.has_key(otlabel):
		obj = aat_type_mapping[otlabel](ident)
	elif aat_type_mapping.has_key(clslabel):
		obj = aat_type_mapping[clslabel](ident)
	else:
		obj = ManMadeObject(ident)
		# print "ot: '%s' ; cls: '%s'" % (otlabel, clslabel)

	# Attach the TMS classification as an extra Type
	t = Type(str(clsid))
	t.label = clslabel
	obj.has_type = t

	# TMS internal identifier
	tms = TMSNumber(ident)
	tms.value = ident
	obj.is_identified_by = tms

	# Museum accession number
	recno = rec['number']
	accno = AccessionNumber(recno)
	accno.value = recno
	obj.is_identified_by = accno

	obj.label = rec['title']
	try:
		obj.description = rec['description']['display']['value']
	except:
		# description is optional / may be shaped differently
		pass

	# Production event with the (as yet unparsed) display date
	production = Production(ident)
	obj.was_produced_by = production
	ts = TimeSpan(ident)
	ts.description = rec['date']
	# XXX Parse date string for dates
	production.has_timespan = ts

	# XXX if there are multiple makers with different roles,
	# create a super Production with components, and each
	# role gets a separate component

	for mk in rec['makers']:
		mkid = str(mk['id'])
		role = mk['role']

		who = Person(mkid)

		# Find in person db or deref
		first = mk['name_first']
		last = mk['name_last']

		try:
			person = people[mkid]
			who.label = person['name']
			who.description = person['biography']
			who.givenName = first
			who.familyName = last
			# placeholders — biography parsing not implemented yet
			who.birthPlace = Place()
			who.deathPlace = Place()
			who.birthDate = ""
			who.deathDate = ""
		except:
			pass

		production.carried_out_by = who
		# XXX Link to ULAN

	# found, depicted, created

	if rec['places'] and rec['places'].has_key('place_created'):
		p = rec['places']['place_created']
		pid = str(p['id'])
		where = Place(pid)
		where.label = p['display_value']
		production.took_place_at = where

	# XXX Check for place_depicted (find out all possible keys)

	# Medium: verbatim description plus any AAT materials parsed from it
	m = Material(ident)
	m.description = rec['medium']
	obj.consists_of = m
	mats = parse_materials(rec['medium'], obj.__class__)
	if mats:
		for mat in mats:
			m.defines_typical_wholes_for = mat

	# Owning department, cached per department id
	dpt = rec['department']
	dptid = dpt['id']
	try:
		dept = departments[dptid]
	except:
		dept = Department(str(dpt['id']))
		dept.label = dpt['name']
		departments[dptid] = dept
	obj.has_current_owner = dept

	# Current gallery location, cached per location id
	if rec['location']:
		loc = rec['location'][0]
		locid = str(loc['id'])
		try:
			where = locations[locid]
		except:
			where = Place(locid)
			where.label = loc['name']
			locations[locid] = where
		obj.has_current_location = where

	# NOTE(review): every record shares the literal id "culture" — confirm
	# whether a per-culture id was intended.
	culture = Type("culture")
	culture.label = rec['culture']
	obj.culture = culture
	# XXX Map to AAT

	# Visual items carried by the object
	if rec['markings']:
		markings = Mark()
		markings.description = rec['markings']
		obj.shows_visual_item = markings
	if rec['signature']:
		sig = Signature()
		sig.description = rec['signature']
		obj.shows_visual_item = sig
	if rec['inscription']:
		insc = Inscription()
		insc.description = rec['inscription']
		obj.shows_visual_item = insc

	# Credit lines modelled as Rights the object is subject to
	if rec.has_key('creditline'):
		credit = CreditLine()
		credit.value = rec['creditline']
		obj.is_subject_to = credit
	if rec['source_creditline']:
		srcCredit = SourceCreditLine()
		srcCredit.value = rec['source_creditline']
		obj.is_subject_to = srcCredit

	# Bibliography entries become Documents, numbered sequentially per record
	if rec['bibliography']:
		bx = 0
		for bib in rec['bibliography']:
			bt = bib['display_source_type']
			bv = bib['display_value']
			doc = Document("%s/%s" % (ident, str(bx)))
			bx += 1
			doc.label = bv
			doc.has_type = Type(bt)
			# XXX extract actual bib data and map to something sensible
			obj.is_documented_in = doc

	# Provenance rows are read but not yet modelled
	if rec['provenance']:
		for prov in rec['provenance']:
			date = prov['display_date']
			pid = str(prov['id'])
			who = prov['display_constituent']
			# XXX Parse constituent and map to provenance patterns


	# Exhibitions: one Activity per exhibition, with one sub-Activity per venue
	if rec['related_exhibitions']:
		for exh in rec['related_exhibitions']:
			exhid = str(exh['record_identifier'])
			ttl = exh['display_title']
			dates = exh['display_dates']

			exhibition = Exhibition(exhid)
			exhibition.label = ttl
			if dates:
				ts = TimeSpan(exhid)
				ts.description = dates
				exhibition.has_timespan = ts
				# XXX parse for begin, end dates

			vens = exh['display_venues']
			for v in vens:
				name = v['display_name']
				loc = v['display_location']
				vid = str(v['record_identifier'])
				vdates = v['display_dates']

				venue = Activity(vid)
				venue.label = name
				if vdates:
					vts = TimeSpan(vid)
					vts.description = vdates
					venue.has_timespan = vts
					# XXX Parse for begin, end dates
				if loc:
					place = Place(vid)
					place.description = loc
					# XXX Parse location
					venue.took_place_at = place
				exhibition.consists_of = venue

			# XXX Catalog Number is a Document that documents the Venue or Exhibition

			obj.was_present_at = exhibition

	ldrecs.append(obj)
	# print factory.toString(obj, compact=False)
	#break
434 |
435 |
436 |
--------------------------------------------------------------------------------
/examples/sales-to-lod.py:
--------------------------------------------------------------------------------
1 |
2 | from lxml import etree
3 | import json
4 | import csv
5 | import codecs
6 | import re
7 | import os
8 | import sys
9 | from dateutil.parser import parse as dateparse
10 |
11 | # for cidoc_orm, see: https://github.com/azaroth42/Python-CIDOC-ORM
12 | from cidoc_orm import factory, TimeSpan, Identifier, LegalBody, \
13 | Production, Actor, Place, Group, Material, Mark, \
14 | Activity, InformationObject, Purchase, Acquisition, MonetaryAmount, \
15 | Currency, MeasurementUnit, Dimension, PhysicalObject, VisualItem, Title
16 |
17 | from aat_mapping import ManMadeObject, Type, Person, materialTypes, register_aat_class, \
18 | Painting, Sculpture, Drawing, Miniature, Graphic, Enamel, Tapestry, Mosaic, \
19 | Embroidery, Furniture, LocalNumber, dimensionUnits
20 |
21 |
# Register a starting-price property on PhysicalObject so a lot-set can
# carry the auction's opening price (gri: is a project-local namespace).
PhysicalObject._properties['had_starting_price'] = {"rdf": "gri:had_starting_price", "range": MonetaryAmount}

# All parsed dimensions are assumed to be in centimetres.
cmUnit = dimensionUnits['cm']

# Cache of repeated Objects, keyed by source identifier, so the same
# catalogue/place/nationality/person/house/material is reused across records.
catalogO = {}
placeO = {}
nationalityO = {}
personO = {}
# FIX: houseO was never initialized, but process_record looks auction
# houses up in it; the NameError was swallowed by a bare except, so a new
# LegalBody was created on every record instead of being cached.
houseO = {}

materialO = {}

# Tallies of unparseable values, for inspection after a run.
bad_price = {}
bad_dates = {}
bad_types = {}
bad_materials = {}

# XXX -- Distinguish Local from Lugt
register_aat_class("LugtNumber", Identifier, "300404621")

factory.base_url = "http://data.getty.edu/provenance/"
factory.default_lang = "en"

# Map (German and English) object-type strings from the source data to
# the ORM classes imported from aat_mapping.
objTypeMap = {
	u'gem\xe4lde': Painting,
	'skulptur': Sculpture,
	'zeichnung': Drawing,
	'miniatur': Miniature,
	'graphik': Graphic,
	'painting': Painting,
	'enamel': Enamel,
	'miniature': Miniature,
	'sculpture': Sculpture,
	'drawing': Drawing,
	'tapestry': Tapestry,
	'embroidery': Embroidery,
	'furniture': Furniture,
	'mosaic': Mosaic,
	'watercolor': Painting
}


# Dimension strings such as "h 12,5 x 20 cm" / "durchm. ca. 8 cm".
r = "(je|l|h|d|b|durchm|durchmesser|dm[.]?)?[ ]*(ca.|h)?[ ]*([0-9,.]+)([ ]*(cm)?[ ]*x[ ]*([0-9,.]+)[ ]*(cm)?)?"
dimre = re.compile(r)
# dim1 = groups()[2], dim2 = groups()[5]
67 |
68 |
def process_record(rec):
	"""Convert one parsed sales-catalogue XML record into linked ORM objects.

	Builds (or reuses from the module caches) the catalogue, auction,
	lot, object, artist, seller and buyer resources described by `rec`
	(an lxml element), and returns the catalogue InformationObject.
	Unparseable prices/dates/types/dimensions are tallied in the
	module-level bad_* dicts.  Control flow leans heavily on bare
	excepts as "key missing" checks — NOTE(review): these also swallow
	genuine errors.
	"""
	# Flatten the record's children into a tag -> value dict; repeated
	# tags collect into a list, and elements with children are kept as
	# elements (for later xpath) rather than reduced to text.
	recData = {}
	for elm in rec.getchildren():
		tag = elm.tag
		curr = recData.get(tag, None)

		if elm.getchildren():
			value = elm
		else:
			value = elm.text

		if curr is None:
			recData[tag] = value
		elif type(recData[tag]) == list:
			recData[tag].append(value)
		else:
			recData[tag] = [curr, value]

	cno = recData['Catalogue_No']
	try:
		# Catalogue already seen: reuse it and its auction.
		catalog = catalogO[cno]
		auction = catalog.refers_to

		# Try and update end of timespan
		if not hasattr(auction.has_timespan, 'end_of_the_end'):
			sed = recData.get('Sale_End_Date', '')
			if sed:
				try:
					dt = dateparse(sed)
					# NOTE(review): BUG — `span` is undefined in this
					# branch (it is only bound in the except-branch of a
					# *different* call); the NameError is swallowed below
					# and miscounted as a bad date.  Presumably this
					# should assign to auction.has_timespan.end_of_the_end.
					span.end_of_the_end = "%s-%s-%sT00:00:00 CET" % (dt.year, dt.month, dt.day)
				except:
					try:
						bad_dates[sed] += 1
					except:
						bad_dates[sed] = 1

	except:
		# First time this catalogue is seen: build catalogue + auction.
		catalog = InformationObject(cno)
		catalogO[cno] = catalog
		auction = Activity(cno)
		catalog.refers_to = auction
		catalog.has_representation = VisualItem(recData['GSC_link_to_pdf'])
		catalog.is_identified_by = LocalNumber(cno)

		# Auction date
		span = TimeSpan(cno)
		sbd = recData['Sale_Begin_Date']
		sed = recData.get('Sale_End_Date', '')
		try:
			dt = dateparse(sbd)
			span.begin_of_the_begin = "%s-%s-%sT00:00:00 CET" % (dt.year, dt.month, dt.day)
		except:
			try:
				bad_dates[sbd] += 1
			except:
				bad_dates[sbd] = 1
		if sed:
			try:
				dt = dateparse(sed)
				span.end_of_the_end = "%s-%s-%sT00:00:00 CET" % (dt.year, dt.month, dt.day)
			except:
				try:
					bad_dates[sed] += 1
				except:
					bad_dates[sed] = 1
		auction.has_timespan = span

		# Auction location (cached per city name)
		city = recData['City_of_Sale']
		try:
			cityPlace = placeO[city]
		except:
			cityPlace = Place(city)
			cityPlace.label = city
			placeO[city] = cityPlace
		auction.took_place_at = cityPlace

		try:
			country = cityPlace.falls_within
		except:
			try:
				country = recData['Country_Auth']
				countryPlace = Place(country)
				countryPlace.label = country
				cityPlace.falls_within = countryPlace
			except:
				# No country
				pass

		try:
			# Auction House
			house = recData['Auction_House']
			try:
				# NOTE(review): houseO is not defined at module level, so
				# this lookup always NameErrors into the except branch and
				# the cache never fills (and new houses are never stored).
				ahouse = houseO[house]
			except:
				ahouse = LegalBody(house)
				ahouse.label = house
			auction.carried_out_by = ahouse
		except:
			# No auction house? :(
			pass

	try:
		lno = recData["Lot_Number"]
	except:
		print "No lot number for %s" % recData['recno']
		return

	cnolot = cno +'/'+ lno

	# Build an aggregation of objects for the lot
	lotset = PhysicalObject(cnolot + "-set")
	# InfoObj for the entry
	entry = InformationObject(cnolot)
	entry.refers_to = lotset
	catalog.is_composed_of = entry

	if recData.has_key('Price'):
		pr = recData['Price']
		# Process prinfo

		# Normalize the price to a plain string.
		if type(pr) == list:
			pr = pr[0]
		if not type(pr) in [str, unicode]:
			try:
				pr = pr.text
			except:
				pr = ""

		pr = pr.replace("[?]", "")
		pr = pr.replace('?', '')
		pr = pr.strip()

		if pr:
			# Fractions to decimals.
			pr = pr.replace('1/2', '.5')
			pr = pr.replace('1/4', '.25')
			pr = pr.replace('3/4', '.75')
			pr = pr.replace(' .', '.')

			# Unknown: x-y-z x.y.z
			# x"y' x=y x:y x=y-z
			#

			# Recognize a currency suffix and strip it from the number.
			if pr.find(" frs") > -1:
				curr = Currency("francs")
				curr.label = "francs"
				pr = pr.replace(' frs', '')
			elif pr.find(" fl") > -1:
				curr = Currency("fl.s")
				curr.label = "fl.s"
				pr = pr.replace(' fl', '')
			elif pr.find(" livres") > -1:
				curr = Currency("pounds")
				curr.label = "pounds"
				pr = pr.replace(' livres', '')
			else:
				curr = None

			# "X or Y" / "X for Y" — keep only the first alternative.
			pr = pr.replace('[or]', 'or')
			oidx = pr.find(' or ')
			if oidx > -1:
				pr = pr[:oidx]
				pr = pr.strip()

			fidx = pr.find(' for ')
			if fidx > -1:
				pr = pr[:fidx]
				pr = pr.strip()

			try:
				p = float(pr)
			except:
				p = -1
				try:
					bad_price[pr] += 1
				except:
					bad_price[pr] = 1
			if p >= 0:
				amnt = MonetaryAmount(cnolot + "-start")
				amnt.has_value = p
				if curr:
					amnt.has_currency = curr
				lotset.had_starting_price = amnt


	# Build the object

	try:
		typs = recData['Object_Types'].xpath('./Object_Type/text()')
		ot = typs[0]
		cls = objTypeMap[ot]
	except:
		cls = ManMadeObject
		# NOTE(review): if 'Object_Types' itself was missing, `ot` is
		# unbound here and the inner except re-raises a NameError that
		# escapes process_record.
		try:
			bad_types[ot] += 1
		except:
			bad_types[ot] = 1

	obj = cls(cnolot)
	lotset.is_composed_of = obj

	title = Title(cnolot)
	obj.has_title = title
	try:
		title.value = recData['Title']
	except:
		title.value = "[No Title Known]"
	if recData.has_key('Title_Modifier'):
		title.has_note = unicode(recData['Title_Modifier'])

	if recData.has_key('Materials'):
		for mat in recData['Materials'].xpath('./Material/text()'):
			# Strip punctuation and German/English connectives, then look
			# each remaining word up as a material.
			ot = mat.lower()
			ot = ot.replace(',', '')
			ot = ot.replace('#', '')
			ot = ot.replace('.', '')
			ot = ot.replace('?', '')
			ot = ot.replace('auf', '')
			ot = ot.replace('und', '')
			ot = ot.replace("on", " ")
			ot = ot.replace("and", " ")
			# NOTE(review): likely a collapsed double-space -> space
			# replace in the original; as written this is a no-op.
			ot = ot.replace(" ", ' ')

			words = ot.split(' ')
			obj.consists_of = []
			for w in words:
				if w:
					try:
						material = materialO[w]
					except:
						material = Material(w)
						materialO[w] = material
						material.value = w
					obj.consists_of = material

	if recData.has_key("Dimensions"):
		for dimtext in recData['Dimensions'].xpath('./Dimension_Text/text()'):
			m = dimre.match(dimtext)
			if m:
				d1 = m.groups()[2]
				d2 = m.groups()[5]

				dim1 = Dimension(cnolot + "_d1")
				dim1.has_value = d1
				dim1.has_unit = cmUnit
				obj.has_dimension = dim1

				if d2:
					dim2 = Dimension(cnolot + "_d2")
					dim2.has_value = d2
					dim2.has_unit = cmUnit
					obj.has_dimension = dim2

			else:
				#print "Can't handle dimension data:"
				#print dimtext
				# NOTE(review): unparseable dimensions are tallied in
				# bad_materials, not a bad_dimensions dict.
				try:
					bad_materials[dimtext] += 1
				except:
					bad_materials[dimtext] = 1

	# Artist could be modified by Attrib_Mod
	# e.g. zugeschrieben --> attributed (2200)
	# Kopie von --> copy from (1)
	# stil --> style [of] (24)

	# Artist
	if recData.has_key('Artist'):
		arts = recData['Artist']
		if type(arts) != list:
			arts = [arts]
		for artist in arts:
			va = artist.xpath('./Verb_Artist/text()')
			aa = artist.xpath('./Artist_Auth/text()')
			natl = artist.xpath('./Nationality/text()')
			mod = artist.xpath('./Attrib_Mod/text()')

			# first try to detect non names
			if aa:
				aa = unicode(aa[0])
				if aa == "NEW":
					# treat as if not present
					pass
				elif aa.startswith('['):
					# anonymous artist with some known features
					pass
				# Person cache keyed by a lowercased, underscored form of
				# the authority name.
				aname = aa.lower()
				aname = aname.replace(" ", "_")
				try:
					who = personO[aname]
				except:
					who = Person(aname)
					personO[aname] = who
					# put verbatim name somewhere
					# and authority name in p131 is identified by
					who.label = aa

				if natl:
					natl = unicode(natl[0])
					try:
						nat = nationalityO[natl]
					except:
						nat = Group(natl)
						nat.label = natl
					who.is_current_or_former_member_of = nat

				# one production event per artist?
				prod = Production(cnolot + aname)
				prod.carried_out_by = who
				obj.was_produced_by = prod

	# seller if we know
	seller = None
	if recData.has_key("Seller"):
		sells = recData['Seller']
		if type(sells) != list:
			sells = [sells]
		sx = 0
		for s in sells:
			# Prefer the authority name, fall back to the verbatim one.
			try:
				lbl = unicode(s.xpath("./Seller_Auth/text()")[0])
			except:
				try:
					lbl = unicode(s.xpath("./Verb_Seller/text()")[0])
				except:
					# ???!!!
					continue
			end = "-seller"
			if sx:
				end += "-%s" % sx
			seller = Actor(cnolot + end)
			seller.label = lbl
			obj.has_former_or_current_owner = seller
			sx += 1

	try:
		txn = recData['Transaction']
		txn = txn.lower()
	except:
		txn = "unknown"
	txn = txn.replace('[?]', '')
	txn = txn.replace("unknown or ", "")
	txn = txn.replace(" or unknown", "")
	txn = txn.strip()

	if txn.find(" or ") > -1:
		# Don't know what to do with X or Y
		print "Not processing or for txn"
		return

	if txn in ["sold", "bought in", "passed"]:
		# The lot came up for sale: model it as a sub-activity of the auction.
		lot = Activity(cnolot)
		auction.consists_of = lot
		lot.used_specific_object = lotset

		span = TimeSpan(cnolot)
		try:
			date = recData['Lot_Sale_Date']
			# NB: this is going to end up strange due to UK vs EU timezones
			try:
				dt = dateparse(date)
				begin = "%s-%s-%sT00:00:00 CET" % (dt.year, dt.month, dt.day)
				end = "%s-%s-%sT23:59:59 CET" % (dt.year, dt.month, dt.day)
				span.begin_of_the_begin = begin
				span.end_of_the_end = end
				lot.has_timespan = span
			except:
				try:
					bad_dates[date] += 1
				except:
					bad_dates[date] = 1
		except:
			print "No Sale Date: %s" % recData['recno']

		ln = recData.get('Lot_Notes', None)
		if ln is not None:
			lot.has_note = unicode(ln)

		entry.refers_to = lot

		if txn in ['sold', 'bought in']:
			# Bidding activity exists
			bidding = Activity(cnolot + "-bidding")
			lot.consists_of = bidding

		if txn == "sold":
			# Sale: a Purchase transaction following the bidding, with an
			# inner acquisition transferring title seller(s) -> buyer(s).
			txn = Purchase(cnolot+"-transaction")
			lot.consists_of = txn
			bidding.occurs_before = txn

			acq = Purchase(cnolot + "-acquisition")
			txn.consists_of = acq
			acq.transferred_title_of = obj

			if seller:
				sellers = obj.has_former_or_current_owner
				if type(sellers) == list:
					for s in sellers:
						acq.transferred_title_from = s
				else:
					acq.transferred_title_from = sellers

			bx = 0
			if recData.has_key("Buyer"):
				buys = recData['Buyer']
				if type(buys) != list:
					buys = [buys]
				for b in buys:
					try:
						lbl = unicode(b.xpath("./Buyer_Auth/text()")[0])
					except:
						try:
							lbl = unicode(b.xpath("./Verb_Buyer/text()")[0])
						except:
							# ???!!!
							continue

					end = "-buyer"
					if bx:
						end += "-%s" % bx
					buyer = Actor(cnolot + end)
					buyer.label = lbl
					acq.transferred_title_to = buyer
					bx += 1


	return catalog
496 |
497 |
498 |
# --- Driver: read each sales-db XML dump record by record, clean it up,
# parse it and hand it to process_record.
# NOTE(review): this section appears corrupted by markup-stripping in the
# dump — `header = '\n'` was presumably an XML declaration, and
# `line.find('')` (always 0, so the inner loop never runs) presumably
# searched for a closing record tag.  Restore the original literals
# before running.
recdir = '/Users/rsanderson/Box Sync/PI_Sales/'
files = ['salesdb1.xml', 'salesdb2.xml', 'salesdb3.xml', 'salesdb4.xml']
files = ['salesdb1.xml']   # restrict to the first file for testing

for fn in files:
	rec = []
	fh = file(os.path.join(recdir, fn))
	#fh = codecs.open(os.path.join(recdir, fn), 'r', 'iso-8859-1')

	# chomp first three lines
	fh.readline() ; fh.readline() ; fh.readline()

	header = '\n'

	line = fh.readline()
	while line != "\n":
		# accumulate lines until the record delimiter (see NOTE above)
		while (line.find('') == -1):
			rec.append(line)
			line = fh.readline()
			if not line:
				break
		rec.append(line)

		# Reassemble one record, fix tag names that contain dots and strip
		# stray control characters, then parse.
		data = ''.join(rec)
		data = header + data
		data = data.replace("Catalogue_No.", "Catalogue_No")
		data = data.replace("Country_Auth.", "Country_Auth")
		data = data.replace('\x04', '')
		data = data.replace('\x1f', '')

		try:
			dom = etree.XML(data)
		except:
			# NOTE(review): on parse failure `dom` is stale/unbound but is
			# still passed to process_record below.
			print "Invalid record data: %s" % data[:200]

		top = process_record(dom)
		# break

		line = fh.readline()
		rec = []

	fh.close()
541 |
542 |
--------------------------------------------------------------------------------
/cromulent/data/crm-profile.json:
--------------------------------------------------------------------------------
1 | {
2 | "E10_Transfer_of_Custody": 1,
3 | "E11_Modification": 1,
4 | "E12_Production": 1,
5 | "E13_Attribute_Assignment": 1,
6 | "E14_Condition_Assessment": 0,
7 | "E15_Identifier_Assignment": 0,
8 | "E16_Measurement": 0,
9 | "E17_Type_Assignment": 0,
10 | "E18_Physical_Thing": 0,
11 | "E19_Physical_Object": 0,
12 | "E1_CRM_Entity": 0,
13 | "E20_Biological_Object": 0,
14 | "E21_Person": 1,
15 | "E22_Human-Made_Object": 1,
16 | "E24_Physical_Human-Made_Thing": 0,
17 | "E25_Human-Made_Feature": 0,
18 | "E26_Physical_Feature": 0,
19 | "E27_Site": 0,
20 | "E28_Conceptual_Object": 0,
21 | "E29_Design_or_Procedure": 0,
22 | "E2_Temporal_Entity": 0,
23 | "E30_Right": 1,
24 | "E31_Document": 0,
25 | "E32_Authority_Document": 1,
26 | "E33_Linguistic_Object": 1,
27 | "E34_Inscription": 0,
28 | "E35_Title": 0,
29 | "E36_Visual_Item": 1,
30 | "E37_Mark": 0,
31 | "E38_Image": 0,
32 | "E39_Actor": 1,
33 | "E3_Condition_State": 0,
34 | "E40_Legal_Body": 0,
35 | "E41_Appellation": 0,
36 | "E42_Identifier": 1,
37 | "E4_Period": 1,
38 | "E52_Time-Span": 1,
39 | "E53_Place": 1,
40 | "E54_Dimension": 1,
41 | "E55_Type": 1,
42 | "E56_Language": 1,
43 | "E57_Material": 1,
44 | "E58_Measurement_Unit": 1,
45 | "E5_Event": 1,
46 | "E63_Beginning_of_Existence": 0,
47 | "E64_End_of_Existence": 0,
48 | "E65_Creation": 1,
49 | "E66_Formation": 1,
50 | "E67_Birth": 1,
51 | "E68_Dissolution": 1,
52 | "E69_Death": 1,
53 | "E6_Destruction": 1,
54 | "E70_Thing": 0,
55 | "E71_Human-Made_Thing": 0,
56 | "E72_Legal_Object": 0,
57 | "E73_Information_Object": 1,
58 | "E74_Group": 1,
59 | "E77_Persistent_Item": 0,
60 | "E78_Curated_Holding": 0,
61 | "E79_Part_Addition": 1,
62 | "E7_Activity": 1,
63 | "E80_Part_Removal": 1,
64 | "E81_Transformation": 1,
65 | "E83_Type_Creation": 0,
66 | "E84_Information_Carrier": 0,
67 | "E85_Joining": 1,
68 | "E86_Leaving": 1,
69 | "E87_Curation_Activity": 0,
70 | "E89_Propositional_Object": 1,
71 | "E8_Acquisition": 1,
72 | "E90_Symbolic_Object": 0,
73 | "E92_Spacetime_Volume": 0,
74 | "E93_Presence": 0,
75 | "E96_Purchase": 0,
76 | "E97_Monetary_Amount": 1,
77 | "E98_Currency": 1,
78 | "E99_Product_Type": 0,
79 | "E9_Move": 1,
80 | "E33_E41_Linguistic_Appellation": 1,
81 | "dig:D1_Digital_Object": 1,
82 | "geo:SP5_Geometric_Place_Expression": 1,
83 | "geo:SP4_Spatial_Coordinate_Reference_System": 1,
84 | "geo:SP6_Declarative_Place": 1,
85 | "sci:S19_Encounter_Event": 1,
86 | "la:Phase": 0,
87 | "la:RightAcquisition": 1,
88 | "la:Payment": 1,
89 | "la:Relationship": 1,
90 | "la:Set": 1,
91 | "la:Addition": 1,
92 | "la:Removal": 1,
93 | "la:DigitalService": 1,
94 |
95 | "P100_was_death_of": [1,0],
96 | "P100i_died_in": [1,0],
97 | "P101_had_as_general_use": [1,1],
98 | "P101i_was_use_of": [0,1],
99 | "P102_has_title": [0,1],
100 | "P102i_is_title_of": [0,0],
101 | "P103_was_intended_for": [0,1],
102 | "P103i_was_intention_of": [0,1],
103 | "P104_is_subject_to": [1,1],
104 | "P104i_applies_to": [1,1],
105 | "P105_right_held_by": [0,1],
106 | "P105i_has_right_on": [0,1],
107 | "P106_is_composed_of": [1,1],
108 | "P106i_forms_part_of": [1,1],
109 | "P107_has_current_or_former_member": [1,1],
110 | "P107i_is_current_or_former_member_of": [1,1],
111 | "P108_has_produced": [1,1],
112 | "P108i_was_produced_by": [1,0],
113 | "P109_has_current_or_former_curator": [0,1],
114 | "P109i_is_current_or_former_curator_of": [0,1],
115 | "P10_falls_within": [0,1],
116 | "P10i_contains": [0,1],
117 | "P110_augmented": [1,0],
118 | "P110i_was_augmented_by": [1,1],
119 | "P111_added": [1,0],
120 | "P111i_was_added_by": [1,1],
121 | "P112_diminished": [1,0],
122 | "P112i_was_diminished_by": [1,1],
123 | "P113_removed": [1,0],
124 | "P113i_was_removed_by": [1,1],
125 | "P11_had_participant": [1,1],
126 | "P11i_participated_in": [1,1],
127 | "P121_overlaps_with": [0,1],
128 | "P122_borders_with": [0,1],
129 | "P123_resulted_in": [1,1],
130 | "P123i_resulted_from": [1,1],
131 | "P124_transformed": [1,1],
132 | "P124i_was_transformed_by": [1,1],
133 | "P125_used_object_of_type": [0,1],
134 | "P125i_was_type_of_object_used_in": [0,1],
135 | "P126_employed": [1,1],
136 | "P126i_was_employed_in": [0,1],
137 | "P127_has_broader_term": [0,1],
138 | "P127i_has_narrower_term": [0,1],
139 | "P128_carries": [1,1],
140 | "P128i_is_carried_by": [1,1],
141 | "P129_is_about": [1,1],
142 | "P129i_is_subject_of": [1,1],
143 | "P12_occurred_in_the_presence_of": [1,1],
144 | "P12i_was_present_at": [1,1],
145 | "P130_shows_features_of": [0,1],
146 | "P130i_features_are_also_found_on": [0,1],
147 | "P131_is_identified_by": [0,1],
148 | "P131i_identifies": [0,1],
149 | "P132_overlaps_with": [0,1],
150 | "P133_is_separated_from": [0,1],
151 | "P134_continued": [1,1],
152 | "P134i_was_continued_by": [1,1],
153 | "P135_created_type": [0,1],
154 | "P135i_was_created_by": [0,1],
155 | "P136_was_based_on": [0,1],
156 | "P136i_supported_type_creation": [0,1],
157 | "P137_exemplifies": [0,1],
158 | "P137i_is_exemplified_by": [0,1],
159 | "P138_represents": [1,1],
160 | "P138i_has_representation": [1,1],
161 | "P139_has_alternative_form": [1,1],
162 | "P13_destroyed": [1,0],
163 | "P13i_was_destroyed_by": [1,0],
164 | "P140_assigned_attribute_to": [1,0],
165 | "P140i_was_attributed_by": [1,1],
166 | "P141_assigned": [1,1],
167 | "P141i_was_assigned_by": [1,1],
168 | "P142_used_constituent": [0,1],
169 | "P142i_was_used_in": [0,1],
170 | "P143_joined": [1,0],
171 | "P143i_was_joined_by": [1,1],
172 | "P144_joined_with": [1,0],
173 | "P144i_gained_member_by": [1,1],
174 | "P145_separated": [1,0],
175 | "P145i_left_by": [1,1],
176 | "P146_separated_from": [1,0],
177 | "P146i_lost_member_by": [1,1],
178 | "P147_curated": [0,1],
179 | "P147i_was_curated_by": [0,1],
180 | "P148_has_component": [1,1],
181 | "P148i_is_component_of": [1,1],
182 | "P149_is_identified_by": [0,1],
183 | "P149i_identifies": [0,1],
184 | "P14_carried_out_by": [1,1],
185 | "P14i_performed": [1,1],
186 | "P150_defines_typical_parts_of": [0,1],
187 | "P150i_defines_typical_wholes_for": [0,1],
188 | "P151_was_formed_from": [0,1],
189 | "P151i_participated_in": [0,1],
190 | "P152_has_parent": [0,1],
191 | "P152i_is_parent_of": [0,1],
192 | "P156_occupies": [1,1],
193 | "P156i_is_occupied_by": [1,1],
194 | "P157_is_at_rest_relative_to": [0,1],
195 | "P157i_provides_reference_space_for": [0,1],
196 | "P15_was_influenced_by": [1,1],
197 | "P15i_influenced": [1,1],
198 | "P160_has_temporal_projection": [0,1],
199 | "P161_has_spatial_projection": [0,1],
200 | "P164_during": [0,1],
201 | "P164i_was_time-span_of": [0,1],
202 | "P165_incorporates": [0,1],
203 | "P165i_is_incorporated_in": [0,1],
204 | "P166_was_a_presence_of": [0,1],
205 | "P166i_had_presence": [0,1],
206 | "P167_at": [0,1],
207 | "P167i_was_place_of": [0,1],
208 | "P168_place_is_defined_by": [1,1],
209 | "P168i_defines_place": [0,1],
210 | "P16_used_specific_object": [1,1],
211 | "P16i_was_used_for": [1,1],
212 | "P177_assigned_property_of_type": [1,0],
213 | "P179_had_sales_price": [0,1],
214 | "P179i_was_sales_price_of": [0,1],
215 | "P17_was_motivated_by": [1,1],
216 | "P17i_motivated": [1,1],
217 | "P180_has_currency": [1,0],
218 | "P180i_was_currency_of": [1,1],
219 | "P181_has_amount": [0,0],
220 | "P182_ends_before_or_with_the_start_of": [1,1],
221 | "P182i_starts_after_or_with_the_end_of": [1,1],
222 | "P183_ends_before_the_start_of": [1,1],
223 | "P183i_starts_after_the_end_of": [1,1],
224 | "P184_ends_before_or_with_the_end_of": [1,1],
225 | "P184i_ends_with_or_after_the_end_of": [1,1],
226 | "P185_ends_before_the_end_of": [1,1],
227 | "P185i_ends_after_the_end_of": [1,1],
228 | "P189_approximates": [1,1],
229 | "P189i_is_approximated_by": [1,1],
230 | "P19_was_intended_use_of": [0,1],
231 | "P19i_was_made_for": [0,1],
232 | "P190_has_symbolic_content": [1,0],
233 | "P191_had_duration": [1,0],
234 | "P191i_was_duration_of": [1,0],
235 | "P1_is_identified_by": [1, 1],
236 | "P1i_identifies": [1, 0],
237 | "P20_had_specific_purpose": [1,1],
238 | "P20i_was_purpose_of": [1,1],
239 | "P21_had_general_purpose": [1,1],
240 | "P21i_was_purpose_of": [0,1],
241 | "P22_transferred_title_to": [1,1],
242 | "P22i_acquired_title_through": [1,1],
243 | "P23_transferred_title_from": [1,1],
244 | "P23i_surrendered_title_through": [1,1],
245 | "P24_transferred_title_of": [1,1],
246 | "P24i_changed_ownership_through": [1,1],
247 | "P25_moved": [1,1],
248 | "P25i_moved_by": [1,1],
249 | "P26_moved_to": [1,0],
250 | "P26i_was_destination_of": [1,1],
251 | "P27_moved_from": [1,0],
252 | "P27i_was_origin_of": [1,1],
253 | "P28_custody_surrendered_by": [1,1],
254 | "P28i_surrendered_custody_through": [1,1],
255 | "P29_custody_received_by": [1,1],
256 | "P29i_received_custody_through": [1,1],
257 | "P2_has_type": [1,1],
258 | "P2i_is_type_of": [0,1],
259 | "P30_transferred_custody_of": [1,1],
260 | "P30i_custody_transferred_through": [1,1],
261 | "P31_has_modified": [1,1],
262 | "P31i_was_modified_by": [1,1],
263 | "P32_used_general_technique": [1,1],
264 | "P32i_was_technique_of": [2,1],
265 | "P33_used_specific_technique": [0,1],
266 | "P33i_was_used_by": [0,1],
267 | "P34_concerned": [0,1],
268 | "P34i_was_assessed_by": [0,1],
269 | "P35_has_identified": [0,1],
270 | "P35i_was_identified_by": [0,1],
271 | "P37_assigned": [0,1],
272 | "P37i_was_assigned_by": [0,1],
273 | "P38_deassigned": [0,1],
274 | "P38i_was_deassigned_by": [0,1],
275 | "P39_measured": [0,1],
276 | "P39i_was_measured_by": [0,1],
277 | "P3_has_note": [0,1],
278 | "P40_observed_dimension": [0,1],
279 | "P40i_was_observed_in": [0,1],
280 | "P41_classified": [0,1],
281 | "P41i_was_classified_by": [0,1],
282 | "P42_assigned": [0,1],
283 | "P42i_was_assigned_by": [0,1],
284 | "P43_has_dimension": [1,1],
285 | "P43i_is_dimension_of": [1,0],
286 | "P44_has_condition": [0,1],
287 | "P44i_is_condition_of": [0,1],
288 | "P45_consists_of": [1,1],
289 | "P45i_is_incorporated_in": [1,1],
290 | "P46_is_composed_of": [1,1],
291 | "P46i_forms_part_of": [1,1],
292 | "P48_has_preferred_identifier": [0,1],
293 | "P48i_is_preferred_identifier_of": [0,1],
294 | "P49_has_former_or_current_keeper": [0,1],
295 | "P49i_is_former_or_current_keeper_of": [0,1],
296 | "P4_has_time-span": [1,0],
297 | "P4i_is_time-span_of": [0,1],
298 | "P50_has_current_keeper": [1,1],
299 | "P50i_is_current_keeper_of": [1,1],
300 | "P51_has_former_or_current_owner": [0,1],
301 | "P51i_is_former_or_current_owner_of": [0,1],
302 | "P52_has_current_owner": [1,1],
303 | "P52i_is_current_owner_of": [1,1],
304 | "P53_has_former_or_current_location": [0,1],
305 | "P53i_is_former_or_current_location_of": [0,1],
306 | "P54_has_current_permanent_location": [0,1],
307 | "P54i_is_current_permanent_location_of": [0,1],
308 | "P55_has_current_location": [1,0],
309 | "P55i_currently_holds": [1,1],
310 | "P56_bears_feature": [1,1],
311 | "P56i_is_found_on": [1,0],
312 | "P57_has_number_of_parts": [0,1],
313 | "P58_has_section_definition": [0,1],
314 | "P58i_defines_section": [0,1],
315 | "P59_has_section": [0,1],
316 | "P59i_is_located_on_or_within": [0,1],
317 | "P5_consists_of": [0,1],
318 | "P5i_forms_part_of": [0,1],
319 | "P62_depicts": [1,1],
320 | "P62i_is_depicted_by": [1,1],
321 | "P65_shows_visual_item": [1,1],
322 | "P65i_is_shown_by": [1,1],
323 | "P67_refers_to": [1,1],
324 | "P67i_is_referred_to_by": [1,1],
325 | "P68_foresees_use_of": [0,1],
326 | "P68i_use_foreseen_by": [0,1],
327 | "P69_is_associated_with": [0,1],
328 | "P70_documents": [0,1],
329 | "P70i_is_documented_in": [0,1],
330 | "P71_lists": [1,1],
331 | "P71i_is_listed_in": [1,1],
332 | "P72_has_language": [1,1],
333 | "P72i_is_language_of": [1,1],
334 | "P73_has_translation": [1,1],
335 | "P73i_is_translation_of": [1,1],
336 | "P74_has_current_or_former_residence": [1,1],
337 | "P74i_is_current_or_former_residence_of": [1,1],
338 | "P75_possesses": [1,1],
339 | "P75i_is_possessed_by": [1,1],
340 | "P76_has_contact_point": [1,1],
341 | "P76i_provides_access_to": [1,1],
342 | "P78_is_identified_by": [0,1],
343 | "P78i_identifies": [0,1],
344 | "P79_beginning_is_qualified_by": [0,1],
345 | "P7_took_place_at": [1,1],
346 | "P7i_witnessed": [0,1],
347 | "P80_end_is_qualified_by": [0,1],
348 | "P81_ongoing_throughout": [0,1],
349 | "P81a_end_of_the_begin": [1,0],
350 | "P81b_begin_of_the_end": [1,0],
351 | "P82_at_some_time_within": [0,1],
352 | "P82a_begin_of_the_begin": [1,0],
353 | "P82b_end_of_the_end": [1,0],
354 | "P86_falls_within": [0,1],
355 | "P86i_contains": [0,1],
356 | "P87_is_identified_by": [0,1],
357 | "P87i_identifies": [0,1],
358 | "P89_falls_within": [1,1],
359 | "P89i_contains": [1,1],
360 | "P8_took_place_on_or_within": [0,1],
361 | "P8i_witnessed": [0,1],
362 | "P90_has_value": [1,0],
363 | "P90a_has_lower_value_limit": [1,0],
364 | "P90b_has_upper_value_limit": [1,0],
365 | "P91_has_unit": [1,0],
366 | "P91i_is_unit_of": [0,1],
367 | "P92_brought_into_existence": [0,1],
368 | "P92i_was_brought_into_existence_by": [0,0],
369 | "P93_took_out_of_existence": [0,1],
370 | "P93i_was_taken_out_of_existence_by": [0,0],
371 | "P94_has_created": [1,1],
372 | "P94i_was_created_by": [1,0],
373 | "P95_has_formed": [1,1],
374 | "P95i_was_formed_by": [1,0],
375 | "P96_by_mother": [0,0],
376 | "P96i_gave_birth": [0,1],
377 | "P97_from_father": [0,1],
378 | "P97i_was_father_for": [0,1],
379 | "P98_brought_into_life": [1,0],
380 | "P98i_was_born": [1,0],
381 | "P99_dissolved": [1,0],
382 | "P99i_was_dissolved_by": [1,0],
383 | "P9_consists_of": [1,1],
384 | "P9i_forms_part_of": [1,1],
385 |
386 | "P169i_spacetime_volume_is_defined_by": [0,0],
387 | "P170i_time_is_defined_by": [0,0],
388 | "P171_at_some_place_within": [0,0],
389 | "P172_contains": [0,0],
390 | "P173_starts_before_or_with_the_end_of": [1,1],
391 | "P173i_ends_after_or_with_the_start_of": [1,1],
392 | "P174_starts_before_the_end_of": [1,1],
393 | "P174i_ends_after_the_start_of": [1,1],
394 | "P175_starts_before_or_with_the_start_of": [1,1],
395 | "P175i_starts_with_or_after_the_start_of": [1,1],
396 | "P176_starts_before_the_start_of": [1,1],
397 | "P176i_starts_after_the_start_of": [1,1],
398 | "P186_produced_thing_of_product_type": [0,0],
399 | "P186i_is_produced_by": [0,0],
400 | "P187_has_production_plan": [0,0],
401 | "P187i_is_production_plan_for": [0,0],
402 | "P188_requires_production_tool": [0,0],
403 | "P188i_is_production_tool_for": [0,0],
404 | "P195_was_a_presence_of": [0,1],
405 | "P195i_had_presence": [0,1],
406 | "P196_defines": [0,1],
407 | "P196i_is_defined_by": [0,1],
408 | "P197_covered_parts_of": [0,1],
409 | "P197i_was_partially_covered_by": [0,1],
410 | "P198_holds_or_supports": [1,1],
411 | "P198i_is_held_or_supported_by": [1,1],
412 | "P199_represents_instance_of_type": [1,1],
413 | "P199i_has_instance_represented_by":[1,1],
414 |
415 | "la:paid_amount": [1,0],
416 | "la:paid_from": [1,1],
417 | "la:paid_to": [1,1],
418 | "la:establishes": [1,0],
419 | "la:established_by": [1,0],
420 | "la:invalidates": [1,1],
421 | "la:invalidated_by": [1,0],
422 | "la:relates_to": [1,0],
423 | "la:relates_from": [1,0],
424 | "la:related_to_by": [1,1],
425 | "la:related_from_by": [1,1],
426 | "la:initiated": [0,1],
427 | "la:initiated_by": [0,1],
428 | "la:terminated": [0,1],
429 | "la:terminated_by": [0,1],
430 | "la:has_phase": [0,1],
431 | "la:phase_of": [0,0],
432 | "la:related_entity": [0,0],
433 | "la:related_entity_of": [0,1],
434 | "la:relationship": [0,0],
435 | "la:has_member": [1,1],
436 | "la:member_of": [1,1],
437 | "la:added_to": [1,0],
438 | "la:added_to_by": [1,1],
439 | "la:added_member": [1,0],
440 | "la:added_member_by": [1,1],
441 | "la:removed_from": [1,0],
442 | "la:removed_from_by": [1,1],
443 | "la:removed_member": [1,0],
444 | "la:removed_member_by": [1,1],
445 | "la:digitally_carries": [1,1],
446 | "la:digitally_carried_by": [1,1],
447 | "la:digitally_shows": [1,1],
448 | "la:digitally_shown_by": [1,1],
449 | "la:digitally_available_via": [1,1],
450 | "la:digitally_makes_available": [1,1],
451 | "la:property_classified_as": [1,1],
452 | "la:represents_instance_of_type": [1,1],
453 | "la:instance_represented_by": [1,1],
454 | "la:current_permanent_custodian": [1,0],
455 | "la:current_permanent_custodian_of": [1,1],
456 | "la:equivalent": [1,1],
457 | "la:access_point": [1,1],
458 |
459 | "dc:format": [1,0],
460 | "dcterms:conformsTo": [1,1],
461 | "dcterms:subject": [1,1],
462 | "rdf:value": [1,0],
463 | "rdfs:seeAlso": [1,1],
464 | "rdfs:label": [1,0],
465 | "skos:exactMatch": [1,1],
466 | "skos:closeMatch": [1,1],
467 | "skos:narrower": [1,1],
468 | "skos:broader": [1,1],
469 | "skos:hasTopConcept": [1,1],
470 | "skos:topConceptOf": [1,1],
471 | "skos:inScheme": [1,1],
472 | "sci:O13_triggers": [1,1],
473 | "sci:O13i_is_triggered_by": [1,1],
474 | "sci:O19_encountered_object": [1,1],
475 | "sci:O19i_was_object_encountered_at": [1,1],
476 | "archaeo:AP25_occurs_during": [1,1]
477 | }
--------------------------------------------------------------------------------
/utils/data/linkedart.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 | Property Classified As
13 | Record dot one properties via Attribute Assignments
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 | Current Permanent Custodian
23 | This property records the normal custodian of the object at the time when the assertion was made. The object may have a temporary custodian, for example when it is loaned to another organization for an exhibition or between departments for conservation or storage.
24 |
25 |
26 |
27 |
28 |
29 |
30 | Current Permanent Custodian Of
31 | Inverse of Current Permanent Custodian
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 | has equivalent instance
41 | Similar to skos:exactMatch, the referenced entity is an equivalent instance to the referencing entity. This would not have the same inference issue as exactMatch, whereby the domain and range become skos:Concept, which is equivalent to E55_Type ... and thus everything becomes a Type.
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 | Payment
51 | Payment of Money
52 |
53 |
54 |
55 |
56 | Paid Amount
57 | The amount paid.
58 |
59 |
60 |
61 |
62 |
63 | Paid From
64 | Who the payment came from
65 |
66 |
67 |
68 |
69 |
70 | Paid To
71 | Who the payment went to
72 |
73 |
74 |
75 |
76 |
77 | Right Acquisition
78 | The acquiring or establishment of a particular E30 Right over some entity
79 |
80 |
81 |
82 |
83 | establishes
84 | The right established by a RightAcquisition
85 |
86 |
87 |
88 |
89 |
90 | established by
91 | The RightAcquisition that established this Right
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 | invalidates
100 | The right which is invalidated by a RightAcquisition
101 |
102 |
103 |
104 |
105 |
106 | invalidated by
107 | The RightAcquisition that invalidated this Right
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 | Phase
120 | The period of time during which an entity is in a certain phase or state of its existence. The phase can be physical (the box is open, the painting is 14 ft wide) or social (the sculpture is owned by some Actor, the building is used as a castle).
121 |
122 |
123 |
124 |
125 | initiated
126 | Events can start or initiate Phases, such as the Modification of a painting to trim it from 16 feet to 14 ft would initiate a 14 feet phase, or the Acquisition of a sculpture by some Actor would initiate that Actor's ownership phase. This relationship links the initiating Event to the Phase.
127 |
128 |
129 |
130 |
131 |
132 |
133 | initiated by
134 | The inverse of la:initiated.
135 |
136 |
137 |
138 |
139 |
140 |
141 | terminated
142 | Events can end or terminate Phases, such as the Modification of a painting to trim it from 16 feet to 14 ft would end the 16 feet phase, or the Acquisition of a sculpture by some Actor would end the seller's ownership. This relationship links the terminating Event to the Phase.
143 |
144 |
145 |
146 |
147 |
148 |
149 | terminated by
150 | The inverse of la:terminated.
151 |
152 |
153 |
154 |
155 |
156 |
157 | has phase
158 | The relationship between an E1 Entity and one of its Phases. A painting that had two sizes, 16 feet and 14 feet wide, would have two Phases, one for each width.
159 |
160 |
161 |
162 |
163 |
164 |
165 | phase of
166 | The inverse of la:has_phase.
167 |
168 |
169 |
170 |
171 |
172 |
173 | related entity
174 | An E1 Entity that defines the nature of the Phase. For an ownership phase, this would be the owning E39 Actor. For the size of painting phase, this would be the E54 Dimension that describes the size.
175 |
176 |
177 |
178 |
179 |
180 |
181 | related entity of
182 | The inverse of la:related_entity.
183 |
184 |
185 |
186 |
187 |
188 |
189 | related by
190 | The relationship between the Entity that the phase is of, and the defining entity. For an ownership phase, this would be P52 has current owner. For the size of painting phase, this would be P43 has dimension.
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 | Set
206 |
207 |
208 |
209 |
210 |
211 |
212 | has member
213 |
214 |
215 |
216 |
217 |
218 |
219 |
220 | member of
221 |
222 |
223 |
224 |
225 |
226 |
227 |
228 | Addition
229 | The addition of some entity to a Set
230 |
231 |
232 |
233 |
234 | added to
235 |
236 |
237 |
238 |
239 |
240 |
241 |
242 | added to by
243 |
244 |
245 |
246 |
247 |
248 |
249 |
250 | added
251 |
252 |
253 |
254 |
255 |
256 |
257 |
258 | added by
259 |
260 |
261 |
262 |
263 |
264 |
265 |
266 | Removal
267 | The removal of some entity from a Set
268 |
269 |
270 |
271 |
272 | removed from
273 |
274 |
275 |
276 |
277 |
278 |
279 |
280 | removed from by
281 |
282 |
283 |
284 |
285 |
286 |
287 |
288 | removed
289 |
290 |
291 |
292 |
293 |
294 |
295 |
296 | removed by
297 |
298 |
299 |
300 |
301 |
302 |
303 |
304 |
305 |
306 |
307 | digitally carries
308 |
309 |
310 |
311 |
312 |
313 |
314 |
315 | digitally carried by
316 |
317 |
318 |
319 |
320 |
321 |
322 |
323 | digitally shows
324 |
325 |
326 |
327 |
328 |
329 |
330 |
331 | digitally shown by
332 |
333 |
334 |
335 |
336 |
337 |
338 |
339 | access point
340 | From an ur- digital object to a single concrete representation.
341 | A locator as opposed to an identifier, similar to the approximated_by for Place.
342 |
343 |
344 |
345 |
346 |
347 | Digital Service
348 |
349 |
350 |
351 |
352 |
353 | digitally available via
354 |
355 |
356 |
357 |
358 |
359 |
360 |
361 | digitally makes available
362 |
363 |
364 |
365 |
366 |
367 |
368 |
369 |
--------------------------------------------------------------------------------
/examples/knoedler-to-lod.py:
--------------------------------------------------------------------------------
1 |
2 | import json
3 | import codecs
4 | import re
5 | import inspect
6 |
7 | # for cidoc_orm, see: https://github.com/azaroth42/Python-CIDOC-ORM
8 | from cidoc_orm import factory, TimeSpan, ManMadeObject, Type, Identifier, \
9 | Production, Actor, Person, Place, Group, Material, Mark, \
10 | Activity, InformationObject, Purchase, Acquisition, MonetaryAmount, \
11 | Currency, MeasurementUnit, Dimension, PhysicalObject
12 |
# Meta meta: extension classes are built dynamically below by subclassing
# cidoc_orm classes and stamping each with a Getty AAT vocabulary type URI.
ext_classes = {
    "LocalNumber": {"parent": Identifier, "vocab": "aat", "id": "300404621"},
    "AccessionNumber": {"parent": Identifier, "vocab": "aat", "id": "300312355"},
    "Inscription": {"parent": Mark, "vocab": "aat", "id": "300028702"},
    "Signature": {"parent": Mark, "vocab": "aat", "id": "300028705"},
    "Painting": {"parent": ManMadeObject, "vocab": "aat", "id": "300033618"},
    "Sculpture": {"parent": ManMadeObject, "vocab": "aat", "id": "300047090"},
    "Drawing": {"parent": ManMadeObject, "vocab": "aat", "id": "300033973"},
    "Miniature": {"parent": ManMadeObject, "vocab": "aat", "id": "300033936"},
    "Tapestry": {"parent": ManMadeObject, "vocab": "aat", "id": "300205002"},
    "Furniture": {"parent": ManMadeObject, "vocab": "aat", "id": "300037680"},
    "Mosaic": {"parent": ManMadeObject, "vocab": "aat", "id": "300015342"},
    "Photograph": {"parent": ManMadeObject, "vocab": "aat", "id": "300046300"},
    # NOTE: a duplicate "Drawing" entry (identical to the one above) was
    # removed here -- dict literals silently keep only the last occurrence.
    "Coin": {"parent": ManMadeObject, "vocab": "aat", "id": "300037222"},
    "Vessel": {"parent": ManMadeObject, "vocab": "aat", "id": "300193015"},
    "PhotographPrint": {"parent": ManMadeObject, "vocab": "aat", "id": "300127104"},
    "PhotographAlbum": {"parent": ManMadeObject, "vocab": "aat", "id": "300026695"},
    "PhotographBook": {"parent": ManMadeObject, "vocab": "aat", "id": "300265728"}
}
34 |
# Factory configuration: where minted URIs live and which JSON-LD context
# document serialized output will reference.
factory.base_url = "http://data.getty.edu/provenance/"
factory.context_uri = "http://data.getty.edu/contexts/crm_context.jsonld"

# Dynamically build one subclass per ext_classes entry, attach its Getty
# vocabulary type URI, and publish it as a module-level name so the rest
# of the script can refer to e.g. Painting directly.
for (name,v) in ext_classes.items():
    c = type(name, (v['parent'],), {})
    c._p2_has_type = "http://vocab.getty.edu/%s/%s" % (v['vocab'], v['id'])
    globals()[name] = c
42 |
# At the moment it's just an activity, no subtype info
class TakeInventory(Activity):
    """Stock-taking event: the object stays in (or never left) inventory."""
    pass
46 |
class Payment(Activity):
    """Transfer of money within a transaction (extension type "pi:Payment").

    Adds paid_amount / paid_to / paid_from properties on top of Activity.
    """
    # Property name -> RDF predicate and expected range class.
    # NOTE(review): the _properties convention is defined by cidoc_orm,
    # not visible here -- presumably consumed at serialization time.
    _properties = {
        "paid_amount": {"rdf": "pi:paid_amount", "range": MonetaryAmount},
        "paid_to": {"rdf": "pi:paid_to", "range": Actor},
        "paid_from": {"rdf": "pi:paid_from", "range": Actor}
    }
    _uri_segment = "Payment"
    _type = "pi:Payment"

# Cache the class hierarchy (minus `object`), mirroring what cidoc_orm
# appears to keep for its own classes -- confirm against the library.
Payment._classhier = inspect.getmro(Payment)[:-1]
57 |
# Object Types
# {'Pastel': 265, 'Photograph': 4, 'Clocks': 1, 'Painting [?]': 8, 'Painting': 37313,
# '[not identified]': 2, 'Clothing': 1, 'Playing Cards': 1, 'Watercolor': 547,
# 'Maps': 1, 'Book': 35, 'Decorative Art': 9, 'Painting; Sculpture': 1, 'Print': 21,
# 'Watercolor; Painting': 1, 'Sculpture': 1817, 'Drawing': 225, 'Tapestry': 61, 'Furniture': 22}

# Last day of each month, keyed by zero-padded month string; used to close
# open-ended sale-date ranges. NOTE(review): February is always 28 --
# leap years are not handled.
endOfMonths = {'01': 31, '02': 28, '03':31, '04':30, '05':31, '06':30,\
    '07':31, '08':31, '09':30, '10':31, '11':30, '12':31}
66 |
# Map the stock books' object-type strings to the dynamically built
# extension classes (Watercolor and Pastel are folded into Painting).
aat_type_mapping = {
    "Painting": Painting,
    "Drawing": Drawing,
    "Furniture": Furniture,
    "Sculpture": Sculpture,
    "Tapestry": Tapestry,
    "Watercolor": Painting,
    "Pastel": Painting
}
76 |
# # A wooden support
# Part-of-object terms -> Getty AAT concept ids (only the painted support
# so far).
aat_part_mapping = {
    "supports": "300014844" # The thing that is painted on
}

# Material keywords (as they appear in the normalized materials field)
# -> Getty AAT concept ids; consumed below to build materialTypes.
aat_material_mapping = {
    "panel": "300014657", # Is really a support
    "watercolor": "300015045",
    "oil": "300015050",
    "tempera": "300015062",
    "canvas": "300014078",
    "oak": "300012264",
    "gold leaf": "300264831",
    "paper": "300014109",
    "copper": "300011020",
    "terracotta": "300010669",
    "glass": "300010797",
    "chalk": "300011727",
    "bronze": "300010957",
    "marble": "300011443",
    "albumen silver print": "300127121",
    "gelatin silver print": "300128695",
    "silver": "300011029"
}
101 |
# pen, pencil, card, cardboard, porcelain, wax, ceramic, plaster
# crayon, millboard, gouache, brass, stone, lead, iron, clay,
# alabaster, limestone


# Pre-build one shared Material instance per known material keyword,
# keyed by the keyword for lookup in process_materials().
materialTypes = {}
for (k,v) in aat_material_mapping.items():
    m = Material("http://vocab.getty.edu/aat/%s" % v)
    m.label = k
    materialTypes[k] = m
112 |
# Nationality / culture adjectives -> Getty AAT concept ids.
# NOTE(review): not referenced in the code visible here -- presumably
# consumed further down the script.
aat_culture_mapping = {
    "french": "300111188",
    "italian": "300111198",
    "german": "300111192",
    "dutch": "300020929"
}

# Dimension kinds -> Getty AAT concept ids.
dim_type_mapping = {
    "height": "300055644",
    "width": "300055647",
    "depth": "300072633",
    "diameter": "300055624",
    "weight": "300056240"
}

# Measurement units -> Getty AAT concept ids.
dim_unit_mapping = {
    "inches": "300379100",
    "feet": "300379101",
    "cm": "300379098"
}
133 |
# Shared MeasurementUnit instance for inches -- the script's default unit
# for dimensions (see process_dimensions).  Stray trailing semicolon removed.
inches = MeasurementUnit("http://vocab.getty.edu/aat/%s" % dim_unit_mapping['inches'])
inches.label = "inches"
136 |
# Genre terms -> Getty AAT concept ids (several mappings are tentative,
# per the inline notes).
aat_genre_mapping = {
    "Abstract" : "300134134", # maybe?
    "abstract" : "300134134", # maybe?
    "Genre": "300139140", # maybe?
    "History": "300386045",
    "Landscape": "300015636",
    "Portrait": "300015637",
    "Still Life": "300015638"
}

# Pre-build one shared Type instance per genre, keyed by the genre term.
genreTypes = {}
for (k,v) in aat_genre_mapping.items():
    t = Type("http://vocab.getty.edu/aat/%s" % v)
    t.label = k
    genreTypes[k] = t
152 |
# Subject terms awaiting Getty AAT identifiers -- all still unresolved
# (empty strings), so this table is effectively a TODO list.
aat_subject_mapping = {
    "Allegory": "",
    "Animals": "",
    "Architecture": "",
    "Battles": "",
    "Figure Studies": "",
    "Interiors": "",
    "Landscapes with figures": "",
    "Literature": "",
    "Marines": "",
    "Military": "",
    "Mythology (figures)": "",
    "Mythology (narrative)": "",
    "Religious (figures)": "",
    "Religious (narrative)": "",
    "Ruins": "",
    "Sporting": "",
    "Topographical Views": ""
}
172 |
# Monkey patch Type's _toJSON to only emit full data if not just URI+type
def typeToJSON(self, top=False):
    """Serialize a Type: full record if it carries extra data, else bare id.

    NOTE(review): `top` is accepted for signature compatibility but never
    used; the threshold of 3 attributes presumably corresponds to
    id/type/label -- confirm against cidoc_orm's _toJSON contract.
    """
    props = self.__dict__.keys()
    if len(props) > 3:
        return super(Type, self)._toJSON()
    else:
        return self.id
180 |
# Install the serializer above, then bolt extra schema.org-backed
# properties onto Person and ManMadeObject so records can carry names,
# nationality, culture and dimensions.
Type._toJSON = typeToJSON
Person._properties['familyName'] = {"rdf": "schema:familyName", "range": str}
Person._properties['givenName'] = {"rdf": "schema:givenName", "range": str}
Person._properties['nationality'] = {"rdf": "schema:nationality", "range": Place}
ManMadeObject._properties['culture'] = {"rdf": "schema:genre", "range": Type}
ManMadeObject._properties['height'] = {"rdf": "schema:height", "range": Dimension}
ManMadeObject._properties['width'] = {"rdf": "schema:width", "range": Dimension}
188 |
# The Knoedler gallery itself -- a party to every transaction in the books.
knoedler = Group("knoedler")
knoedler.label = "Knoedler"

# Also assert the inverse of each relation the factory records.
factory.materialize_inverses = True
193 |
def process_money_value(value):
    """Strip editorial noise from a transcribed money value.

    Removes bracket characters, punctuation placeholders and known
    non-numeric annotations (e.g. "not written", "See sales book"),
    then trims surrounding whitespace.  Returns '' when nothing
    numeric-looking remains.
    """
    # Order matters only in that multi-word phrases must be removed
    # whole; this sequence matches the original replace chain exactly.
    junk_tokens = (
        '[', ']', '-', '?',
        'not written',
        'sold at auction',
        'x',
        'Lot Price',
        'See sales book',
    )
    for token in junk_tokens:
        value = value.replace(token, '')
    return value.strip()
206 |
def process_materials(what, materials):
    """Parse a free-text materials field into a list of Material objects.

    Normalizes the transcribers' shorthand (bracketed expansions,
    spelling fixes), then looks each remaining word up in the module's
    materialTypes table; unknown words are skipped.  `what` (the object
    the materials describe) is currently unused.
    """
    materials = materials.lower()
    materials = materials.replace("&", "and")
    # Expand / normalize bracketed shorthand from the stock books.
    materials = materials.replace("card [cardboard]", "cardboard")
    materials = materials.replace("c [canvas]", "canvas")
    materials = materials.replace("w/c [watercolor]", "watercolor")
    materials = materials.replace("[bronze]", "bronze")
    materials = materials.replace("c on p [canvas on panel]", "canvas on panel")
    materials = materials.replace("[from sales book 9, 1907-1912, f. 361]", "")
    materials = materials.replace("p [panel]", "panel")
    materials = materials.replace('terra cotta', 'terracotta')
    materials = materials.replace(',', '')
    materials = materials.replace('-', '')
    materials = materials.replace('procelain', 'porcelain')
    materials = materials.strip()
    matwds = materials.split(' ')
    mats = []
    for mw in matwds:
        # Connectives carry no material information.
        if mw in ['on', 'and']:
            continue
        try:
            mats.append(materialTypes[mw])
        except KeyError:
            # Unrecognized material word; skip it.  (Was a bare `except:`
            # which would also have hidden unrelated errors.)
            # print "Material not found: %s" % mw
            pass

    return mats
234 |
# Mixed fraction like "20 1/2" (whole, numerator, denominator).
divre = re.compile('^([0-9]+) ([0-9]+)/([0-9]+)( |$)')
# "<value> <axis word>", e.g. "24 high" -- names the axis explicitly.
unitre = re.compile('^([0-9.]+) (high|height|h|long|length|l)( |$)')

def process_dimensions(dims):
    """Parse a free-text dimension string into [[value, axis], ...].

    axis is 'h' (height) or 'w' (width); units default to inches.
    Values are ints for whole numbers, floats for fractions/decimals.
    Pieces that cannot be parsed are silently skipped.
    """
    dims = dims.lower()
    # assume default of inches
    dims = dims.replace('"', '')
    dims = dims.replace('in.', '')
    dims = dims.replace('inches', '')
    dims = dims.replace('//', '/')
    dims = dims.replace('[', '')
    dims = dims.replace(']', '')
    # (the old replace('X', 'x') was dead code: lower() already ran above)
    dl = dims.split('x')

    dimensions = []
    p = 0
    for d in dl:
        d = d.strip()
        # check for (nn n/n)
        which = ""
        m = divre.match(d)
        if m:
            (main, numr, denom, end) = m.groups()
            ttl = int(main) + (float(numr) / float(denom))
        else:
            try:
                ttl = int(d)
            except ValueError:
                m = unitre.match(d)
                if m:
                    (raw, which, end) = m.groups()
                    # fix: the regex group is a string -- coerce to a number
                    # so the output value type is consistent.
                    ttl = float(raw)
                    # 'h*' words mean height; 'l*' (long/length) treated
                    # as width, matching the original behavior.
                    if which.startswith('h'):
                        which = 'h'
                    else:
                        which = 'w'
                else:
                    try:
                        # generalization: accept bare decimals like "12.5",
                        # which the int()-only path used to drop.
                        ttl = float(d)
                    except ValueError:
                        # unparseable fragment -- skip it
                        continue
        if not which:
            # No explicit axis word: first measurement is height, the
            # rest are widths.
            which = "w" if p else "h"
            p += 1
        dimensions.append([ttl, which])
    return dimensions
279 |
280 |
281 | def print_rec_full(rec):
282 | its = rec.items()
283 | its.sort()
284 | for (k,v) in its:
285 | if v:
286 | print "%s: %s" % (k, v)
287 |
# Caches of InformationObject instances keyed by book / page id, so later
# records can attach to books and pages created by earlier ones.
stock_books = {}
pages = {}

# Load the pre-scraped Knoedler stock-book records (Python 2 `file()`
# builtin; the JSON is a dict of records, apparently keyed by record id).
fh = file('knoedler_cache.json')
cache = json.load(fh)
fh.close()

# Process a deterministic subset: sort by STAR id, keep the first 5000.
recs = cache.values()
recs = sorted(recs, key=lambda x: x['star_id'])[:5000]
# recs = [cache['72910']]
298 |
299 | for rec in recs:
300 |
301 | bookId = rec['stock_book_id']
302 | try:
303 | book = stock_books[bookId]
304 | except:
305 | # create the book
306 | book = InformationObject(bookId)
307 | book.label = "Knoedler Stock Book %s" % bookId
308 | stock_books[bookId] = book
309 |
310 | pageId = "%s/%s" % (bookId, rec['page_num'])
311 | try:
312 | page = pages[pageId]
313 | except:
314 | # create the page in the book
315 | page = InformationObject(pageId)
316 | page.label = "Page %s" % rec['page_num']
317 | pages[pageId] = page
318 | book.has_fragment = page
319 |
320 | # create the entry
321 | entryId = "%s/%s" % (pageId, rec['row_num'])
322 | entry = InformationObject(entryId)
323 | entry.label = "Row %s" % rec['row_num']
324 | page.has_fragment = entry
325 |
326 | # the description and notes fields are related to the entry
327 | # not the object
328 | if rec['notes']:
329 | entry.description = rec['notes']
330 | if rec['description']:
331 | entry.description = rec['description']
332 |
333 | oid = rec['pi_id']
334 |
335 | # create the activity that the entry describes
336 | # 'Sold':26598,'Unsold':11824,'Exchanged':103,'Presented':246,'Transferred':310
337 | # 'Returned':533,'Unknown': 629,
338 | # 'Lost': 7, 'Voided': 16, 'Disjointed': 16, 'Cancelled': 47, 'Removed': 6
339 |
340 | # The outbound activity
341 | txn = None
342 | inv = None
343 |
344 | txnType = rec['transaction']
345 | if txnType == "Sold":
346 | txn = Purchase(oid)
347 | elif txnType in ['Exchanged', 'Presented', 'Transferred']:
348 | if rec['price_amount']:
349 | txn = Purchase(oid)
350 | else:
351 | txn = Acquisition(oid)
352 | elif txnType in ["Unsold", "Cancelled"]:
353 | # Stock taking, or never left inventory due to no sale
354 | inv = TakeInventory(oid)
355 | elif txnType in ["Lost", "Removed"]:
356 | # Leaves inventory, but not via a transfer of ownership
357 | # E8 can represent end of ownership. There's just no new owner.
358 | txn = Acquisition(oid)
359 | elif txnType == "Voided":
360 | # Bad data; voided should be skipped (per Kelly)
361 | continue
362 | elif txnType == "Returned":
363 | # Can't tell what this actually means yet
364 | # Could be entering or leaving Knoedler stock!
365 | continue
366 | elif txnType == "Unknown":
367 | if rec['price_amount']:
368 | txn = Purchase(oid)
369 | else:
370 | inv = TakeInventory(oid)
371 | else:
372 | # I think this is only Disjointed
373 | # print_rec(rec)
374 | continue
375 |
376 | # The inbound activity that always happens
377 | # For consistency, always generate a Payment
378 | if rec['purchase_amount']:
379 | inTxn = Purchase("purch_%s" % oid)
380 | pay = Payment("purch_%s" % oid)
381 | inTxn.consists_of = pay
382 |
383 | amnt = MonetaryAmount("purch_price_%s" % oid)
384 | value = process_money_value(rec['purchase_amount'])
385 | if value:
386 | try:
387 | amnt.has_value = float(value)
388 | except:
389 | amnt.description = value
390 | if rec['purchase_currency']:
391 | curr = Currency(rec['purchase_currency'])
392 | curr.label = rec['purchase_currency']
393 | amnt.has_currency = curr
394 | if rec['purchase_note']:
395 | amnt.description = rec['purchase_note']
396 | pay.paid_amount = amnt
397 | pay.paid_from = knoedler
398 | inTxn.had_sales_price = amnt
399 |
400 | else:
401 | inTxn = Acquisition("purch_%s" % oid)
402 |
403 | inTxn.transferred_title_to = knoedler
404 | if rec['seller_name'] or rec['seller_name_auth']:
405 | # Look up in authority?
406 | seller = Actor("seller_%s" % oid)
407 | seller.label = rec['seller_name_auth'] if rec['seller_name_auth'] else rec['seller_name']
408 | if rec['seller_loc'] or rec['seller_loc_auth']:
409 | sellerPlace = Place("seller_place_%s" % oid)
410 | sellerPlace.label = rec['seller_loc_auth'] if rec['seller_loc_auth'] else rec['seller_loc']
411 | seller.has_current_or_former_residence = sellerPlace
412 | inTxn.transferred_title_from = seller
413 | if rec['purchase_amount']:
414 | pay.paid_to = seller
415 |
416 | # CurationPeriod
417 | curated = Activity("curated_%s" % oid)
418 | curated.is_started_by = inTxn
419 |
420 | if txn:
421 | # from
422 | txn.transferred_title_from = knoedler
423 | # to
424 | if rec['buyer_name'] or rec['buyer_name_auth']:
425 | # Look up in authority?
426 | buyer = Actor("buyer_%s" % oid)
427 | buyer.label = rec['buyer_name_auth'] if rec['buyer_name_auth'] else rec['buyer_name']
428 | if rec['buyer_loc'] or rec['buyer_loc_auth']:
429 | buyerPlace = Place("buyer_place_%s" % oid)
430 | buyerPlace.label = rec['buyer_loc_auth'] if rec['buyer_loc_auth'] else rec['buyer_loc']
431 | buyer.has_current_or_former_residence = buyerPlace
432 | txn.transferred_title_to = buyer
433 |
434 | # when
435 | if rec['sale_date_year']:
436 | # if year, then all. blank is "00"
437 | yr = rec['sale_date_year']
438 | mt = rec['sale_date_month']
439 | dy = rec['sale_date_day']
440 | if dy != "00":
441 | start = "%s-%s-%s" % (yr,mt,dy)
442 | end = start
443 | elif mt != "00":
444 | start = "%s-%s-01" % (yr,mt)
445 | end = "%s-%s-%s" % (yr,mt,endOfMonths[mt])
446 | else:
447 | start = "%s-01-01" % yr
448 | end = "%s-12-31" % yr
449 | span = TimeSpan("sale_span_%s" % oid)
450 | span.begin_of_the_begin = start
451 | span.end_of_the_end = end
452 | txn.has_timespan = span
453 |
454 | value = process_money_value(rec['price_amount'])
455 | if value:
456 | amnt = MonetaryAmount("sale_price_%s" % oid)
457 | try:
458 | amnt.has_value = float(value)
459 | except:
460 | amnt.description = value
461 | if rec['price_currency']:
462 | curr = Currency(rec['price_currency'])
463 | curr.label = rec['price_currency']
464 | amnt.has_currency = curr
465 | if rec['price_note']:
466 | amnt.description = rec['price_note']
467 | txn.had_sales_price = amnt
468 |
469 | # Check knoedler_share
470 | if rec['knoedler_share_amount']:
471 |
472 | value = process_money_value(rec['knoedler_share_amount'])
473 | if value:
474 | amnt = MonetaryAmount("shared_price_%s" % oid)
475 | try:
476 | amnt.has_value = float(value)
477 | except:
478 | amnt.description = value
479 | if rec['knoedler_share_currency']:
480 | curr = Currency(rec['knoedler_share_currency'])
481 | curr.label = rec['knoedler_share_currency']
482 | amnt.has_currency = curr
483 | if rec['knoedler_share_note']:
484 | amnt.description = rec['knoedler_share_note']
485 |
486 | pay = Payment("kshare_%s" % oid)
487 | txn.consists_of = pay
488 | pay.paid_amount = amnt
489 | pay.paid_to = knoedler
490 | if rec['buyer_name'] or rec['buyer_name_auth']:
491 | pay.paid_from = buyer
492 |
493 | else:
494 | pay = Payment("sale_%s" % oid)
495 | txn.consists_of = pay
496 | pay.paid_amount = amnt
497 | pay.paid_to = knoedler
498 | if rec['buyer_name'] or rec['buyer_name_auth']:
499 | pay.paid_from = buyer
500 |
501 | curated.is_finished_by = txn
502 | elif inv:
503 | # Taking of Inventory as part of the curation period
504 | curated.consists_of = inv
505 |
506 | # If taking inventory, then the entry date is for that
507 | if rec['entry_date_year']:
508 | # if year, then all. blank is "00"
509 | yr = rec['entry_date_year']
510 | mt = rec['entry_date_month']
511 | dy = rec['entry_date_day']
512 | if dy != "00":
513 | start = "%s-%s-%s" % (yr,mt,dy)
514 | end = start
515 | elif mt != "00":
516 | start = "%s-%s-01" % (yr,mt)
517 | end = "%s-%s-%s" % (yr,mt,endOfMonths[mt])
518 | else:
519 | start = "%s-01-01" % yr
520 | end = "%s-12-31" % yr
521 | span = TimeSpan("sale_span_%s" % oid)
522 | span.begin_of_the_begin = start
523 | span.end_of_the_end = end
524 | inv.has_timespan = span
525 |
526 |
527 | if not inv:
528 | # entry date is for purchase
529 | if rec['entry_date_year']:
530 | # if year, then all. blank is "00"
531 | yr = rec['entry_date_year']
532 | mt = rec['entry_date_month']
533 | dy = rec['entry_date_day']
534 | if dy != "00":
535 | start = "%s-%s-%s" % (yr,mt,dy)
536 | end = start
537 | elif mt != "00":
538 | start = "%s-%s-01" % (yr,mt)
539 | end = "%s-%s-%s" % (yr,mt,endOfMonths[mt])
540 | else:
541 | start = "%s-01-01" % yr
542 | end = "%s-12-31" % yr
543 | span = TimeSpan("purch_span_%s" % oid)
544 | span.begin_of_the_begin = start
545 | span.end_of_the_end = end
546 | inTxn.has_timespan = span
547 |
548 | # create the object of the transaction
549 |
550 | objectType = rec['object_type']
551 | try:
552 | what = aat_type_mapping[objectType](oid)
553 | except:
554 | what = ManMadeObject(oid)
555 |
556 | curated.used_specific_object = what
557 | inTxn.transferred_title_of = what
558 | entry.refers_to = inTxn
559 | if txn:
560 | txn.transferred_title_of = what
561 | entry.refers_to = txn
562 | elif txnType == "Voided":
563 | entry.refers_to = what
564 |
565 | what.label = rec['title']
566 |
567 | idnt = AccessionNumber("knoedler_%s" % oid)
568 | idnt.value = rec['knoedler_id']
569 | # No way to say it's Knoedler's number?
570 | # Could have a Creation of the Identifier performed by Knoedler :(
571 |
572 | if rec['artist_name'] or rec['artist_name_auth']:
573 | artist = Person("artist_%s" % oid)
574 | artist.label = rec['artist_name_auth'] if rec['artist_name_auth'] else rec['artist_name']
575 | if rec['nationality']:
576 | artist.nationality = Place("artist_natl_%s" % oid)
577 | artist.nationality.label = rec['nationality']
578 |
579 | prodn = Production("production_%s" % oid)
580 | prodn.carried_out_by = artist
581 | what.was_produced_by = prodn
582 |
583 | if rec['artist_name_2'] or rec['artist_name_auth_2']:
584 | artist = Person("artist2_%s" % oid)
585 | artist.label = rec['artist_name_auth_2'] if rec['artist_name_auth_2'] else rec['artist_name_2']
586 | if rec['nationality_2']:
587 | artist.nationality = Place('artist_2_natl_%s' % oid)
588 | artist.nationality.label = rec['nationality_2']
589 | prodn.carried_out_by = artist
590 |
591 |
592 | # genre
593 | if rec['genre'] and not rec['genre'] == '[not identified]':
594 | if not aat_genre_mapping.has_key(rec['genre']):
595 | print "Not found: %s" % (rec['genre'])
596 | else:
597 | what.has_type = genreTypes[rec['genre']]
598 |
599 | # subject
600 | if rec['subject']:
601 | s = rec['subject']
602 | if s.find(';'):
603 | ss = [x.strip() for x in s.split(';')]
604 | else:
605 | ss = [s]
606 | for s in ss:
607 | # s = s.replace('Int\xe9rieurs', 'Interiors')
608 | sid = s.replace(' ', '')
609 | sid = sid.replace('(', '')
610 | sid = sid.replace(')', '')
611 | t = Type(sid)
612 | t.label = s
613 | what.depicts = t
614 |
615 | # materials
616 | if rec['materials']:
617 | # XXX Finish this
618 | process_materials(what, rec['materials'])
619 | # what.made_of = material
620 |
621 | if rec['dimensions']:
622 | # XXX Finish this too
623 | dims = process_dimensions(rec['dimensions'])
624 | for d in dims:
625 | dim = Dimension("%s_%s" % (d[0], oid))
626 | dim.has_value = d[0]
627 | dim.has_unit = inches
628 | if d[1] == 'h':
629 | what.height = dim
630 | else:
631 | what.width = dim
632 |
# Wrap every stock book in a single InformationObject so the whole
# dataset serializes as one JSON-LD document.
collection = InformationObject("collection")
for sbook in stock_books.values():
	collection.has_fragment = sbook


factory.full_names = True
outstr = factory.toString(collection, compact=False)

# Use open() with a context manager: the Python-2-only file() builtin
# is gone in Python 3, and `with` guarantees the handle is closed even
# if the write fails part-way.
with open('knoedler.jsonld', 'w') as fh:
	fh.write(outstr)
644 |
645 | # Note that these entries are really one transaction
646 | # 64699 ... 64732
647 |
648 |
--------------------------------------------------------------------------------
/tests/test_model.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import sys
3 | import os
4 | import shutil
5 | import json
6 | import pickle
7 | from collections import OrderedDict
8 | from cromulent import model, vocab
9 | from cromulent.model import override_okay
10 |
11 |
class TestFactorySetup(unittest.TestCase):
	"""Configuration of the shared, module-global model.factory.

	The factory is global state, so tearDown restores the defaults
	that the other test classes in this module rely on.
	"""

	def setUp(self):
		model.factory.base_url = 'http://data.getty.edu/provenance/'
		model.factory.base_dir = 'tests/provenance_base_dir'
		model.factory.default_lang = 'en'
		#model.factory.context_uri = 'http://www.cidoc-crm.org/cidoc-crm/'

	def tearDown(self):
		# Restore the factory defaults mutated by setUp / the tests.
		model.factory.base_url = 'http://lod.example.org/museum/'
		model.factory.log_stream = sys.stderr
		model.factory.debug_level = 'warn'

	def test_base_url(self):
		self.assertEqual(model.factory.base_url, 'http://data.getty.edu/provenance/')

	def test_base_dir(self):
		self.assertEqual(model.factory.base_dir, 'tests/provenance_base_dir')

	def test_default_lang(self):
		self.assertEqual(model.factory.default_lang, 'en')

	def test_set_debug_stream(self):
		strm = open('err_output', 'w')
		model.factory.set_debug_stream(strm)
		self.assertEqual(model.factory.log_stream, strm)
		# Close the handle and delete the scratch file so the test does
		# not leak an open file or leave 'err_output' behind.  tearDown
		# resets log_stream to stderr, so closing here is safe.
		strm.close()
		os.remove('err_output')

	def test_set_debug(self):
		model.factory.set_debug('error_on_warning')
		self.assertEqual(model.factory.debug_level, 'error_on_warning')
		self.assertRaises(model.ConfigurationError, model.factory.set_debug, 'xxx')
		self.assertRaises(model.MetadataError, model.factory.maybe_warn, "test")

	def test_load_context(self):
		self.assertRaises(model.ConfigurationError, model.factory.load_context,
			"foo", {"foo":"does_not_exist.txt"})
		model.factory.load_context("foo", {"foo":"tests/test_context.json"})
		self.assertEqual(model.factory.context_json, {"@context":{"id":"@id"}})
		self.assertRaises(model.ConfigurationError, model.factory.load_context, "", {})

	def test_pickle(self):
		model.factory.log_stream = sys.stderr
		srlz = pickle.dumps(model.factory)
		newfac = pickle.loads(srlz)
		# The stream must survive a pickle round-trip as the same object.
		self.assertTrue(model.factory.log_stream is newfac.log_stream)
57 |
58 |
59 |
class TestFactorySerialization(unittest.TestCase):
	"""Exercise factory.toJSON / toString / toFile and serializer options."""

	def setUp(self):
		self.collection = model.InformationObject('collection')
		self.collection._label = "Test Object"

	def test_broken_unicode(self):
		# Undecodable bytes in a value must raise, not serialize garbage.
		model.factory.debug_level = "error_on_warning"
		try:
			badval = b"\xFF\xFE\x02"
		except:
			badval = "\xFF\xFE\x02"
		badjs = {"_label": badval}
		self.assertRaises(model.MetadataError, model.factory._buildString,
			js=badjs)

	def test_toJSON(self):
		# model.factory.context_uri = 'http://lod.getty.edu/context.json'
		expect = OrderedDict([
			('@context', model.factory.context_uri),
			('id', u'http://lod.example.org/museum/InformationObject/collection'),
			('type', 'InformationObject'), ('_label', 'Test Object')])
		outj = model.factory.toJSON(self.collection)
		self.assertEqual(expect, outj)

	def test_toJSON_fast(self):
		model.factory.json_serializer = "fast"
		expect = {'@context': model.factory.context_uri,
			'id': 'http://lod.example.org/museum/InformationObject/collection',
			'type': 'InformationObject',
			'_label': 'Test Object'}
		outj = model.factory.toJSON(self.collection)
		self.assertEqual(expect, outj)
		model.factory.json_serializer = "normal"

	def test_toJSON_normal(self):
		# full_names serializes @id/@type and the crm:/rdfs: prefixed keys.
		expect = OrderedDict([(u'@context', model.factory.context_uri),
			(u'@id', u'http://lod.example.org/museum/Person/1'), (u'@type', u'crm:E21_Person'),
			('rdfs:label', 'Test Person')])
		model.factory.full_names = True
		p = model.Person("1")
		p._label = "Test Person"
		outj = model.factory.toJSON(p)
		self.assertEqual(expect, outj)
		# reset
		model.factory.full_names = False

	def test_toString(self):
		expect = u'{"@context":"'+model.factory.context_uri+'","id":"http://lod.example.org/museum/InformationObject/collection","type":"InformationObject","_label":"Test Object"}'
		outs = model.factory.toString(self.collection)
		self.assertEqual(expect, outs)

	def test_toString_fast(self):
		# The "fast" serializer relies on plain dicts preserving insertion
		# order, guaranteed only from Python 3.6 on.  Compare the version
		# tuple directly; the old `major >= 3 and minor >= 6` test would
		# wrongly skip on any future major version with minor < 6.
		if sys.version_info >= (3, 6):
			expect = u'{"@context":"'+model.factory.context_uri+'","id":"http://lod.example.org/museum/InformationObject/collection","type":"InformationObject","_label":"Test Object"}'
			model.factory.json_serializer = "fast"
			outs = model.factory.toString(self.collection)
			model.factory.json_serializer = "normal"
			self.assertEqual(expect, outs)
		else:
			print("Skipping toString_fast test in Python 2.x")

	def test_toFile(self):
		self.assertRaises(model.ConfigurationError, model.factory.toFile, self.collection)
		# Test auto filename determination
		model.factory.base_dir = 'tests'
		model.factory.toFile(self.collection)
		self.assertTrue(os.path.isfile('tests/InformationObject/collection.json'))
		# Test explicit filename setting
		model.factory.toFile(self.collection, filename='tests/fishbat.bar')
		self.assertTrue(os.path.isfile('tests/fishbat.bar'))
		# Tidy up.  Previously 'tests/fishbat.bar' was left behind.
		shutil.rmtree('tests/InformationObject')
		os.remove('tests/fishbat.bar')

	def test_breadth(self):
		x = model.TransferOfCustody()
		e = model.Activity()
		fr = model.Group()
		to = model.Group()
		w = model.HumanMadeObject()
		fr._label = "From"
		to._label = "To"
		x.transferred_custody_of = w
		x.transferred_custody_from = fr
		x.transferred_custody_to = to
		e.used_specific_object = w
		e.carried_out_by = to
		w.current_owner = fr
		x.specific_purpose = e
		js = model.factory.toJSON(x)
		# Okay ... if we're breadth first, then custody_from is a resource
		# And now it's the first in the list
		self.assertTrue(isinstance(js['transferred_custody_from'][0], OrderedDict))

	def test_string_list(self):
		x = model.Activity()
		x._label = ["Label 1", "Label 2"]
		js = model.factory.toJSON(x)
		self.assertTrue(js['_label'] == x._label)

	def test_external(self):
		x = model.ExternalResource(ident="1")
		model.factory.elasticsearch_compatible = 1
		js = x._toJSON(done=None)
		self.assertTrue(type(js) == dict)
		model.factory.elasticsearch_compatible = 0
		js = x._toJSON(done=None)
		# testing unicode in 2, str in 3 :(
		self.assertTrue(type(js) != dict)

	def test_recursion(self):
		x = model.Activity()
		x.part = x
		js = model.factory.toJSON(x)
		# If our recursion checks have regressed, this will barf right here
		self.assertTrue(1)

	def test_pipe_scoped(self):
		x = model.Activity()
		y = model.Activity()
		x.part = y
		model.factory.pipe_scoped_contexts = True
		js = model.factory.toJSON(x)
		self.assertTrue('part|crm:P9_consists_of' in js)
		model.factory.pipe_scoped_contexts = False
		js = model.factory.toJSON(x)
		self.assertTrue('part|crm:P9_consists_of' not in js)
		self.assertTrue('part' in js)

	def test_collapse_json(self):
		# NOTE(review): these factory settings are not reset afterwards;
		# later tests appear to tolerate this, but verify before reordering.
		model.factory.auto_id_type = "uuid"
		model.factory.base_url = "http://lod.example.org/museum/"
		model.factory.context_uri = "https://linked.art/ns/v1/linked-art.json"
		p = model.Person()
		p.classified_as = model.Type(ident="http://example.org/Type", label="Test")
		res1 = model.factory.toString(p, compact=False, collapse=60) # all new lines
		res2 = model.factory.toString(p, compact=False, collapse=120) # compact list of type
		self.assertEqual(len(res1.splitlines()), 12)
		self.assertEqual(len(res2.splitlines()), 6)

	def test_production_mode(self):

		# model.factory.production_mode()
		# Can't unset the cached hierarchy
		# and it causes the test for the hierarchy to fail
		model.factory.validate_profile = False
		model.factory.validate_properties = False
		model.factory.validate_range = False
		model.factory.validate_multiplicity = False

		p = model.Person()
		p.identified_by = model.Name(value="abc")
		p.part = model.HumanMadeObject()
		js = model.factory.toJSON(p)

		model.factory.production_mode(state=False)


	def test_ordering(self):
		p = model.Person(label="Person")
		p.classified_as = model.Type(ident="type-uri")
		p.referred_to_by = model.LinguisticObject(content="text")
		p.dimension = model.Dimension(value=1)

		# Serialization order must follow the configured key order.
		outstr = model.factory.toString(p)
		lbl = outstr.index("_label")
		clsf = outstr.index("classified_as")
		r2b = outstr.index("referred_to_by")
		dim = outstr.index("dimension")
		self.assertTrue(lbl < clsf)
		self.assertTrue(clsf < r2b)
		self.assertTrue(r2b < dim)
233 |
234 |
class TestProcessTSV(unittest.TestCase):
	"""Check that the bundled CRM vocabulary TSV parses as expected."""

	def test_process_tsv(self):
		parsed = model.process_tsv('cromulent/data/crm_vocab.tsv')
		entry = parsed['E22_Human-Made_Object']
		expected = {
			u'subs': [u'E84_Information_Carrier'],
			u'label': u'Human-Made Object',
			u'className': u'HumanMadeObject',
			u'subOf': u'E19_Physical_Object|E24_Physical_Human-Made_Thing',
			u'props': [],
			u'class': None,
			u'okay': u'1',
		}
		# The description is long and changes frequently; ignore it.
		del entry['desc']
		# 'subs' may differ when a profile extension adds subclasses, so
		# only compare it when it matches the baseline.
		if entry['subs'] != expected['subs']:
			del entry['subs']
			del expected['subs']
		self.assertEqual(expected, entry)
249 |
250 |
class TestBuildClasses(unittest.TestCase):
	"""model.build_classes() should create new model classes from a TSV profile."""

	def test_build_classes(self):
		tsv = "\nClassName_full\tclass\tClassName_py\tClass Label\tClass Description\t\t1\t\n"
		# Write the scratch profile with a context manager so the handle
		# is closed even if the write fails.
		with open('tests/temp.tsv', 'w') as fh:
			fh.write(tsv)
		try:
			model.build_classes("tests/temp.tsv", "ClassName_full")
			from cromulent.model import ClassName_py
			self.assertEqual('Class Description', ClassName_py.__doc__)
		finally:
			# Remove the scratch file even when the assertion fails;
			# previously a failure left tests/temp.tsv behind.
			os.remove('tests/temp.tsv')
262 |
class TestBuildClass(unittest.TestCase):
	"""model.build_class() should create a single class from parsed TSV data."""

	def test_build_class(self):
		tsv = "\nClassName_full\tclass\tClassName_py2\tClass Label\tClass Description\t\t1\t\n"
		# Context manager guarantees the scratch file handle is closed.
		with open('tests/temp.tsv', 'w') as fh:
			fh.write(tsv)
		try:
			vocabData = model.process_tsv('tests/temp.tsv')
			model.build_class('ClassName_full', model.BaseResource, vocabData)
			from cromulent.model import ClassName_py2
			self.assertEqual('Class Description', ClassName_py2.__doc__)
		finally:
			# Remove the scratch file even when the assertion fails;
			# previously a failure left tests/temp.tsv behind.
			os.remove('tests/temp.tsv')
275 |
class TestAutoIdentifiers(unittest.TestCase):
	"""Automatic identifier generation.

	Covers the factory's auto_id_type modes ("int", "int-per-type",
	"int-per-segment", "uuid"), prefix expansion, and bnode behavior.
	These tests mutate the factory's global counters, so they depend on
	running each method's constructions in the order written.
	"""

	def test_bad_autoid(self):
		# An unknown auto_id_type must raise rather than mint bad ids.
		model.factory.auto_assign_id = True
		model.factory.auto_id_type = "broken"
		self.assertRaises(model.ConfigurationError, model.factory.generate_id,
			"irrelevant")

	def test_int(self):
		# "int": one counter shared by all classes, so consecutive
		# constructions get consecutive ids.
		# NOTE(review): only the final digit is compared, which assumes the
		# counter does not cross a multiple of ten mid-test — confirm.
		model.factory.auto_assign_id = True
		model.factory.auto_id_type = "int"
		p = model.Person()
		p2 = model.Activity()
		self.assertEqual(int(p.id[-1]), int(p2.id[-1])-1)

	def test_int_per_type(self):
		# "int-per-type": each class has its own counter, so a different
		# class reuses the same number.
		model.factory.auto_assign_id = True
		model.factory.auto_id_type = "int-per-type"
		p = model.Person()
		p2 = model.Person()
		self.assertEqual(int(p.id[-1]), int(p2.id[-1])-1)
		p3 = model.Activity()
		self.assertEqual(int(p.id[-1]), int(p3.id[-1]))

	def test_int_per_segment(self):
		# "int-per-segment": classes sharing a URI segment share a counter.
		# Activity is forced onto Person's segment to prove sharing.
		model.factory.auto_assign_id = True
		model.factory._auto_id_segments = {}
		model.factory.auto_id_type = "int-per-segment"
		model.Activity._uri_segment = model.Person._uri_segment
		p = model.Person()
		p2 = model.Activity()
		self.assertEqual(int(p.id[-1]), int(p2.id[-1])-1)
		p3 = model.TimeSpan()
		self.assertEqual(int(p.id[-1]), int(p3.id[-1]))

	def test_uuid(self):
		model.factory.auto_assign_id = True
		model.factory.auto_id_type = "uuid"
		p = model.Person()
		self.assertTrue(p.id.startswith('urn:uuid:'))

	def test_prefixes(self):

		# With the prefix registered, the short form is kept as the id
		# and the expansion is stored separately on _full_id.
		model.factory.prefixes = {'fish':'http://example.org/ns/'}
		p3 = model.Person('fish:3')
		self.assertEqual(p3.id, 'fish:3')
		self.assertEqual(p3._full_id, 'http://example.org/ns/3')

		# Without the prefix, 'fish:4' is not a known scheme and the id
		# falls back to the factory base_url.
		model.factory.prefixes = {}
		p4 = model.Person('fish:4')
		self.assertTrue(p4.id.startswith(model.factory.base_url))

	def test_other_uris(self):
		# Known URI schemes (tag:, info:) pass through untouched; an
		# arbitrary colon-ridden string does not.
		p1 = model.Person(ident="tag:some-info-about-person")
		self.assertEqual(p1.id, "tag:some-info-about-person")
		p2 = model.Person(ident="info:ulan/500012345")
		self.assertEqual(p2.id, "info:ulan/500012345")
		p3 = model.Person(ident="some:random:thing:with:colons")
		self.assertFalse(p3.id == "some:random:thing:with:colons")

	def test_no_ident(self):

		# With auto-assignment on, omitted/None idents are generated;
		# an explicit empty string always means a blank node.
		model.factory.auto_assign_id = True
		p1 = model.Person() # auto assigned
		p2 = model.Person(ident=None) # auto assigned
		p3 = model.Person(ident="") # bnode explicitly

		self.assertTrue(p1.id.startswith('http'))
		self.assertTrue(p2.id.startswith('http'))
		self.assertEqual(p3.id, '')

		model.factory.auto_assign_id = False
		p4 = model.Person() # bnode is default
		p5 = model.Person(ident=None) # bnode is default
		p6 = model.Person(ident="") # bnode explicitly

		self.assertEqual(p4.id, '')
		self.assertEqual(p5.id, '')
		self.assertEqual(p6.id, '')
355 |
356 |
class TestBaseResource(unittest.TestCase):
	"""Core behavior of model objects: construction, property
	introspection, reference checking and multiplicity processing.
	"""

	def setUp(self):
		# 'parent_of' is not allowed by the default profile; whitelist it
		# so the property-check tests below can use it.
		override_okay(model.Person, 'parent_of')
		self.artist = model.Person('00001', 'Jane Doe')
		self.son = model.Person('00002', 'John Doe')

	def test_init(self):
		# Positional ident/label map onto id, _type/type and _label.
		self.assertEqual(self.artist.id, 'http://lod.example.org/museum/Person/00001')
		self.assertEqual(self.artist._type, 'crm:E21_Person')
		self.assertEqual(self.artist.type, 'Person')
		self.assertEqual(self.artist._label, 'Jane Doe')
		self.assertFalse(hasattr(self.artist, 'value'))
		self.assertFalse(hasattr(self.artist, 'has_type'))

	def test_check_prop(self):
		# _check_prop is expected to return 1 for a literal string value
		# and 2 for a resource-valued property.
		desc = self.artist._check_prop('_label', 'Jane Doe\'s Bio')
		self.assertEqual(desc, 1)
		parent = self.artist._check_prop('parent_of', self.son)
		self.assertEqual(parent, 2)

	def test_list_all_props(self):
		# All inherited properties, sorted; 'witnessed' sorts last.
		props = self.artist.list_all_props()
		props.sort()
		self.assertEqual(props[-1], 'witnessed')
		self.assertTrue('_label' in props)
		self.assertTrue('identified_by' in props)

	def test_list_my_props(self):
		# Only properties actually set on the instance, optionally
		# filtered by the value's class.
		p1 = model.Person()
		p1.classified_as = model.Type()
		props = p1.list_my_props()
		self.assertEqual(set(props), set(['classified_as', 'id']))
		props = p1.list_my_props(filter=model.Type)
		self.assertEqual(props, ['classified_as'])

	def test_allows_multiple(self):
		p = model.Person()
		self.assertTrue(p.allows_multiple('classified_as'))
		self.assertFalse(p.allows_multiple('born'))
		# Unknown property names raise rather than returning False.
		self.assertRaises(model.DataError, p.allows_multiple, 'fish')

	def test_check_reference(self):
		# Acceptable references: URI strings, {'id': ...} dicts, model
		# instances/classes, and lists of the same; anything else fails.
		self.assertTrue(self.artist._check_reference('http'))
		self.assertFalse(self.artist._check_reference('xxx'))
		self.assertTrue(self.artist._check_reference({'id': 'xxx'}))
		self.assertFalse(self.artist._check_reference({'xxx': 'yyy'}))
		self.assertTrue(self.artist._check_reference(self.son))
		self.assertTrue(self.artist._check_reference(['http']))
		self.assertFalse(self.artist._check_reference(['xxx', 'yyy']))
		self.assertTrue(self.artist._check_reference(model.Person))

	def test_multiplicity(self):
		# With process_multiplicity on, multi-valued properties become
		# lists while single-valued ones stay scalar.
		model.factory.process_multiplicity = True
		who = model.Actor()
		mmo = model.HumanMadeObject()
		prod = model.Production()
		mmo.produced_by = prod
		who.current_owner_of = mmo
		mmo.current_owner = who
		self.assertEqual(mmo.current_owner, [who])
		self.assertEqual(who.current_owner_of, [mmo])
		self.assertEqual(mmo.produced_by, prod)

	def test_init_params(self):
		# Keyword construction: ident is normalized (schema.org URIs are
		# compacted), and value= is an alias for content=.
		p1 = model.Person(ident="urn:uuid:1234")
		self.assertEqual(p1.id, "urn:uuid:1234")
		p2 = model.Person(ident="http://schema.org/Foo")
		self.assertEqual(p2.id, "schema:Foo")
		p3 = model.Name(content="Test")
		self.assertEqual(p3.content, "Test")
		c = model.MonetaryAmount(value=10)
		self.assertEqual(c.value, 10)
		n = model.Name(value="Rob")
		self.assertEqual(n.content, "Rob")
		i = model.Identifier(content="xyz123")
		self.assertEqual(i.content, "xyz123")
		i2 = model.Identifier(value="abc")
		self.assertEqual(i2.content, "abc")

	def test_dir(self):
		# dir() should surface the model's magic properties.
		props = dir(self.artist)
		self.assertTrue('identified_by' in props)
440 |
441 |
class TestPropertyCache(unittest.TestCase):
	"""Verify that cache_hierarchy() fills the per-class property map."""

	def test_cache_hierarchy(self):
		obj = model.HumanMadeObject()
		# Before caching, the class-level property map is empty.
		self.assertEqual(obj._all_properties, {})
		model.factory.cache_hierarchy()
		# Afterwards it holds the full inherited property set.
		self.assertTrue(len(obj._all_properties) > 50)
449 |
450 |
class TestMagicMethods(unittest.TestCase):
	"""The magic __setattr__ machinery: setting resources, materialized
	inverses, profile/property/multiplicity validation switches, and
	duplicate-instance handling.  Several tests flip global factory
	flags and restore them inline, so statement order matters.
	"""

	def setUp(self):
		# Whitelist 'parent_of' (outside the default profile).
		override_okay(model.Person, 'parent_of')
		# model.Person._properties['parent_of']['multiple'] = 1

	def test_set_magic_resource(self):
		# Repeated assignment to a multi-valued property accumulates
		# values into a list rather than overwriting.
		artist = model.Person('00001', 'Jane Doe')
		son = model.Person('00002', 'John Doe')
		daughter = model.Person('00002', 'Jenny Doe')
		son2 = model.Person('00002', 'Jim Doe')
		artist._set_magic_resource('parent_of', son)
		self.assertEqual(artist.parent_of, [son])
		artist._set_magic_resource('parent_of', daughter)
		try:
			self.assertIn(son, artist.parent_of)
			self.assertIn(daughter, artist.parent_of)
		except:
			# 2.6 doesn't have assertIn
			self.assertTrue(son in artist.parent_of)
			self.assertTrue(daughter in artist.parent_of)

		artist._set_magic_resource('parent_of', son2)
		try:
			self.assertIn(son, artist.parent_of)
			self.assertIn(daughter, artist.parent_of)
			self.assertIn(son2, artist.parent_of)
		except:
			self.assertTrue(son in artist.parent_of)
			self.assertTrue(daughter in artist.parent_of)
			self.assertTrue(son2 in artist.parent_of)

	def test_set_magic_resource_inverse(self):
		# With materialize_inverses on, setting parent_of also sets the
		# inverse 'parent' on the object.
		model.factory.materialize_inverses = True
		artist = model.Person('00001', 'Jane Doe')
		son = model.Person('00002', 'John Doe')
		artist._set_magic_resource('parent_of', son)
		self.assertEqual(son.parent, [artist])
		model.factory.materialize_inverses = False

	def test_validate_profile_off(self):
		# Out-of-profile classes/properties are allowed only while
		# validate_profile is off.
		model.factory.validate_profile = False
		ia = model.IdentifierAssignment()
		# If it's not turned off this should raise
		model.factory.validate_profile = True
		self.assertRaises(model.ProfileError, model.IdentifierAssignment)
		p1 = model.Person()
		self.assertRaises(model.ProfileError, p1.__setattr__, 'documented_in', "foo")

	def test_validation_unknown(self):
		# Unknown property names raise while validation is on.
		model.factory.validate_properties = True
		artist = model.Person('00001', 'Jane Doe')
		self.assertRaises(model.DataError, artist.__setattr__, 'unknown_property', 1)

	def test_validation_wrong_type(self):
		# A literal where a resource is required raises.
		model.factory.validate_properties = True
		artist = model.Person('00001', 'Jane Doe')
		self.assertRaises(model.DataError, artist.__setattr__, 'parent_of', 'Bad Value')

	def test_validation_off(self):
		# With validation off, arbitrary attributes can be set freely.
		model.factory.validate_properties = False
		artist = model.Person('00001', 'Jane Doe')
		artist.unknown_property = 1
		self.assertEqual(artist.unknown_property, 1)
		model.factory.validate_properties = True

	def test_validate_multiplicity(self):
		# A second value on a single-valued property ('born') raises while
		# multiplicity validation is on; with it off, values accumulate.
		model.factory.validate_multiplicity = True
		who = model.Person()
		b1 = model.Birth()
		who.born = b1
		b2 = model.Birth()
		self.assertRaises(model.ProfileError, who.__setattr__, 'born', b2)
		model.factory.validate_multiplicity = False
		who.born = b2
		self.assertEqual(who.born, [b1, b2])

	def test_not_multiple_instance(self):
		# Assigning the *same* instance twice: "error" raises, "drop"
		# keeps the duplicate in memory but serializes it once, and
		# "allow" serializes both.
		who = model.Person()
		n = model.Name(content="Test")
		who.identified_by = n

		model.factory.multiple_instances_per_property = "error"
		self.assertRaises(model.DataError, who.__setattr__, 'identified_by', n)
		self.assertEqual(who.identified_by, [n])

		model.factory.multiple_instances_per_property = "drop"
		who.identified_by = n
		self.assertEqual(who.identified_by, [n,n])
		# and check that only serialized once
		js = model.factory.toJSON(who)
		self.assertEqual(len(js['identified_by']), 1)

		model.factory.multiple_instances_per_property = "allow"
		js = model.factory.toJSON(who)
		self.assertEqual(len(js['identified_by']), 2)
547 |
548 |
class TestObjectEquality(unittest.TestCase):
	"""Objects compare equal when identifier and content both match."""

	def setUp(self):
		self.artist = model.Person('00001', 'Jane Doe')
		self.son = model.Person('00002', 'John Doe')
		self.daughter = model.Person('00002', 'Jenny Doe')
		self.son2 = model.Person('00002', 'Jim Doe')

	def test_eq_ident(self):
		# Same id and label compare equal, even across distinct instances.
		self.assertEqual(self.artist, self.artist)
		self.assertEqual(self.son, model.Person('00002', 'John Doe'))
		self.assertEqual(self.son2, model.Person('00002', 'Jim Doe'))
		self.assertEqual(self.daughter, model.Person('00002', 'Jenny Doe'))

	def test_eq_value(self):
		self.assertEqual(self.artist, model.Person('00001', 'Jane Doe'))
		self.assertEqual(self.son, self.son)
		self.assertEqual(self.son2, self.son2)
		self.assertEqual(self.daughter, self.daughter)

	def test_in_value(self):
		# Equality also drives container membership tests.
		people = (
			model.Person('00001', 'Jane Doe'), # artist
			model.Person('00002', 'Jim Doe') # son2
		)
		self.assertIn(self.artist, people)
		self.assertNotIn(self.son, people)
		self.assertNotIn(self.daughter, people)
		self.assertIn(self.son2, people)

	def test_neq(self):
		self.assertNotEqual(self.artist, self.son)
		self.assertNotEqual(self.artist, model.Person('00001', 'Jane')) # label differs
		self.assertNotEqual(self.artist, self.daughter)
		self.assertNotEqual(self.artist, self.son2)
		self.assertNotEqual(self.son, self.daughter)
		self.assertNotEqual(self.son, self.son2)
		self.assertNotEqual(self.daughter, self.son2)

	def nation(self, name, ident):
		"""Build a nation Place named *name* for the equality tests.

		NOTE(review): *ident* is unused -- the returned place keeps the
		tag: URI.  The original body also assigned an unused
		model.Place(ident=ident) to a dead local; that line has been
		removed.  The parameter is kept so existing callers still work.
		"""
		place = vocab.Place(ident='tag:getty.edu,2019:digital:pipeline:provenance:REPLACE-WITH-UUID#PLACE-COUNTRY-' + name, label=name)
		place.classified_as = vocab.instances['nation']
		place.identified_by = model.Name(ident='', content=name)
		return place

	def test_equality(self):
		from cromulent.model import factory
		# Two independently-built but identical places must compare equal.
		place1 = self.nation('Belgium', 'http://vocab.getty.edu/aat/300128207')
		place2 = self.nation('Belgium', 'http://vocab.getty.edu/aat/300128207')
		self.assertEqual(place1, place2)
599 |
if __name__ == '__main__':
	# Support running this file directly as well as via a test runner.
	unittest.main()
602 |
603 |
--------------------------------------------------------------------------------