├── README ├── tests ├── __init__.py ├── test_context.json ├── test_add_classification.py ├── test_multiple_instantiation.py ├── test_reader.py ├── test_currency.py ├── test_vocab.py ├── test_dimensions.py └── test_model.py ├── cromulent ├── __init__.py ├── data │ ├── __init__.py │ ├── key_order.json │ ├── overrides.json │ └── crm-profile.json ├── multiple_instantiation.py └── reader.py ├── requirements.txt ├── setup.cfg ├── experimental ├── crm.py ├── bibframe.py └── bibframe_reader.py ├── .travis.yml ├── setup.py ├── CHANGELOG.md ├── .gitignore ├── utils ├── old │ ├── merge_inverses.py │ └── make_inverses.py ├── info.py ├── make_jsonld_context.py ├── process_ontologies.py └── data │ ├── linkedart_crm_enhancements.xml │ └── linkedart.xml ├── examples ├── example.py ├── json-to-lod.py ├── sales-to-lod.py └── knoedler-to-lod.py ├── .circleci └── config.yml ├── README.md └── LICENSE /README: -------------------------------------------------------------------------------- 1 | README.md -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cromulent/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cromulent/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ordereddict 2 | rdflib 3 | PyLD 4 | -------------------------------------------------------------------------------- /tests/test_context.json: -------------------------------------------------------------------------------- 1 | {"@context": 
{"id": "@id"}} 2 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [wheel] 2 | universal = 1 3 | 4 | [check-manifest] 5 | ignore = 6 | .travis.yml 7 | tox.ini 8 | .gitignore 9 | 10 | -------------------------------------------------------------------------------- /experimental/crm.py: -------------------------------------------------------------------------------- 1 | import model 2 | from model import CromulentFactory, build_classes, \ 3 | KEY_ORDER_HASH, KEY_ORDER_DEFAULT 4 | 5 | factory = CromulentFactory("http://lod.example.org/museum/", \ 6 | context="http://linked.art/ns/context/1/full.jsonld") 7 | build_classes() 8 | model.factory = factory -------------------------------------------------------------------------------- /experimental/bibframe.py: -------------------------------------------------------------------------------- 1 | import model 2 | from model import CromulentFactory, build_classes, \ 3 | KEY_ORDER_HASH, KEY_ORDER_DEFAULT 4 | 5 | factory = CromulentFactory("http://lod.example.org/museum/", \ 6 | load_context=False) 7 | build_classes("utils/bibframe_vocab.tsv", top="rdf:Resource") 8 | model.factory = factory -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | dist: xenial 3 | 4 | python: 5 | - '2.7' 6 | - '3.6' 7 | - '3.7' 8 | - '3.8' 9 | install: 10 | - pip install coveralls ordereddict 11 | - python setup.py install 12 | notifications: 13 | email: 14 | recipients: 15 | - azaroth42@gmail.com 16 | script: 17 | coverage run --source=cromulent setup.py test 18 | after_success: 19 | coveralls 20 | -------------------------------------------------------------------------------- /tests/test_add_classification.py: 
-------------------------------------------------------------------------------- 1 | 2 | import unittest 3 | 4 | try: 5 | from collections import OrderedDict 6 | except: 7 | # 2.6 8 | from ordereddict import OrderedDict 9 | 10 | from cromulent import model, vocab 11 | from cromulent.model import factory 12 | 13 | class TestAddClassification(unittest.TestCase): 14 | def test_add_classification(self): 15 | amnt = model.MonetaryAmount(ident='') 16 | amnt.value = 7.0 17 | self.assertNotIn('Asking Price', factory.toString(amnt)) 18 | vocab.add_classification(amnt, vocab.AskingPrice) 19 | self.assertIn('Asking Price', factory.toString(amnt)) 20 | 21 | if __name__ == '__main__': 22 | unittest.main() 23 | -------------------------------------------------------------------------------- /tests/test_multiple_instantiation.py: -------------------------------------------------------------------------------- 1 | 2 | import unittest 3 | 4 | try: 5 | from collections import OrderedDict 6 | except: 7 | # 2.6 8 | from ordereddict import OrderedDict 9 | 10 | from cromulent import multiple_instantiation as mi 11 | from cromulent.model import factory, Person, DataError, Dimension 12 | 13 | 14 | class TestMIClasses(unittest.TestCase): 15 | 16 | def test_destruction(self): 17 | expect = OrderedDict([('id', u'http://lod.example.org/museum/Activity/1'), 18 | ('type', ['Destruction', 'Activity']), ('_label', "Test Destruction")]) 19 | mi.DestructionActivity._okayToUse = 1 20 | da = mi.DestructionActivity("1") 21 | da._label = "Test Destruction" 22 | factory.context_uri = "" 23 | dajs = factory.toJSON(da) 24 | self.assertEqual(dajs, expect) 25 | 26 | -------------------------------------------------------------------------------- /cromulent/multiple_instantiation.py: -------------------------------------------------------------------------------- 1 | 2 | # This assumes the default CIDOC-CRM, even though the model code 3 | # can generate classes for any ontology 4 | 5 | import inspect 6 | 
from cromulent.model import Destruction, EndOfExistence, Activity, Appellation, LinguisticObject 7 | 8 | # DestuctionActivity class as CRM has a Destruction Event and recommends multi-classing 9 | # WARNING: instantiating this class in the default profile will raise an error 10 | 11 | class DestructionActivity(Destruction, Activity): 12 | _uri_segment = "Activity" 13 | _type = ["crm:E6_Destruction", "crm:E7_Activity"] 14 | 15 | @property 16 | def type(self): 17 | return ["Destruction", "Activity"] 18 | DestructionActivity._classhier = inspect.getmro(DestructionActivity)[:-1] 19 | 20 | # And hence we make an EndOfExistence+Activity class 21 | # for all activities that end existences 22 | class EoEActivity(EndOfExistence, Activity): 23 | _uri_segment = "Activity" 24 | _type = ["crm:64_End_of_Existence", "crm:E7_Activity"] 25 | _niceType = ["EndOfExistence", "Activity"] 26 | 27 | @property 28 | def type(self): 29 | return ["EndOfExistence", "Activity"] 30 | 31 | EoEActivity._classhier = inspect.getmro(EoEActivity)[:-1] 32 | 33 | # No need for Linguistic Appellation any more, as we have E33_E41_Linguistic_Appellation -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | import sys 3 | 4 | if (sys.version_info[0:2] < (2,7)): 5 | install_requires =['ordereddict', 'future', 'rdflib', 'PyLD'] 6 | else: 7 | install_requires = ['rdflib', 'PyLD'] 8 | 9 | setup( 10 | name = 'cromulent', 11 | packages = ['cromulent'], 12 | package_data = { 13 | 'cromulent': ['data/crm_vocab.tsv', 'data/overrides.json', 14 | 'data/key_order.json', 'data/linked-art.json', 15 | 'data/cidoc-extension.json', 'data/crm-profile.json'] 16 | }, 17 | test_suite="tests", 18 | version = '0.16.11', 19 | description = 'A library for mapping CIDOC-CRM (v7.1) classes to Python objects', 20 | author = 'Rob Sanderson', 21 | author_email = 
'robert.sanderson@yale.edu', 22 | url = 'https://github.com/linked-art/crom', 23 | install_requires=install_requires, 24 | classifiers = [ 25 | "Programming Language :: Python", 26 | "Programming Language :: Python :: 3", 27 | "Programming Language :: Python :: 2", 28 | "License :: OSI Approved :: Apache Software License", 29 | "Development Status :: 3 - Alpha", 30 | "Intended Audience :: Developers", 31 | "Operating System :: OS Independent", 32 | "Topic :: Software Development :: Libraries :: Python Modules", 33 | ] 34 | ) 35 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Cromulent (CROM) Change Log 2 | 3 | Any notable changes to CROM that affect either functionality or output will be documented in this file (the format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)). 4 | 5 | ## [Unreleased] 2020-11-03 6 | 7 | ## Added 8 | 9 | * Added this change log [[DEV-6985](https://jira.getty.edu/browse/DEV-6985)]. 10 | 11 | * Reinstated the `Relationship` entity and its associated properties `relates_to`, `relates_from`, `related_to_by`, and `related_from_by` as these are in production data modelling use, as their sudden removal led to runtime exceptions and prevented code reliant on CROM from operating [[DEV-6985](https://jira.getty.edu/browse/DEV-6985)]. 12 | 13 | * Reinstated the `Geometry` and `CoordinateSystem` entities as these are in production data modelling use, as their sudden removal led to runtime exceptions and prevented code reliant on CROM from operating [[DEV-6985](https://jira.getty.edu/browse/DEV-6985)]. 14 | 15 | * Reinstated the `current_keeper` and `current_keeper_of` properties as these are in production data modelling use, as their sudden removal led to runtime exceptions and prevented code reliant on CROM from operating [[DEV-6985](https://jira.getty.edu/browse/DEV-6985)]. 
16 | 17 | ## Changed 18 | 19 | * Imported the updated Getty-local `linked-art.json` context document from the `getty-contexts` repository to ensure consistency [[DEV-6984](https://jira.getty.edu/browse/DEV-6984)]. 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | 91 | .DS_Store 92 | err_output 93 | tests/fishbat.bar 94 | -------------------------------------------------------------------------------- 
/utils/old/merge_inverses.py: -------------------------------------------------------------------------------- 1 | from lxml import etree 2 | import codecs 3 | 4 | NS = {'rdf':"http://www.w3.org/1999/02/22-rdf-syntax-ns#", 5 | 'xsd':"http://www.w3.org/2001/XMLSchema#", 6 | 'rdfs':"http://www.w3.org/2000/01/rdf-schema#", 7 | 'dcterms':"http://purl.org/dc/terms/", 8 | 'owl':"http://www.w3.org/2002/07/owl#", 9 | 'crm':"http://www.cidoc-crm.org/cidoc-crm/", 10 | 'xml': "http://www.w3.org/XML/1998/namespace" 11 | } 12 | 13 | fh = file('data/inverses.xml') 14 | data = fh.read() 15 | fh.close() 16 | dom = etree.XML(data) 17 | 18 | inverses = {} 19 | props = dom.xpath("//rdf:Property",namespaces=NS) 20 | for p in props: 21 | name = p.xpath('@rdf:about', namespaces=NS)[0] 22 | try: 23 | inv = p.xpath('./owl:inverseOf/@rdf:resource', namespaces=NS)[0] 24 | inverses[name] = inv 25 | except: 26 | pass 27 | 28 | fh = file('data/cidoc.xml') 29 | data = fh.read() 30 | fh.close() 31 | dom = etree.XML(data) 32 | 33 | # Now insert them into the right blocks 34 | 35 | for (n,i) in inverses.items(): 36 | try: 37 | elem = dom.xpath('//rdf:Property[@rdf:about="%s"]' % n, namespaces=NS)[0] 38 | if not elem.xpath('./owl:inverseOf', namespaces=NS): 39 | inv = etree.SubElement(elem, "{http://www.w3.org/2002/07/owl#}inverseOf") 40 | inv.set("{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource", i) 41 | inv.tail = "\n" 42 | except: 43 | print "Could not find property %s" % n 44 | 45 | 46 | # And rewrite the file 47 | 48 | fh = file('data/cidoc_inversed.xml', 'w') 49 | fh.write(etree.tostring(dom, pretty_print=True)) 50 | fh.close() 51 | -------------------------------------------------------------------------------- /cromulent/data/key_order.json: -------------------------------------------------------------------------------- 1 | { 2 | "@context": 0, 3 | "id": 1, 4 | "type": 2, 5 | "_label": 3, 6 | "classified_as": 4, 7 | 8 | "value": 5, 9 | "content": 5, 10 | "upper_value_limit": 6, 11 | 
"lower_value_limit": 7, 12 | "unit": 8, 13 | 14 | "identified_by": 10, 15 | "defined_by": 11, 16 | "referred_to_by" : 15, 17 | "about": 18, 18 | "technique": 19, 19 | 20 | "timespan": 20, 21 | "begin_of_the_begin": 21, 22 | "end_of_the_begin": 22, 23 | "begin_of_the_end": 23, 24 | "end_of_the_end": 24, 25 | "duration": 25, 26 | 27 | "started_by": 26, 28 | "continued": 26, 29 | "finished_by": 27, 30 | "continued_by": 27, 31 | 32 | "took_place_at": 30, 33 | "carried_out_by": 31, 34 | "used_specific_object": 33, 35 | "removed": 34, 36 | "diminished": 35, 37 | "added": 34, 38 | "augmented": 35, 39 | "transformed": 35, 40 | "produced": 38, 41 | "destroyed": 39, 42 | "born": 38, 43 | "died": 39, 44 | "formed": 38, 45 | "dissolved": 39, 46 | "created": 38, 47 | 48 | "assigned_by": 39, 49 | 50 | "carried_out": 40, 51 | "dimension": 41, 52 | "made_of": 42, 53 | "language": 42, 54 | "part_of": 43, 55 | "approximated_by": 44, 56 | "member_of": 45, 57 | 58 | "transferred_title_of": 50, 59 | "transferred_title_from": 51, 60 | "transferred_title_to": 52, 61 | "transferred_custody_of": 50, 62 | "transferred_custody_from": 51, 63 | "transferred_custody_to": 52, 64 | "paid_amount": 50, 65 | "paid_from": 51, 66 | "paid_to": 52, 67 | "moved": 50, 68 | "moved_from": 51, 69 | "moved_to": 52, 70 | "participant": 53, 71 | 72 | "shows": 60, 73 | "carries": 61, 74 | 75 | "consists_of": 10001, 76 | "composed_of": 10001, 77 | "part": 10001, 78 | "temporally_contains": 10001, 79 | "spatially_contains": 10001, 80 | "member": 10001 81 | } -------------------------------------------------------------------------------- /examples/example.py: -------------------------------------------------------------------------------- 1 | 2 | from cidoc_orm import factory, Document, Activity, Event, TimeSpan, ManMadeObject, Acquisition, Type 3 | 4 | # Locally "subclass" to create consistent patterns with E55 and AAT 5 | class Painting(ManMadeObject): 6 | def __init__(self, *args, **kw): 7 | super(Painting, 
self).__init__(*args, **kw) 8 | self.has_type = Type("http://vocab.getty.edu/aat/300033618") 9 | 10 | class LugtNumber(Identifier): 11 | def __init__(self, *args, **kw): 12 | super(LugtNumber, self).__init__(*args, **kw) 13 | # ??? 14 | self.has_type = Type("http://vocab.getty.edu/aat/300033618") 15 | 16 | class TMSNumber(Identifier): 17 | def __init__(self, *args, **kw): 18 | super(TMSNumber, self).__init__(*args, **kw) 19 | # Repository Number 20 | self.has_type = Type("http://vocab.getty.edu/aat/300404621") 21 | 22 | class LotNumber(Identifier): 23 | def __init__(self, *args, **kw): 24 | super(TMSNumber, self).__init__(*args, **kw) 25 | # Lot Number 26 | self.has_type = Type("http://vocab.getty.edu/aat/300404628") 27 | 28 | 29 | # Or actually subclass in an extension vocab 30 | class Mosaic(ManMadeObject): 31 | _type = "extension:Mosaic" 32 | 33 | factory.base_url = "http://data.getty.edu/provenance/" 34 | factory.default_lang = "en" 35 | 36 | catalog = Document("catalog") 37 | page = Document("catalog-entry") 38 | catalog.has_component = page 39 | auction = Activity("auction") 40 | catalog.documents = auction 41 | lot = Activity("lot") 42 | auction.consists_of = lot 43 | page.documents = lot 44 | txn = Acquisition("sale") 45 | lot.consists_of = txn 46 | what = Painting('my-painting') 47 | txn.transferred_title_of = what 48 | what.label = "My First Paint By Numbers" 49 | what.is_identified_by = TMSNumber("") 50 | 51 | 52 | print factory.toString(catalog, compact=False) 53 | -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | # Python CircleCI 2.0 configuration file 2 | # 3 | # Check https://circleci.com/docs/2.0/language-python/ for more details 4 | # 5 | version: 2 6 | jobs: 7 | build: 8 | docker: 9 | # specify the version you desire here 10 | # use `-browsers` prefix for selenium tests, e.g. 
`3.6.1-browsers` 11 | - image: circleci/python:3.8.0 12 | 13 | # Specify service dependencies here if necessary 14 | # CircleCI maintains a library of pre-built images 15 | # documented at https://circleci.com/docs/2.0/circleci-images/ 16 | # - image: circleci/postgres:9.4 17 | 18 | working_directory: ~/repo 19 | 20 | steps: 21 | - checkout 22 | 23 | # Download and cache dependencies 24 | - restore_cache: 25 | keys: 26 | - v1-dependencies-{{ checksum "requirements.txt" }} 27 | # fallback to using the latest cache if no exact match is found 28 | - v1-dependencies- 29 | 30 | - run: 31 | name: install dependencies 32 | command: | 33 | python3 -m venv venv 34 | . venv/bin/activate 35 | pip install -r requirements.txt 36 | 37 | - save_cache: 38 | paths: 39 | - ./venv 40 | key: v1-dependencies-{{ checksum "requirements.txt" }} 41 | 42 | # run tests! 43 | # this example uses Django's built-in test-runner 44 | # other common Python testing frameworks include pytest and nose 45 | # https://pytest.org 46 | # https://nose.readthedocs.io 47 | - run: 48 | name: run tests 49 | command: | 50 | . 
venv/bin/activate 51 | python setup.py test 52 | 53 | - store_artifacts: 54 | path: test-reports 55 | destination: test-reports 56 | -------------------------------------------------------------------------------- /utils/old/make_inverses.py: -------------------------------------------------------------------------------- 1 | from lxml import etree 2 | import codecs 3 | 4 | fh = file('cidoc.xml') 5 | data = fh.read() 6 | fh.close() 7 | 8 | NS = {'rdf':"http://www.w3.org/1999/02/22-rdf-syntax-ns#", 9 | 'xsd':"http://www.w3.org/2001/XMLSchema#", 10 | 'rdfs':"http://www.w3.org/2000/01/rdf-schema#", 11 | 'dcterms':"http://purl.org/dc/terms/", 12 | 'owl':"http://www.w3.org/2002/07/owl#", 13 | 'crm':"http://www.cidoc-crm.org/cidoc-crm/", 14 | 'xml': "http://www.w3.org/XML/1998/namespace" 15 | } 16 | 17 | dom = etree.XML(data) 18 | names = [] 19 | inverses = {} 20 | 21 | props = dom.xpath("//rdf:Property",namespaces=NS) 22 | for p in props: 23 | name = p.xpath('@rdf:about', namespaces=NS)[0] 24 | names.append(name) 25 | 26 | for p in props: 27 | name = p.xpath('@rdf:about', namespaces=NS)[0] 28 | fu = name.find('_') 29 | pid = name[:fu] 30 | if pid[-1] in ['a', 'b']: 31 | # No inverses for botb eote 32 | continue 33 | inverse = "" 34 | if pid[-1] == "i": 35 | pid = pid[:-1] 36 | else: 37 | pid = pid + "i" 38 | pid += "_" 39 | 40 | for i in names: 41 | if i.startswith(pid) and i != name: 42 | inverse = i 43 | break 44 | if inverse: 45 | inverses[name] = inverse 46 | 47 | # Now print ONLY the inverses 48 | outlines = [ 49 | '' 50 | ] 51 | 52 | for n in names: 53 | if n in inverses: 54 | outlines.append(' ' % n ) 55 | outlines.append(' ' % inverses[n]) 56 | outlines.append(' ') 57 | outlines.append('') 58 | outstr = '\n'.join(outlines) 59 | 60 | fh = file('data/inverses.xml', 'w') 61 | fh.write(outstr) 62 | fh.close() 63 | -------------------------------------------------------------------------------- /cromulent/data/overrides.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "P45": "made_of", 3 | "P7i": "location_of", 4 | "P5": "subState", 5 | "P5i": "subState_of", 6 | "P20i": "specific_purpose_of", 7 | "P42": "assigned_type", 8 | "P42i": "type_assigned_by", 9 | "P37": "assigned_identifier", 10 | "P37i": "identifier_assigned_by", 11 | "P35i": "condition_identified_by", 12 | 13 | "P28": "transferred_custody_from", 14 | "P29": "transferred_custody_to", 15 | "P29i": "acquired_custody_through", 16 | "P14i": "carried_out", 17 | "P140": "assigned_to", 18 | "P50": "current_custodian", 19 | "P50i": "current_custodian_of", 20 | 21 | "P9": "part", 22 | "P9i": "part_of", 23 | "P46": "part", 24 | "P46i": "part_of", 25 | "P86": "part_of", 26 | "P86i": "part", 27 | "P89": "part_of", 28 | "P89i": "part", 29 | "P106": "part", 30 | "P106i": "part_of", 31 | "P127i": "part", 32 | "P127": "part_of", 33 | 34 | "P148": "c_part", 35 | "P148i": "c_part_of", 36 | 37 | "P107": "member", 38 | "P107i": "member_of", 39 | "P56": "bears", 40 | "la:has_member": "member", 41 | "la:member_of": "member_of", 42 | 43 | "P32": "technique", 44 | "P33": "specific_technique", 45 | "P12": "involved", 46 | "P101": "general_use", 47 | "P100i": "died", 48 | "P74": "residence", 49 | 50 | "P65": "shows", 51 | "P2": "classified_as", 52 | "P190": "content", 53 | "P177": "assigned_property", 54 | 55 | "P133": "distinct_from", 56 | "P164i": "timespan_of_presence", 57 | "P151i": "participated_in_formation", 58 | "P165i": "incorporated_by", 59 | "P132": "volume_overlaps_with", 60 | "P135i": "type_created_by", 61 | "P139": "alternative", 62 | 63 | "P172": "spatially_contains", 64 | "P186i": "type_produced_by", 65 | "P168": "defined_by", 66 | 67 | "P165": "presence_of", 68 | "P195": "presence_of_thing", 69 | "P195i": "thing_presence", 70 | "P196i": "thing_defined_by", 71 | 72 | "skos:closeMatch": "close_match", 73 | "skos:exactMatch": "exact_match", 74 | "dcterms:conformsTo": "conforms_to", 75 | 
"dcterms:relation": "related", 76 | "schema:genre": "style", 77 | "rdfs:seeAlso": "see_also", 78 | "rdfs:label": "_label", 79 | "sci:O13_triggers": "caused", 80 | "sci:O13i_is_triggered_by": "caused_by", 81 | "sci:O19_encountered_object": "encountered", 82 | "sci:O19i_was_object_encountered_at": "encountered_by" 83 | } 84 | -------------------------------------------------------------------------------- /utils/info.py: -------------------------------------------------------------------------------- 1 | 2 | import sys, argparse 3 | from cromulent import model, vocab 4 | 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument('what') 7 | parser.add_argument('--okay', '--profile', dest="okay", type=bool) 8 | parser.add_argument('--filter', dest="filter") 9 | parser.add_argument('--self', dest="onlySelf", type=bool) 10 | args = parser.parse_args() 11 | 12 | def list_all_props(what, filter=None, okay=None): 13 | props = [] 14 | ks = [] 15 | for c in what._classhier: 16 | for k,v in c._all_properties.items(): 17 | if not k in ks and \ 18 | (not okay or (okay and v.profile_okay)) and \ 19 | (filter is None or isinstance(filter, v.range) or \ 20 | filter is v.range): 21 | props.append(v) 22 | ks.append(k) 23 | props.sort(key=lambda x: x.property) 24 | return props 25 | 26 | def list_my_props(what, filter=None, okay=None): 27 | props = [] 28 | ks = [] 29 | for k,v in what._all_properties.items(): 30 | if not k in ks and \ 31 | (not okay or (okay and v.profile_okay)) and \ 32 | (filter is None or isinstance(filter, v.range) or \ 33 | filter is v.range): 34 | props.append(v) 35 | ks.append(k) 36 | props.sort(key=lambda x: x.property) 37 | return props 38 | 39 | what = args.what 40 | try: 41 | c = getattr(model, what) 42 | except: 43 | try: 44 | c = getattr(vocab, what) 45 | except: 46 | print(f"Unknown model or vocab class: {what}") 47 | sys.exit(1) 48 | 49 | if args.filter: 50 | try: 51 | cf = getattr(model, args.filter) 52 | f = cf() 53 | except: 54 | f = None 55 | 
else: 56 | cf = None 57 | f = None 58 | 59 | 60 | print(f"Main Class: \033[95m{c.__name__}\033[0m") 61 | if cf: 62 | print(f"Filtered To: \033[95m{cf.__name__}\033[0m") 63 | else: 64 | print("Filtered To: None") 65 | print(f"Using Profile: {args.okay}") 66 | 67 | 68 | 69 | model.factory.validate_profile = False 70 | instance = c() 71 | 72 | if args.onlySelf: 73 | ap = list_my_props(instance, okay=args.okay, filter=f) 74 | else: 75 | ap = list_all_props(instance, okay=args.okay, filter=f) 76 | 77 | #ap2 = instance.list_all_props(okay=args.okay, filter=f) 78 | 79 | 80 | for pi in ap: 81 | if pi.property in ['close_match', 'exact_match']: 82 | continue 83 | out = f"{pi.property} ({pi.predicate})" 84 | if pi.inverse_property: 85 | inv = f"{pi.inverse_property} ({pi.inverse_predicate})" 86 | else: 87 | inv = "" 88 | if pi.range == str: 89 | rng = "\033[93mLiteral" 90 | else: 91 | rng = pi.range.__name__ 92 | # old skool colorizing 93 | print(f"\033[95m{what:<15} \033[92m{out:<50} / {inv:<50} \033[95m{rng}\033[0m") 94 | 95 | -------------------------------------------------------------------------------- /tests/test_reader.py: -------------------------------------------------------------------------------- 1 | 2 | import unittest 3 | 4 | try: 5 | from collections import OrderedDict 6 | except: 7 | # 2.6 8 | from ordereddict import OrderedDict 9 | 10 | from cromulent import reader 11 | from cromulent.model import factory, Person, DataError, BaseResource, \ 12 | Dimension, override_okay, AttributeAssignment 13 | 14 | from cromulent import vocab 15 | 16 | class TestReader(unittest.TestCase): 17 | 18 | def setUp(self): 19 | self.reader = reader.Reader() 20 | # ensure we can use parent_of 21 | override_okay(Person, 'parent_of') 22 | # Person._properties['parent_of']['multiple'] = 1 23 | 24 | def test_read(self): 25 | self.assertRaises(DataError, self.reader.read, "") 26 | self.assertRaises(DataError, self.reader.read, "This is not JSON") 27 | self.assertRaises(DataError, 
self.reader.read, "{}") 28 | 29 | whostr = '{"type": "Person", "_label": "me"}' 30 | self.assertTrue(isinstance(self.reader.read(whostr), Person)) 31 | 32 | whostr = '{"@context": "fishbat", "type": "Person", "_label": "me"}' 33 | self.assertTrue(isinstance(self.reader.read(whostr), Person)) 34 | 35 | levelstr = '{"type": "Person", "parent_of": {"type": "Person", "_label": "child"}}' 36 | self.assertTrue(isinstance(self.reader.read(levelstr).parent_of[0], Person)) 37 | 38 | basestr = '{"_label": "base"}' 39 | self.assertTrue(isinstance(self.reader.read(basestr), BaseResource)) 40 | 41 | unknown = '{"type":"FishBat"}' 42 | self.assertRaises(DataError, self.reader.read, unknown) 43 | 44 | unknown2 = '{"type":"Person", "fishbat": "bob"}' 45 | self.assertRaises(DataError, self.reader.read, unknown) 46 | 47 | def test_attrib_assign(self): 48 | vocab.add_attribute_assignment_check() 49 | 50 | data = """ 51 | { 52 | "id": "https://linked.art/example/activity/12", 53 | "type": "AttributeAssignment", 54 | "assigned": { 55 | "id": "https://linked.art/example/name/10", 56 | "type": "Name", 57 | "content": "Exhibition Specific Name" 58 | }, 59 | "assigned_property": "identified_by", 60 | "assigned_to": { 61 | "id": "https://linked.art/example/object/12", 62 | "type": "HumanMadeObject", 63 | "_label": "Real Painting Name" 64 | } 65 | } 66 | """ 67 | d = self.reader.read(data) 68 | self.assertTrue(isinstance(d, AttributeAssignment)) 69 | 70 | 71 | def test_vocab_collision(self): 72 | # Test that the algorithm picks the right vocab instance 73 | # if multiple have the same AAT term but different base class 74 | 75 | data = """ 76 | { 77 | "type": "LinguisticObject", 78 | "_label": "Sale recorded in catalog: B-267 0003 (1817) (record number 22947)", 79 | "part_of": [ 80 | { 81 | "type": "LinguisticObject", 82 | "_label": "Sale Catalog B-267", 83 | "classified_as": [ 84 | { 85 | "id": "http://vocab.getty.edu/aat/300026068", 86 | "type": "Type", 87 | "_label": "Auction Catalog" 88 | 
} 89 | ] 90 | } 91 | ] 92 | } 93 | """ 94 | d = self.reader.read(data) 95 | self.assertTrue(isinstance(d.part_of[0], vocab.AuctionCatalogText)) 96 | 97 | -------------------------------------------------------------------------------- /experimental/bibframe_reader.py: -------------------------------------------------------------------------------- 1 | from lxml import etree 2 | import codecs 3 | import json 4 | 5 | default_key_order = 10000 6 | 7 | NS = {'rdf':"http://www.w3.org/1999/02/22-rdf-syntax-ns#", 8 | 'xsd':"http://www.w3.org/2001/XMLSchema#", 9 | 'rdfs':"http://www.w3.org/2000/01/rdf-schema#", 10 | 'dcterms':"http://purl.org/dc/terms/", 11 | 'owl':"http://www.w3.org/2002/07/owl#", 12 | 'crm':"http://www.cidoc-crm.org/cidoc-crm/", 13 | 'skos':"http://www.w3.org/2004/02/skos/core#", 14 | 'xml': "http://www.w3.org/XML/1998/namespace" 15 | } 16 | 17 | fh = file('bibframe.rdf') 18 | data = fh.read() 19 | fh.close() 20 | dom = etree.XML(data) 21 | stuff = [] 22 | 23 | property_overrides = {} 24 | 25 | classes = dom.xpath("//rdfs:Class", namespaces=NS) 26 | 27 | if not classes: 28 | classes = dom.xpath('//owl:Class', namespaces=NS) 29 | 30 | for c in classes: 31 | label = c.xpath('./rdfs:label/text()', namespaces=NS)[0] 32 | try: 33 | comment = c.xpath('./rdfs:comment/text()', namespaces=NS) 34 | if not comment: 35 | comment = c.xpath('./skos:definition/text()', namespaces=NS) 36 | if comment: 37 | comment = comment[0] 38 | comment = comment.strip() 39 | comment = comment.replace('\n', '\\n').replace('\t', ' ') 40 | except: 41 | comment = "" 42 | name = c.xpath('@rdf:about', namespaces=NS)[0] 43 | 44 | subCls = c.xpath('./rdfs:subClassOf/@rdf:resource', namespaces=NS) 45 | if subCls: 46 | # could be multiples 47 | subCls = '|'.join(subCls) 48 | else: 49 | subCls = "" 50 | 51 | uc1 = name.rfind("/") 52 | ccname = name[uc1+1:] 53 | ccname = ccname.replace("_or_", "_Or_").replace("_of_", "_Of_") 54 | ccname = ccname.replace('-', '').replace('_', '') 55 | 56 | 
stuff.append([name, "class", ccname, label, comment, subCls]) 57 | 58 | props = dom.xpath("//rdf:Property",namespaces=NS) 59 | if not props: 60 | props = dom.xpath('//owl:DatatypeProperty', namespaces=NS) 61 | props.extend(dom.xpath('owl:ObjectProperty', namespaces=NS)) 62 | 63 | for p in props: 64 | label = p.xpath('./rdfs:label/text()', namespaces=NS)[0] 65 | try: 66 | comment = p.xpath('./rdfs:comment/text()', namespaces=NS) 67 | if not comment: 68 | comment = c.xpath('./skos:definition/text()', namespaces=NS) 69 | if comment: 70 | comment = comment[0] 71 | comment = comment.strip() 72 | comment = comment.replace('\n', '\\n').replace('\t', ' ') 73 | except: 74 | comment = "" 75 | 76 | name = p.xpath('@rdf:about', namespaces=NS)[0] 77 | domn = p.xpath('./rdfs:domain/@rdf:resource', namespaces=NS) 78 | if domn: 79 | domn = domn[0] 80 | for (k,v) in NS.items(): 81 | domn = domn.replace(v,"%s:" % k) 82 | else: 83 | domn = "" 84 | rang = p.xpath('./rdfs:range/@rdf:resource', namespaces=NS) 85 | if rang: 86 | rang = rang[0] 87 | for (k,v) in NS.items(): 88 | rang = rang.replace(v,"%s:" % k) 89 | else: 90 | rang = "" 91 | subProp = p.xpath('./rdfs:subPropertyOf/@rdf:resource', namespaces=NS) 92 | if subProp: 93 | subProp = subProp[0] 94 | else: 95 | subProp = "" 96 | 97 | inverse = p.xpath('./owl:inverseOf/@rdf:resource', namespaces=NS) 98 | if inverse: 99 | inverse = inverse[0] 100 | else: 101 | inverse = "" 102 | 103 | uc1 = name.find("_") 104 | pno = name[:uc1] 105 | if property_overrides.has_key(pno): 106 | ccname = property_overrides[pno] 107 | else: 108 | ccname = name[uc1+1:] 109 | ccname = ccname.replace("-", "") 110 | if ccname.startswith("is_"): 111 | ccname = ccname[3:] 112 | elif ccname.startswith("has_") or ccname.startswith("had_") or ccname.startswith("was_"): 113 | ccname = ccname[4:] 114 | 115 | # koi = str(key_order_hash.get(ccname, default_key_order)) 116 | koi = "10000" 117 | stuff.append([name, "property", ccname, label, comment, subProp, domn, 
rang, inverse, koi]) 118 | 119 | outdata = '\n'.join(['\t'.join(x) for x in stuff]) 120 | fh = codecs.open('bibframe_vocab.tsv', 'w', 'utf-8') 121 | fh.write(outdata) 122 | fh.close() 123 | -------------------------------------------------------------------------------- /tests/test_currency.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import unittest 4 | try: 5 | from contextlib import suppress 6 | except: 7 | # Python 2.7 8 | suppress = None 9 | import pprint 10 | from datetime import datetime 11 | from cromulent import model, vocab 12 | from cromulent.extract import extract_monetary_amount 13 | import cromulent.extract 14 | 15 | CUSTOM_MAPPING = { 16 | 'xxx': vocab.register_instance('xxx custom currency', {'parent': model.Currency, 'id': '999999999', 'label': 'My Dollars'}), 17 | 'zzz': 'us dollars' 18 | } 19 | 20 | class TestCurrencyExtraction(unittest.TestCase): 21 | ''' 22 | Test the ability to extract currency data. 
23 | ''' 24 | def setUp(self): 25 | pass 26 | 27 | def tearDown(self): 28 | pass 29 | 30 | def test_extract_simple(self): 31 | e = extract_monetary_amount({ 32 | 'price': '10.0', 33 | 'currency': 'pounds' 34 | }) 35 | self.assertEqual(e.type, 'MonetaryAmount') 36 | self.assertEqual(e._label, '10.00 pounds') 37 | self.assertEqual(e.value, 10) 38 | c = e.currency 39 | self.assertEqual(c.type, 'Currency') 40 | self.assertEqual(c._label, 'British Pounds') 41 | 42 | def test_extract_comma_separated(self): 43 | e = extract_monetary_amount({ 44 | 'price': '1,280.5', 45 | 'currency': 'pounds' 46 | }) 47 | self.assertEqual(e.type, 'MonetaryAmount') 48 | self.assertEqual(e._label, '1,280.50 pounds') 49 | self.assertEqual(e.value, 1280.50) 50 | c = e.currency 51 | self.assertEqual(c.type, 'Currency') 52 | self.assertEqual(c._label, 'British Pounds') 53 | 54 | def test_extract_label_digits(self): 55 | e = extract_monetary_amount({ 56 | 'price': '1,280.5', 57 | 'currency': 'pounds' 58 | }, truncate_label_digits=4) 59 | self.assertEqual(e.type, 'MonetaryAmount') 60 | self.assertEqual(e._label, '1,280.5000 pounds') 61 | self.assertEqual(e.value, 1280.50) 62 | c = e.currency 63 | self.assertEqual(c.type, 'Currency') 64 | self.assertEqual(c._label, 'British Pounds') 65 | 66 | def test_extract_multiple_comma_separated(self): 67 | e = extract_monetary_amount({ 68 | 'price': '1,310,720.5', 69 | 'currency': 'pounds' 70 | }) 71 | self.assertEqual(e.type, 'MonetaryAmount') 72 | self.assertEqual(e._label, '1,310,720.50 pounds') 73 | self.assertEqual(e.value, 1310720.5) 74 | c = e.currency 75 | self.assertEqual(c.type, 'Currency') 76 | self.assertEqual(c._label, 'British Pounds') 77 | 78 | def test_extract_est(self): 79 | e = extract_monetary_amount({ 80 | 'est_price': '12.0', 81 | 'currency': 'pounds' 82 | }) 83 | self.assertEqual(e.value, 12) 84 | c = e.currency 85 | self.assertEqual(e.classified_as[0]._label, 'Estimated Price') 86 | self.assertEqual(e.currency._label, 'British Pounds') 
87 | 88 | def test_extract_start(self): 89 | e = extract_monetary_amount({ 90 | 'start_price': '8.5', 91 | 'currency': 'pounds' 92 | }) 93 | self.assertEqual(e.value, 8.5) 94 | c = e.currency 95 | self.assertEqual(e.classified_as[0]._label, 'Starting Price') 96 | self.assertEqual(e.currency._label, 'British Pounds') 97 | 98 | def test_extract_custom_currency_key(self): 99 | d = { 100 | 'price': '7', 101 | 'currency': 'zzz' 102 | } 103 | with self.assertRaises(AttributeError): 104 | e = extract_monetary_amount(d) 105 | self.assertEqual(e.currency._label, 'Custom Currency') 106 | 107 | e = extract_monetary_amount(d, currency_mapping=CUSTOM_MAPPING) 108 | self.assertEqual(e.value, 7) 109 | self.assertEqual(e.currency._label, 'US Dollars') 110 | 111 | def test_extract_custom_currency_instance(self): 112 | d = { 113 | 'price': '7', 114 | 'currency': 'xxx' 115 | } 116 | with self.assertRaises(AttributeError): 117 | e = extract_monetary_amount(d) 118 | self.assertEqual(e.currency._label, 'Custom Currency') 119 | 120 | e = extract_monetary_amount(d, currency_mapping=CUSTOM_MAPPING) 121 | self.assertEqual(e.value, 7) 122 | self.assertEqual(e.currency._label, 'My Dollars') 123 | 124 | def test_extract_price_with_citation(self): 125 | d = { 126 | 'price': '7', 127 | 'currency': 'pounds', 128 | 'citation': 'crom test suite' 129 | } 130 | e = extract_monetary_amount(d, add_citations=True) 131 | self.assertEqual(e.value, 7) 132 | self.assertEqual(e.currency._label, 'British Pounds') 133 | self.assertEqual(e.referred_to_by[0].content, 'crom test suite') 134 | 135 | 136 | if __name__ == '__main__': 137 | unittest.main() 138 | -------------------------------------------------------------------------------- /cromulent/reader.py: -------------------------------------------------------------------------------- 1 | from cromulent import model, vocab 2 | from cromulent.model import factory, DataError, OrderedDict, BaseResource 3 | from cromulent.model import STR_TYPES 4 | 5 | import 
json 6 | 7 | class Reader(object): 8 | 9 | def __init__(self, validate_props=True, validate_profile=True): 10 | self.uri_object_map = {} 11 | self.forward_refs = [] 12 | self.vocab_props = ['assigned_property'] 13 | self.vocab_classes = {} 14 | self.validate_profile = validate_profile 15 | self.validate_props = validate_props 16 | 17 | for cx in dir(vocab): 18 | what = getattr(vocab, cx) 19 | # crying cat face -- type as a @property returns the function, not the value 20 | # when calling it on a class rather than an instance 21 | try: 22 | mytype = what._classhier[0].__name__ 23 | except AttributeError: 24 | continue 25 | # find classes 26 | if (cx[0].isupper() and not hasattr(model, cx) and type(what) == type): 27 | # class 28 | self.vocab_classes[(mytype, what._classification[0].id)] = what 29 | 30 | def read(self, data): 31 | if not data: 32 | raise DataError("No data provided: %r" % data) 33 | elif type(data) in STR_TYPES: 34 | try: 35 | data = json.loads(data) 36 | except: 37 | raise DataError("Data is not valid JSON") 38 | if not data: 39 | raise DataError("No Data provided") 40 | self.uri_object_map = {} 41 | self.forward_refs = [] 42 | try: 43 | what = self.construct(data) 44 | self.process_forward_refs() 45 | self.uri_object_map = {} 46 | self.forward_refs = [] 47 | return what 48 | except: 49 | raise 50 | 51 | def process_forward_refs(self): 52 | for (what, prop, uri) in self.forward_refs: 53 | if uri in self.uri_object_map: 54 | setattr(what, prop, self.uri_object_map[uri]) 55 | else: 56 | raise NotImplementedError("No class information for %s.%s = %s" % (what, prop, uri)) 57 | 58 | def construct(self, js): 59 | # pass in json, get back object 60 | if '@context' in js: 61 | del js['@context'] 62 | 63 | ident = js.get('id', '') 64 | typ = js.get('type', None) 65 | 66 | if typ == None: 67 | clx = BaseResource 68 | else: 69 | # Get class based on name 70 | try: 71 | clx = getattr(model, typ) 72 | except AttributeError: 73 | # No such class 74 | raise 
DataError("Resource %s has unknown class %s" % (ident, typ) ) 75 | 76 | # now check vocab.ext_classes to try and refine 77 | trash = None 78 | if 'classified_as' in js: 79 | for c in js['classified_as']: 80 | i = c.get('id', '') 81 | clx2 = self.vocab_classes.get((typ, i), None) 82 | if clx2 is not None: 83 | clx = clx2 84 | trash = c 85 | break 86 | 87 | what = clx(ident=ident) 88 | what._validate_profile = self.validate_profile 89 | self.uri_object_map[ident] = what 90 | 91 | if self.validate_props: 92 | propList = what.list_all_props() 93 | 94 | # sort data by KOH to minimize chance of bad backrefs 95 | itms = list(js.items()) 96 | itms.sort(key=lambda x: factory.key_order_hash.get(x[0], 10000)) 97 | 98 | for (prop, value) in itms: 99 | if prop in ['id', 'type']: 100 | continue 101 | 102 | if self.validate_props and not prop in propList: 103 | raise DataError("Unknown property %s on %s" % (prop, clx.__name__)) 104 | 105 | # Climb looking for range 106 | for c in what._classhier: 107 | if prop in c._all_properties: 108 | rng = c._all_properties[prop].range 109 | break 110 | 111 | if type(value) != list: 112 | value = [value] 113 | for subvalue in value: 114 | if trash is not None and prop == 'classified_as' and subvalue == trash: 115 | continue 116 | if rng == str: 117 | setattr(what, prop, subvalue) 118 | elif type(subvalue) == dict or isinstance(subvalue, OrderedDict): 119 | # recurse ... 
120 | val = self.construct(subvalue) 121 | setattr(what, prop, val) 122 | elif type(subvalue) in STR_TYPES and prop in self.vocab_props: 123 | # keep as string 124 | setattr(what, prop, subvalue) 125 | elif type(subvalue) in STR_TYPES: 126 | # raw URI to be made into a class of type rng 127 | # or back reference 128 | if subvalue in self.uri_object_map: 129 | setattr(what, prop, self.uri_object_map[subvalue]) 130 | elif rng in [model.Type, BaseResource]: 131 | # Always a X, often no more info 132 | setattr(what, prop, rng(ident=subvalue)) 133 | else: 134 | self.forward_refs.append([what, prop, subvalue]) 135 | else: 136 | # No idea!! 137 | raise DataError("Value %r is not expected for %s" % (subvalue, prop)) 138 | 139 | return what 140 | 141 | 142 | -------------------------------------------------------------------------------- /tests/test_vocab.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import sys 3 | import os 4 | 5 | from cromulent import vocab, model 6 | from cromulent.model import factory 7 | 8 | class TestClassBuilder(unittest.TestCase): 9 | def setUp(self): 10 | pass 11 | 12 | def tearDown(self): 13 | pass 14 | 15 | def test_class(self): 16 | vocab.register_aat_class("TestObject1", {"parent": model.HumanMadeObject, "id": "1", "label": "example 1"}) 17 | from cromulent.vocab import TestObject1 18 | self.assertEqual(TestObject1._classification[0].id, 'http://vocab.getty.edu/aat/1') 19 | 20 | def test_instance(self): 21 | vocab.register_instance("TestMaterial2", {"parent": model.Material, "id": "2", "label": "example 2"}) 22 | self.assertTrue('TestMaterial2' in vocab.instances) 23 | tm2 = vocab.instances['TestMaterial2'] 24 | self.assertEqual(tm2.id, "http://vocab.getty.edu/aat/2") 25 | 26 | def test_metatype(self): 27 | vocab.register_instance("example", {"parent": model.Type, "id": "3", "label": "example type"}) 28 | vocab.register_aat_class("TestObject2", 29 | {"parent": 
model.HumanMadeObject, "id": "4", "label": "example typed object", "metatype": "example"}) 30 | from cromulent.vocab import TestObject2 31 | self.assertEqual(TestObject2._classification[0].classified_as[0].id, 'http://vocab.getty.edu/aat/3') 32 | 33 | def test_multitype(self): 34 | from cromulent.vocab import make_multitype_obj, Painting, Drawing 35 | inst = make_multitype_obj(Painting, Drawing) 36 | self.assertTrue(isinstance(inst, Painting)) 37 | self.assertTrue(len(inst.classified_as) == 2) 38 | self.assertTrue(inst.classified_as[1].id == "http://vocab.getty.edu/aat/300033973") 39 | 40 | from cromulent.model import HumanMadeObject 41 | 42 | inst = make_multitype_obj(HumanMadeObject, Painting) 43 | self.assertTrue(len(inst.classified_as) == 1) 44 | self.assertTrue(inst.classified_as[0].id == "http://vocab.getty.edu/aat/300033618") 45 | 46 | def test_conceptual_parts(self): 47 | r = model.Right() 48 | r2 = model.Right() 49 | self.assertRaises(model.DataError, r.__setattr__, 'part', r2) 50 | r.c_part = r2 51 | self.assertTrue(r2 in r.c_part) 52 | 53 | vocab.conceptual_only_parts() 54 | r3 = model.Right() 55 | r4 = model.Right() 56 | r3.part = r4 57 | self.assertTrue(r4 in r3.c_part) 58 | self.assertTrue("part" in model.factory.toJSON(r3)) 59 | self.assertTrue(r4 in r3.part) 60 | 61 | 62 | def test_art_setter(self): 63 | p = model.HumanMadeObject("a", art=1) 64 | p._label = "a" 65 | pj = p._toJSON(done={}) 66 | self.assertFalse(pj.get('classified_as', None)) 67 | vocab.add_art_setter() 68 | p2 = vocab.Painting("b", art=1) 69 | p2j = p2._toJSON(done={}) 70 | 71 | def test_aa_check(self): 72 | 73 | # Make sure that some other test hasn't set it 74 | try: 75 | del model.AttributeAssignment.set_assigned_property 76 | except: 77 | pass 78 | 79 | t = model.Type() 80 | aa = model.AttributeAssignment() 81 | # First check that aa accepts a type 82 | aa.assigned_property = t 83 | # And will not accept a string 84 | self.assertRaises(model.DataError, aa.__setattr__, 
"assigned_property", "classified_as") 85 | 86 | # Check we can set anything to assigned / assigned_to 87 | aa.assigned_property = None 88 | aa.assigned = aa 89 | aa.assigned_to = aa 90 | self.assertEqual(aa.assigned, aa) 91 | self.assertEqual(aa.assigned_to, aa) 92 | 93 | vocab.add_attribute_assignment_check() 94 | 95 | # This should fail right now as can't classify as an AA 96 | self.assertRaises(model.DataError, aa.__setattr__, "assigned_property", "classified_as") 97 | aa.assigned = None 98 | aa.assigned_to = None 99 | aa.assigned = t 100 | aa.assigned_to = t 101 | aa.assigned_property = "classified_as" 102 | self.assertEqual(aa.assigned_property, 'classified_as') 103 | 104 | 105 | def test_boundary_setter(self): 106 | vocab.add_linked_art_boundary_check() 107 | p = model.Person() 108 | p2 = model.Person() 109 | n = model.Name() 110 | n.content = "Test" 111 | p2.identified_by = n 112 | p.exact_match = p2 113 | # Now, Test should not appear in the resulting JSON of p 114 | factory.linked_art_boundaries = True 115 | js = factory.toJSON(p) 116 | self.assertTrue(not 'identified_by' in js['exact_match'][0]) 117 | factory.linked_art_boundaries = False 118 | js = factory.toJSON(p) 119 | self.assertTrue('identified_by' in js['exact_match'][0]) 120 | 121 | def test_procurement_boundary(self): 122 | vocab.add_linked_art_boundary_check() 123 | a = model.Activity() 124 | p = vocab.ProvenanceEntry() 125 | a.caused = p 126 | js = factory.toJSON(a) 127 | self.assertTrue(not 'classified_as' in js['caused'][0]) 128 | 129 | def test_linguistic_object_boundary(self): 130 | vocab.add_linked_art_boundary_check() 131 | jrnl = vocab.JournalText(label="journal") 132 | issue = vocab.IssueText(label="issue") 133 | issue.part_of = jrnl 134 | issue.referred_to_by = vocab.MaterialStatement(content="Statement") 135 | 136 | js = factory.toJSON(issue) 137 | # Have not embedded journal in issue 138 | self.assertTrue(not 'classified_as' in js['part_of'][0]) 139 | # Have embedded statement in 
issue 140 | self.assertTrue('content' in js['referred_to_by'][0]) 141 | self.assertTrue('type' in js['referred_to_by'][0]['classified_as'][0]['classified_as'][0]) 142 | 143 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/thegetty/crom.svg?branch=master)](https://travis-ci.org/thegetty/crom) [![Coverage Status](https://coveralls.io/repos/github/thegetty/crom/badge.svg?branch=master)](https://coveralls.io/github/thegetty/crom?branch=master) 2 | 3 | # Cromulent 4 | 5 | A Python library to make creation of CIDOC CRM easier by mapping classes/predicates to python objects/properties, thereby making the CRM "CRoMulent", a Simpsons neologism for "acceptable" or "fine". 6 | 7 | ## Status: Beta 8 | 9 | The core vocabulary loading functionality is reasonably stable. The vocabulary section is expanding as we find new, useful terms to include and will likely constantly change. 10 | 11 | The code is actively being developed and compatibility-breaking changes are thus to be expected as we use it in various projects across The J Paul Getty Trust, and beyond. 12 | 13 | ## How to Use It 14 | 15 | ### Basic Usage 16 | 17 | Import the classes from the model module. As the classes are dynamically generated, they're not in the code but will be there once the `build_classes` function has been called. 18 | 19 | ```python 20 | from cromulent.model import factory, Group 21 | g1 = Group(ident="Organization") 22 | g2 = Group(ident="Department") 23 | g1.member = g2 24 | print(factory.toString(g1, compact=False)) 25 | ``` 26 | 27 | The constructor for the classes takes the following parameters: 28 | 29 | * `ident` - an identifier to use for this instance. If specified, it should be a URI represented as a string. If it is the empty string, it will result in no identifier. 
If not specified, or specified as `None`, then it will be auto-generated by the factory if `auto_assign_id` is true, or if `auto_assign_id` is false, then it will result in no identifier. 30 | * `label` - a human readable label for the resource, to act as internal documentation for the data 31 | * `value` or `content` - a data value for the class. Dimensions and MonetaryAmounts use `value` which must be a number, and Name, Identifier, LinguisticObject and similar use `content` which must be a string. 32 | * Additional keywords may be passed in, and will be sent to class-specific initialization code. 33 | 34 | 35 | ### Vocabulary 36 | 37 | ```python 38 | from cromulent.model import factory 39 | from cromulent.vocab import Height 40 | h = Height() 41 | h.value = 6 42 | print(factory.toString(h, compact=False)) 43 | ``` 44 | 45 | ### Tricks and Gotchas 46 | 47 | * Assigning to the same property repeatedly does NOT overwrite the value, instead it appends. To overwrite a value, instead set it to a false value first. 48 | 49 | 50 | ### Factory settings 51 | 52 | There are quite a few settings for how the module works, which are managed by a `factory` object. 
53 | 54 | URI and File System Configuration: 55 | * `base_url` The base url on to which to append any slug given when an object is created 56 | * `base_dir` The base directory into which to write files, via factory.toFile() 57 | * `filename_extension` The extension to use on files written via toFile(), defaults to ".json" 58 | * `default_lang` The code for the default language to use on text values 59 | * `context_uri` The URI to use for `@context` in the JSON-LD serialization 60 | * `context_json` The parsed JSON object of the context from which the prefixes are derived 61 | * `full_names` Should the serialization use the full CRM names for classes and properties instead of the more readable ones defined in the mapping, defaults to False 62 | * `prefixes` A dictionary of prefix to URI for URIs to compress down to `prefix:slug` format 63 | * `prefixes_rev` The reverse of the prefixes dictionary 64 | * `pipe_scoped_contexts` A convenience setting for generating documentation, where properties that map to the same JSON output are represented as `short_name|full_name` to be post-processed. 65 | * `json_indent` How many spaces should each level of indentation be when serializing to a human readable form, defaults to 2 66 | * `id_type_label` Should the id, type and label properties all be used when serializing resources that have already been processed, defaults to True 67 | * `elasticsearch_compatible` Despite JSON-LD 1.0 compaction rules, should a single URI be represented as {"@id": "URI"} rather than just "URI", to make the resulting JSON compatible with elasticsearch and similar JSON processing engines. Defaults to False. 68 | * `serialize_all_resources` NOT YET IMPLEMENTED. If true, then all resources will be serialized separately, not just the top level resource. 
69 | 70 | Model Validation and Generation: 71 | * `materialize_inverses` Should the inverse relationships be set automatically, defaults to False 72 | * `validate_properties` Should the model be validated at run time when setting properties, defaults to True (this allows you to save processing time once you're certain your code does the right thing) 73 | * `validate_properties` Should the properties be validated as being part of the model at all 74 | * `validate_profile` Should the profile of which terms should be used be validated 75 | * `process_multiplicity` Should properties that allow multiple values always be an array 76 | * `validate_range` Should the object be validated that it is legal to be the value of the property 77 | * `auto_assign_id` Should a URI be autogenerated and assigned, defaults to True 78 | * `auto_id_type` The method by which the URI is generated, taken from the following values: 79 | * "int" (just increment an integer in a single value space) 80 | * "int-per-type" (increment an integer, with a separate value space per class) 81 | * "int-per-segment" (increment an integer, with a separate value space per URI segment associated with a class) 82 | * "uuid" (just use UUIDs everywhere) 83 | 84 | Internal: 85 | * `debug_level` Settings for debugging errors and warnings, defaults to "warn" 86 | * `log_stream` An object implementing the stream API to write log messages to, defaults to sys.stderr 87 | 88 | 89 | 90 | ## How it Works 91 | 92 | At import time, the library parses the vocabulary data file (data/crm_vocab.tsv) and creates Python classes in the module's global scope from each of the defined RDF classes. The names of the classes are intended to be easy to use and remember, not necessarily identical to the CRM ontology's names. It also records the properties that can be used with that class, and at run time checks whether the property is defined and that the value fits the defined range. 
93 | 94 | ## Hacking 95 | 96 | You can change the mapping by tweaking `utils/vocab_reader.py` and rerunning it to build a new TSV input file. See also the experimental code for loading completely different ontologies. 97 | 98 | -------------------------------------------------------------------------------- /utils/make_jsonld_context.py: -------------------------------------------------------------------------------- 1 | 2 | import codecs 3 | import json 4 | 5 | try: 6 | from collections import OrderedDict 7 | except: 8 | try: 9 | from ordereddict import OrderedDict 10 | except: 11 | raise Exception("To run with old pythons you must: easy_install ordereddict") 12 | 13 | fn = '../cromulent/data/crm_vocab.tsv' 14 | fh = codecs.open(fn, 'r', 'utf-8') 15 | lines = fh.readlines()[1:] # Chomp header line 16 | fh.close() 17 | 18 | context = OrderedDict() 19 | context['@version'] = 1.1 20 | context['crm'] = "http://www.cidoc-crm.org/cidoc-crm/" 21 | context['sci'] = "http://www.ics.forth.gr/isl/CRMsci/" 22 | context['rdf'] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" 23 | context['rdfs'] = "http://www.w3.org/2000/01/rdf-schema#" 24 | context['dc'] = "http://purl.org/dc/elements/1.1/" 25 | context['dcterms'] = "http://purl.org/dc/terms/" 26 | context['schema'] = "http://schema.org/" 27 | context['skos'] = "http://www.w3.org/2004/02/skos/core#" 28 | context['foaf'] = 'http://xmlns.com/foaf/0.1/' 29 | context['xsd'] = "http://www.w3.org/2001/XMLSchema#" 30 | context['dig'] = "http://www.ics.forth.gr/isl/CRMdig/" 31 | context["la"] = "https://linked.art/ns/terms/" 32 | context["archaeo"] = "http://www.cidoc-crm.org/cidoc-crm/CRMarchaeo/" 33 | 34 | ## These are only aliases. The processing is defined by the spec. 
35 | context['id'] = "@id" 36 | context['type'] = "@type" 37 | 38 | extension = OrderedDict() 39 | extension['@version'] = 1.1 40 | extension['crm'] = "http://www.cidoc-crm.org/cidoc-crm/" 41 | 42 | vocab_properties = ["assigned_property"] 43 | 44 | parts = { 45 | "P9": ["crm:P9_consists_of", "crm:P9i_forms_part_of"], 46 | "P46": ["crm:P46_is_composed_of", "crm:P46i_forms_part_of"], 47 | "P106": ["crm:P106_is_composed_of", "crm:P106i_forms_part_of"], 48 | "P86": ["crm:P86i_contains", "crm:P86_falls_within"], 49 | "P89": ["crm:P89i_contains", "crm:P89_falls_within"], 50 | "P148": ["crm:P148_has_component", "crm:P148i_is_component_of"], 51 | "skos": ["skos:narrower", "skos:broader"], 52 | "set": ["la:has_member", "la:member_of"], 53 | "P107": ["crm:P107_has_current_or_former_member", "crm:P107i_is_current_or_former_member_of"] 54 | } 55 | 56 | p177_context = { 57 | "part": None, 58 | "temporal_part": "crm:P9_consists_of", 59 | "physical_part": "crm:P46_is_composed_of", 60 | "symbolic_part": "crm:P106_is_composed_of", 61 | "propositional_part": "crm:P148_has_component", 62 | "timespan_part": "crm:P86i_contains", 63 | "location_part": "crm:P89i_contains", 64 | "interest_part": "la:interest_part", 65 | "part_of": None, 66 | "temporal_part_of": "crm:P9i_forms_part_of", 67 | "physical_part_of": "crm:P46i_forms_part_of", 68 | "symbolic_part_of": "crm:P106i_forms_part_of", 69 | "propositional_part_of": "crm:P148i_is_component_of", 70 | "timespan_part_of": "crm:P86_falls_within", 71 | "location_part_of": "crm:P89_falls_within", 72 | } 73 | 74 | scoped_classes = { 75 | "Activity": "P9", 76 | "Acquisition": "P9", 77 | "TransferOfCustody": "P9", 78 | "Production": "P9", 79 | "AttributeAssignment": "P9", 80 | "HumanMadeObject": "P46", 81 | "LinguisticObject": "P106", 82 | "VisualItem": "P106", # XXX This is the symbolic partitioning, not the conceptual partitioning of P149 83 | "Identifier": "P106", 84 | "TimeSpan": "P86", 85 | "Place": "P89", 86 | "Type": "skos", 87 | 
"Language": "skos", 88 | "Material": "skos", 89 | "MeasurementUnit": "skos", 90 | "BeginningOfExistence": "P9", 91 | "EndOfExistence": "P9", 92 | "Creation": "P9", 93 | "Formation": "P9", 94 | "InformationObject": "P106", 95 | "Transformation": "P9", 96 | "Joining": "P9", 97 | "Leaving": "P9", 98 | "PropositionalObject": "P148", 99 | "Currency": "skos", 100 | "Payment": "P9", 101 | "Right": "P148", 102 | "Name": "P106", 103 | "Birth": "P9", 104 | "Death": "P9", 105 | "Event": "P9", 106 | "Destruction": "P9", 107 | "Move": "P9", 108 | "Modification": "P9", 109 | "Dissolution": "P9", 110 | "Period": "P9", 111 | "PhysicalThing": "P46", 112 | "PhysicalObject": "P46", 113 | "PhysicalFeature": "P46", 114 | "BiologicalObject": "P46", 115 | "Site": "P46", 116 | "PhysicalHumanMadeThing": "P46", 117 | "HumanMadeFeature": "P46", 118 | "Title": "P106", 119 | "Inscription": "P106", 120 | "Mark": "P106", 121 | "Appellation": "P106", 122 | "PartAddition": "P9", 123 | "PartRemoval": "P9", 124 | "SymbolicObject": "P106", 125 | "Purchase": "P9", 126 | "Set": "set", 127 | "Group": "P107", 128 | "Person": "P107" 129 | } 130 | 131 | other_scoped = { 132 | } 133 | 134 | # enforce these in the context 135 | literal_types = [ 136 | "xsd:dateTime" 137 | ] 138 | # Let these default 139 | empty_literal_types = [ 140 | "rdfs:Literal", 141 | "xsd:string" 142 | ] 143 | 144 | 145 | for l in lines: 146 | l = l[:-1] # chomp 147 | info= l.split('\t') 148 | name = info[0] 149 | if info[1] == "class": 150 | # map json key to ontology for @type:@vocab 151 | ctname = info[2] 152 | if name.startswith("E"): 153 | name = "crm:%s" % name 154 | context[ctname] = {"@id": name} 155 | if ctname in scoped_classes: 156 | part = parts[scoped_classes[ctname]][0] 157 | part_of = parts[scoped_classes[ctname]][1] 158 | 159 | # XXX member_of needs to be added to person and Group as Group one 160 | # and member_of_set for Set one 161 | # then member_of is Set for everything else 162 | 163 | if scoped_classes[ctname] in 
['set', 'P107']: 164 | context[ctname]['@context'] = { 165 | "member": {"@id": part, "@type": "@id", "@container": "@set"}, 166 | "member_of": {"@id": part_of, "@type": "@id", "@container": "@set"} 167 | } 168 | else: 169 | context[ctname]['@context'] = { 170 | "part": {"@id": part, "@type": "@id", "@container": "@set"}, 171 | "part_of": {"@id": part_of, "@type": "@id", "@container": "@set"}, 172 | "member_of": {"@id": parts["set"][1], "@type": "@id", "@container": "@set"} 173 | } 174 | # Add other scopes if needed 175 | if ctname in other_scoped: 176 | context[ctname]['@context'] = other_scoped[ctname] 177 | 178 | else: 179 | ctname = info[2] 180 | write = not ctname in ['part', 'part_of', 'member', 'member_of'] 181 | # These need to be added correctly to all parents in the ontology 182 | # ... as above 183 | 184 | dmn = info[6] 185 | rng = info[7] 186 | mult = info[11] or '1' 187 | if ctname in context: 188 | print("Already found: %s (%s vs %s)" % (ctname, context[ctname]['@id'], name)) 189 | else: 190 | 191 | if rng: 192 | if rng in empty_literal_types: 193 | typ = None 194 | elif rng in literal_types: 195 | typ = rng 196 | elif ctname in vocab_properties: 197 | typ = "@vocab" 198 | else: 199 | typ = "@id" 200 | else: 201 | typ = None 202 | 203 | if name.startswith("P"): 204 | name = "crm:%s" % name 205 | 206 | if write: 207 | if not typ: 208 | context[ctname] = {"@id": name} 209 | elif mult == '1': 210 | context[ctname] = {"@id": name, "@type": typ, "@container":"@set"} 211 | else: 212 | context[ctname] = {"@id": name, "@type": typ} 213 | 214 | if ctname == "assigned_property_type": 215 | context['assigned_property_type']['@context'] = p177_context 216 | 217 | # Otherwise, we're part / part_of, so ignore 218 | # print("scoped context: %s: %s on %s" % (ctname, name, dmn)) 219 | 220 | ctxt = {"@context": context} 221 | 222 | outstr = json.dumps(ctxt, indent=2) 223 | fh = open("../cromulent/data/linked-art.json", 'w') 224 | fh.write(outstr) 225 | fh.close() 226 | 
-------------------------------------------------------------------------------- /utils/process_ontologies.py: -------------------------------------------------------------------------------- 1 | from lxml import etree 2 | import codecs 3 | import json 4 | import sys 5 | 6 | PROFILE_ONLY = '--profile' in sys.argv 7 | default_key_order = 10000 8 | 9 | NS = {'rdf':"http://www.w3.org/1999/02/22-rdf-syntax-ns#", 10 | 'xsd':"http://www.w3.org/2001/XMLSchema#", 11 | 'rdfs':"http://www.w3.org/2000/01/rdf-schema#", 12 | 'dcterms':"http://purl.org/dc/terms/", 13 | 'owl':"http://www.w3.org/2002/07/owl#", 14 | 'crm':"http://www.cidoc-crm.org/cidoc-crm/", 15 | 'xml': "http://www.w3.org/XML/1998/namespace", 16 | 'ore': "http://www.openarchives.org/ore/terms/", 17 | 'la': "https://linked.art/ns/terms/", 18 | "skos": "http://www.w3.org/2004/02/skos/core#", 19 | "schema": "http://schema.org/", 20 | "dc": "http://purl.org/dc/elements/1.1/", 21 | "geo": "http://www.ics.forth.gr/isl/CRMgeo/", 22 | "dig": "http://www.ics.forth.gr/isl/CRMdig/", 23 | "sci": "http://www.ics.forth.gr/isl/CRMsci/", 24 | "archaeo": "http://www.cidoc-crm.org/cidoc-crm/CRMarchaeo/" 25 | } 26 | 27 | 28 | # Order imposed by the library 29 | # @context = 0, id = 1, rdf:type = 2 30 | # rdfs:label = 5, rdf:value = 6, dc:description = 7 31 | 32 | fh = open('../cromulent/data/key_order.json') 33 | data = fh.read() 34 | fh.close() 35 | key_order_hash = json.loads(data) 36 | 37 | # Allow configuration of overrides for the mapping of ontology to python/json 38 | fh = open('../cromulent/data/overrides.json') 39 | data = fh.read() 40 | fh.close() 41 | property_overrides = json.loads(data) 42 | 43 | # Allow subsetting of CRM into in-use / not-in-use to enable the library 44 | # to warn on instantiation of not-in-use properties or classes 45 | fh = open('../cromulent/data/crm-profile.json') 46 | data = fh.read() 47 | fh.close() 48 | profile_flags = json.loads(data) 49 | 50 | stuff = [] 51 | propXHash = {} 52 | classXHash = 
{} 53 | 54 | def process_classes(dom): 55 | classes = dom.xpath("//rdfs:Class", namespaces=NS) 56 | for c in classes: 57 | name = c.xpath('@rdf:about', namespaces=NS)[0] 58 | for (pref,ns) in NS.items(): 59 | if name.startswith(ns): 60 | name = name.replace(ns, "%s:" % pref) 61 | break 62 | 63 | if not name in profile_flags: 64 | print(" WARNING: %s not in profile" % name) 65 | useflag = str(profile_flags.get(name, 0)) 66 | if name in classXHash: 67 | classXHash[name][0] = c 68 | else: 69 | classXHash[name] = [c, useflag] 70 | 71 | label = c.xpath('./rdfs:label[@xml:lang="en"]/text()', namespaces=NS)[0] 72 | try: 73 | comment = c.xpath('./rdfs:comment/text()', namespaces=NS)[0] 74 | comment = comment.strip() 75 | comment = comment.replace('\n', '\\n').replace('\t', ' ') 76 | except: 77 | comment = "" 78 | 79 | subClsL = c.xpath('./rdfs:subClassOf/@rdf:resource', namespaces=NS) 80 | if subClsL: 81 | # could be multiples 82 | subCls = '|'.join(subClsL) 83 | for s in subClsL: 84 | try: 85 | classXHash[s][1] = 3 86 | except KeyError: 87 | classXHash[s] = [None, 3] 88 | else: 89 | subCls = "" 90 | 91 | # Hack extensions to be readable :( 92 | if name == "geo:SP4_Spatial_Coordinate_Reference_System": 93 | ccname = "CoordinateSystem" 94 | elif name == "geo:SP5_Geometric_Place_Expression": 95 | ccname = "Geometry" 96 | elif name == "geo:SP6_Declarative_Place": 97 | ccname = "DeclarativePlace" 98 | elif name == "E33_E41_Linguistic_Appellation": 99 | ccname = "Name" 100 | elif name == "dig:D1_Digital_Object": 101 | ccname = "DigitalObject" 102 | elif name == "sci:S19_Encounter_Event": 103 | ccname = "Encounter" 104 | else: 105 | # Assume that we've done our job okay and put in overrides for NSS 106 | cidx = name.find(":") 107 | if cidx > -1: 108 | ccname = name[cidx+1:] 109 | else: 110 | uc1 = name.find("_") 111 | ccname = name[uc1+1:] 112 | ccname = ccname.replace("_or_", "_Or_").replace("_of_", "_Of_") 113 | ccname = ccname.replace('-', '').replace('_', '') 114 | 115 | 
stuff.append([name, "class", ccname, label, comment, subCls, useflag]) 116 | 117 | def process_props(dom): 118 | props = dom.xpath("//rdf:Property",namespaces=NS) 119 | for p in props: 120 | name = p.xpath('@rdf:about', namespaces=NS)[0] 121 | 122 | 123 | # replace archaeo first, as a superstring of crm base :( 124 | if name.startswith("http://www.cidoc-crm.org/cidoc-crm/CRMarchaeo/"): 125 | name = name.replace("http://www.cidoc-crm.org/cidoc-crm/CRMarchaeo/", "archaeo:") 126 | 127 | for (pref,ns) in NS.items(): 128 | if name.startswith(ns): 129 | name = name.replace(ns, "%s:" % pref) 130 | break 131 | 132 | if not name in profile_flags: 133 | print(" WARNING: %s not in profile" % name) 134 | useflags = profile_flags.get(name, [0,0]) or [0,0] 135 | propXHash[name] = [p, useflags[0]] 136 | 137 | try: 138 | label = p.xpath('./rdfs:label[@xml:lang="en"]/text()', namespaces=NS)[0] 139 | except: 140 | print(p.xpath('./@rdf:about', namespaces=NS)) 141 | print(p.xpath('./rdfs:label/text()', namespaces=NS)) 142 | raise ValueError 143 | try: 144 | comment = p.xpath('./rdfs:comment/text()', namespaces=NS)[0] 145 | comment = comment.strip() 146 | comment = comment.replace('\n', '\\n').replace('\t', ' ') 147 | except: 148 | comment = "" 149 | 150 | domn = p.xpath('./rdfs:domain/@rdf:resource', namespaces=NS) 151 | if domn: 152 | domn = domn[0] 153 | for (k,v) in NS.items(): 154 | domn = domn.replace(v,"%s:" % k) 155 | else: 156 | domn = "" 157 | rang = p.xpath('./rdfs:range/@rdf:resource', namespaces=NS) 158 | if rang: 159 | rang = rang[0] 160 | for (k,v) in NS.items(): 161 | rang = rang.replace(v,"%s:" % k) 162 | else: 163 | rang = "" 164 | 165 | subProp = p.xpath('./rdfs:subPropertyOf/@rdf:resource', namespaces=NS) 166 | if subProp: 167 | subProp = subProp[0] 168 | else: 169 | subProp = "" 170 | 171 | inverse = p.xpath('./owl:inverseOf/@rdf:resource', namespaces=NS) 172 | if inverse: 173 | inverse = inverse[0] 174 | for (pref,ns) in NS.items(): 175 | if 
inverse.startswith(ns): 176 | inverse = inverse.replace(ns, "%s:" % pref) 177 | break 178 | else: 179 | inverse = "" 180 | 181 | cidx = name.find(":") 182 | if name in property_overrides: 183 | ccname = property_overrides[name] 184 | elif cidx > -1: 185 | ccname = name[cidx+1:] 186 | else: 187 | uc1 = name.find("_") 188 | pno = name[:uc1] 189 | if pno in property_overrides: 190 | ccname = property_overrides[pno] 191 | else: 192 | ccname = name[uc1+1:] 193 | ccname = ccname.replace("-", "") 194 | if ccname.startswith("is_"): 195 | ccname = ccname[3:] 196 | elif ccname.startswith("has_") or ccname.startswith("had_") or ccname.startswith("was_"): 197 | ccname = ccname[4:] 198 | 199 | koi = str(key_order_hash.get(ccname, default_key_order)) 200 | 201 | # [0/1/2, 0/1] for [no/okay/warn, single/multiple] 202 | stuff.append([name, "property", ccname, label, comment, subProp, domn, rang, inverse, koi, 203 | str(useflags[0]), str(useflags[1])]) 204 | 205 | 206 | # This order is important. 207 | # Need to process the class definition before the properties of the class 208 | # linkedart defines properties against the classes in the core and extensions 209 | # so needs to come last 210 | 211 | files = ['cidoc.xml', 'linkedart_crm_enhancements.xml', 'linkedart.xml'] 212 | 213 | for fn in files: 214 | print("processing: %s" % fn) 215 | fh = open('data/%s' % fn) 216 | data = fh.read() 217 | fh.close() 218 | try: 219 | dom = etree.XML(data.encode('utf-8')) 220 | except: 221 | dom = etree.XML(data) 222 | process_classes(dom) 223 | process_props(dom) 224 | 225 | 226 | 227 | headers = ["term", "term type", "json-ld key", "label", "scope note", "subPropertyOf", "domain", \ 228 | "range", "inverse", "key order", "okay to use?", "okay for multiple?"] 229 | 230 | # outdata = '\n'.join(['\t'.join(x) for x in stuff]) 231 | fh = codecs.open('../cromulent/data/crm_vocab.tsv', 'w', 'utf-8') 232 | # write header 233 | line = '\t'.join(headers) + '\n' 234 | fh.write(line) 235 | 236 | for l in 
stuff: 237 | name = l[0] 238 | line = '\t'.join(l) + "\n" 239 | if name in classXHash: 240 | okay = classXHash[name][1] 241 | elif name in propXHash: 242 | okay = propXHash[name][1] 243 | else: 244 | okay = 0 245 | print("Could not find %s" % name) 246 | if not PROFILE_ONLY or okay: 247 | fh.write(line) 248 | fh.close() 249 | -------------------------------------------------------------------------------- /tests/test_dimensions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import unittest 4 | try: 5 | from contextlib import suppress 6 | except: 7 | # Python 2.7 8 | suppress = None 9 | import pprint 10 | from datetime import datetime 11 | from cromulent.extract import Dimension, normalized_dimension_object 12 | import cromulent.extract 13 | 14 | class TestDimensionExtraction(unittest.TestCase): 15 | ''' 16 | Test the ability to extract various formats of dimensions. 17 | ''' 18 | def setUp(self): 19 | pass 20 | 21 | def tearDown(self): 22 | pass 23 | 24 | def test_parse_simple_dimensions(self): 25 | ''' 26 | Test the documented formats that `cromulent.extract.parse_simple_dimensions` can parse 27 | and ensure that it returns the expected data. 
28 | ''' 29 | tests = { 30 | "3'": [Dimension(3, 'feet', None)], 31 | '3 feet': [Dimension(3, 'feet', None)], 32 | '3 foot': [Dimension(3, 'feet', None)], 33 | '3 ft': [Dimension(3, 'feet', None)], 34 | '3 ft.': [Dimension(3, 'feet', None)], 35 | '2"': [Dimension(2, 'inches', None)], 36 | '2 in': [Dimension(2, 'inches', None)], 37 | '2 in.': [Dimension(2, 'inches', None)], 38 | '2 inch': [Dimension(2, 'inches', None)], 39 | '2 inches': [Dimension(2, 'inches', None)], 40 | '2 duymen': [Dimension(2, 'inches', None)], 41 | '2 d.': [Dimension(2, 'inches', None)], 42 | '2 d': [Dimension(2, 'inches', None)], 43 | '''2'8"''': [Dimension(2, 'feet', None), Dimension(8, 'inches', None)], 44 | '4cm': [Dimension(4, 'cm', None)], 45 | '2 pieds 3 pouces': [Dimension(2, 'fr_feet', None), Dimension(3, 'fr_inches', None)], 46 | '1 pied 7 pouces': [Dimension(1, 'fr_feet', None), Dimension(7, 'fr_inches', None)], 47 | '8 pouce': [Dimension(8, 'fr_inches', None)], 48 | '8 pouces': [Dimension(8, 'fr_inches', None)], 49 | '8 1/2 pouces': [Dimension(8.5, 'fr_inches', None)], 50 | '8 1/4 pouces': [Dimension(8.25, 'fr_inches', None)], 51 | '8 1/8 pouces': [Dimension(8.125, 'fr_inches', None)], 52 | '1': [Dimension(1, None, None)], 53 | 54 | # values without a unit that follow values with a unit stay in the same system but using the next-finer unit 55 | '2 pieds 3': [Dimension(2, 'fr_feet', None), Dimension(3, 'fr_inches', None)], 56 | "1' 3": [Dimension(1, 'feet', None), Dimension(3, 'inches', None)], 57 | } 58 | 59 | for value, expected in tests.items(): 60 | dims = cromulent.extract.parse_simple_dimensions(value) 61 | if expected is not None: 62 | self.assertIsInstance(dims, list) 63 | self.assertEqual(dims, expected, msg='dimensions: %r' % (value,)) 64 | else: 65 | self.assertIsNone(dims) 66 | 67 | def test_dimension_cleaner(self): 68 | ''' 69 | Test the documented formats that `cromulent.extract.dimensions_cleaner` can parse 70 | and ensure that it returns the expected data. 
71 | ''' 72 | tests = { 73 | '''2 in by 1 in''': ([Dimension(2, 'inches', None)], [Dimension(1, 'inches', None)]), 74 | '''2'2"h x 2'8"w''': ([Dimension(2, 'feet', 'height'), Dimension(2, 'inches', 'height')], [Dimension(2, 'feet', 'width'), Dimension(8, 'inches', 'width')]), 75 | '''1'3"x4cm h''': ([Dimension(1, 'feet', None), Dimension(3, 'inches', None)], [Dimension(4, 'cm', 'height')]), 76 | '''1'3" by 4"''': ([Dimension(1, 'feet', None), Dimension(3, 'inches', None)], [Dimension(4, 'inches', None)]), 77 | 'Haut 14 pouces, large 10 pouces': ([Dimension(14, 'fr_inches', 'height')], [Dimension(10, 'fr_inches', 'width')]), 78 | 'Haut. 48 pouces, large 68 pouces': ([Dimension(48, 'fr_inches', 'height')], [Dimension(68, 'fr_inches', 'width')]), 79 | '1 by 4': ([Dimension(1, None, None)], [Dimension(4, None, None)]), 80 | 'Hoog. 6 v., breed 3 v': ([Dimension(6, 'feet', 'height')], [Dimension(3, 'feet', 'width')]), 81 | 'Breedt 6 v., hoog 3 v': ([Dimension(6, 'feet', 'width')], [Dimension(3, 'feet', 'height')]), 82 | '20 cm x 24,5 cm': ([Dimension(20, 'cm', None)], [Dimension(24.5, 'cm', None)]), 83 | '2 w by 5 h': ([Dimension(2, None, 'width')], [Dimension(5, None, 'height')]), 84 | 'Hauteur 1 pied 4 pouces, largeur 1 pied 1/2 pouc.': ([Dimension(1, 'fr_feet', 'height'), Dimension(value=4, unit='fr_inches', which='height')], [Dimension(1, 'fr_feet', 'width'), Dimension(value=0.5, unit='fr_inches', which='width')]), 85 | 'h.73 pouces 4 lignes, l.50 pouces': ([Dimension(value=73, unit='fr_inches', which='height'), Dimension(value=4, unit='ligne', which='height')], [Dimension(value=50, unit='fr_inches', which='width')]), 86 | 'haut. 5 pouc. larg. 5 pouc. 4 linges': ([Dimension(value=5, unit='fr_inches', which='height')], [Dimension(value=5, unit='fr_inches', which='width'), Dimension(value=4, unit='ligne', which='width')]), 87 | 'haut. 9 pouc. 4 lignes larg. 10 pouc. 
4 linges': ([Dimension(value=9, unit='fr_inches', which='height'), Dimension(value=4, unit='ligne', which='height')], [Dimension(value=10, unit='fr_inches', which='width'), Dimension(value=4, unit='ligne', which='width')]), 88 | 'h 38 cm, w 27 cm': ([Dimension(38, 'cm', 'height')], [Dimension(27, 'cm', 'width')]), 89 | "hauteur 9 pouces, largeur 7": ([Dimension(value=9, unit='fr_inches', which='height')], [Dimension(value=7, unit=None, which='width')]), 90 | } 91 | 92 | for value, expected in tests.items(): 93 | dims = cromulent.extract.dimensions_cleaner(value) 94 | if expected is not None: 95 | self.assertIsInstance(dims, tuple) 96 | # print('===== got:') 97 | # pprint.pprint(dims) 98 | # print('----- expected:') 99 | # pprint.pprint(expected) 100 | # print('=====') 101 | self.assertEqual(dims, expected, msg='dimensions: %r' % (value,)) 102 | else: 103 | self.assertIsNone(dims) 104 | 105 | def test_extract_physical_dimensions(self): 106 | ''' 107 | Test the documented formats that `cromulent.extract.extract_physical_dimensions` 108 | can parse and ensure that it returns the expected data. 
109 | ''' 110 | tests = {} 111 | h9l7_height = cromulent.vocab.Height(ident='', content=9.0) 112 | h9l7_height.identified_by = cromulent.model.Name(ident='', content='9 French inches') 113 | h9l7_height.unit = cromulent.vocab.instances.get('fr_inches') 114 | h9l7_width = cromulent.vocab.Width(ident='', content=7.0) 115 | tests["hauteur 9 pouces, largeur 7"] = [h9l7_height, h9l7_width] 116 | 117 | for value, expected_dims in tests.items(): 118 | dims = list(cromulent.extract.extract_physical_dimensions(value)) 119 | for got, expected in zip(dims, expected_dims): 120 | self.assertEqual(got.value, expected.value) 121 | self.assertEqual(got.type, expected.type) 122 | 123 | if suppress is None: 124 | # Python 2.7 125 | if hasattr(expected, 'unit'): 126 | self.assertEqual(got.unit, expected.unit) 127 | if hasattr(expected, 'classified_as'): 128 | self.assertEqual(got.classified_as, expected.classified_as) 129 | if hasattr(expected, 'identified_by'): 130 | self.assertEqual(got.identified_by, expected.identified_by) 131 | else: 132 | with suppress(AttributeError): 133 | self.assertEqual(got.unit, expected.unit) 134 | with suppress(AttributeError): 135 | self.assertEqual(got.classified_as, expected.classified_as) 136 | with suppress(AttributeError): 137 | self.assertEqual(got.identified_by, expected.identified_by) 138 | 139 | def test_extract_physical_dimensions_with_default(self): 140 | ''' 141 | Test the documented formats that `cromulent.extract.extract_physical_dimensions` 142 | can parse, specifying a default unit, and ensure that it returns the expected data. 
143 | ''' 144 | tests = {} 145 | h9l7_height = cromulent.vocab.Height(ident='', content=9.0) 146 | h9l7_height.identified_by = cromulent.model.Name(ident='', content='9 French inches') 147 | h9l7_height.unit = cromulent.vocab.instances.get('fr_inches') 148 | h9l7_width = cromulent.vocab.Width(ident='', content=7.0) 149 | h9l7_width.unit = cromulent.vocab.instances.get('inches') 150 | tests["hauteur 9 pouces, largeur 7"] = [h9l7_height, h9l7_width] 151 | 152 | for value, expected_dims in tests.items(): 153 | dims = list(cromulent.extract.extract_physical_dimensions(value, default_unit='inches')) 154 | for got, expected in zip(dims, expected_dims): 155 | self.assertEqual(got.value, expected.value) 156 | self.assertEqual(got.type, expected.type) 157 | self.assertEqual(got.unit, expected.unit) 158 | 159 | def test_normalize_dimension(self): 160 | tests = { 161 | '1 ft, 2 in': ('1 foot, 2 inches', Dimension(value=14, unit='inches', which=None)), 162 | '8 1/2 pouces': ('8.5 French inches', Dimension(value=8.5, unit='fr_inches', which=None)), 163 | '1 pied 7 pouces': ('1 French foot, 7 French inches', Dimension(value=19, unit='fr_inches', which=None)), 164 | '2 pied 1/2 pouces': ('2 French feet, 0.5 French inches', Dimension(value=24.5, unit='fr_inches', which=None)), 165 | '1 pied 3 pouce. 
3 linges': ('1 French foot, 3 French inches, 3 lignes', Dimension(value=15.25, unit='fr_inches', which=None)), 166 | "4' 8": ('4 feet, 8 inches', Dimension(value=56, unit='inches', which=None)), 167 | "1 pied 2": ('1 French foot, 2 French inches', Dimension(value=14, unit='fr_inches', which=None)), 168 | } 169 | for value, expected in tests.items(): 170 | elabel, edim = expected 171 | dims = cromulent.extract.parse_simple_dimensions(value) 172 | dim, label = normalized_dimension_object(dims) 173 | self.assertEqual(label, elabel) 174 | self.assertEqual(dim, edim) 175 | 176 | if __name__ == '__main__': 177 | unittest.main() 178 | -------------------------------------------------------------------------------- /utils/data/linkedart_crm_enhancements.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Digital Object 7 | This class comprises identifiable immaterial items that can be represented as sets of bit sequences, such as data sets, e-texts, images, audio or video items, software, etc., and are documented as single units. Any aggregation of instances of D1 Digital Object into a whole treated as single unit is also regarded as an instance of D1 Digital Object. This means that for instance, the content of a DVD, an XML file on it, and an element of this file, are regarded as distinct instances of D1 Digital Object, mutually related by the P106 is composed of (forms part of) property. A D1 Digital Object does not depend on a specific physical carrier, and it can exist on one or more carriers simultaneously. 8 | 9 | 10 | 11 | 12 | triggers 13 | 14 | 15 | 16 | 17 | 18 | 19 | triggered by 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | Encounter 28 | 29 | This class comprises activities of S4 Observation (substance) where an E39 Actor encounters an instance of E18 Physical Thing of a kind relevant for the mission of the observation or regarded as potentially relevant for some community (identity). 
This observation produces knowledge about the existence of the respective thing at a particular place in or on surrounding matter. This knowledge may be new to the group of people the actor belongs to. In that case we would talk about a discovery. The observer may recognize or assign an individual identity of the thing encountered or regard only the type as noteworthy in the associated documentation or report. 30 | Note that this representation treats S19 as a subClass of only E7 Activity for ease of implementation, as we do not need the full set of relationships available via the complete hierarcy. In the full CRMsci, it is Activity -> Attribute Assignment -> Observation -> Encounter. 31 | 32 | 33 | 34 | 35 | encountered object 36 | This property associates an instance of S19 Encounter Event with an instance of E18 Physical 37 | Thing that has been found. e.g. The finding (S19) encountered (O19) the 18 arrowheads (E18) from Lerna in Argolis 38 | 39 | 40 | 41 | 42 | 43 | 44 | was encountered at 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | occurs during 54 | This property identifies a situation in which the entire instance of the E52 Time-Span of an instance of E2 Temporal Entity is within the instance of the E52 Time-Span of another instance of E2 Temporal Entity that starts before and ends after the included temporal entity. 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | label 68 | A human-readable name for the subject. 
69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | exactMatch 82 | Exact Match, not quite sameAs, good for most uses 83 | 84 | 85 | 86 | 87 | 88 | closeMatch 89 | Close Match, good for some uses 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | has narrower term 98 | Or is broader term of 99 | 100 | 101 | 102 | 103 | 104 | 105 | has broader term 106 | Or is narrower term of 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | has top concept 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | is top concept of 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | is in scheme 134 | Relates a resource (for example a concept) to a concept scheme in which it is included. 135 | A concept may be a member of more than one concept scheme. 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | seeAlso 145 | A related resource, that is machine readable and related to the current resource. 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | conforms to 155 | Some thing conforms to some standard 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | format 164 | The media type of the information object 165 | 166 | 167 | 168 | 169 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /examples/json-to-lod.py: -------------------------------------------------------------------------------- 1 | 2 | import json 3 | from cidoc_orm import factory, TimeSpan, ManMadeObject, Type, Identifier, \ 4 | Production, Person, Place, Group, Material, Type, Mark, Right, Document, \ 5 | Activity 6 | import re 7 | 8 | # Meta meta 9 | ext_classes = { 10 | "TMSNumber": {"parent": Identifier, "vocab": "aat", "id": "300404621"}, 11 | "AccessionNumber": {"parent": Identifier, "vocab": "aat", "id": "300312355"}, 12 | "Inscription": {"parent": Mark, "vocab": "aat", "id": "300028702"}, 13 | "Signature": {"parent": Mark, "vocab": "aat", "id": "300028705"}, 14 | "Exhibition": {"parent": Activity, "vocab": "aat", "id": "300054766"}, 15 | "Painting": {"parent": ManMadeObject, "vocab": "aat", "id": "300033618"}, 16 | "Sculpture": {"parent": ManMadeObject, "vocab": "aat", "id": "300047090"}, 17 | "Drawing": {"parent": ManMadeObject, "vocab": "aat", "id": "300033973"}, 18 | "Miniature": {"parent": ManMadeObject, "vocab": "aat", "id": "300033936"}, 19 | 
"Tapestry": {"parent": ManMadeObject, "vocab": "aat", "id": "300205002"}, 20 | "Furniture": {"parent": ManMadeObject, "vocab": "aat", "id": "300037680"}, 21 | "Mosaic": {"parent": ManMadeObject, "vocab": "aat", "id": "300015342"}, 22 | "Photograph": {"parent": ManMadeObject, "vocab": "aat", "id": "300046300"}, 23 | "Drawing": {"parent": ManMadeObject, "vocab": "aat", "id": "300033973"}, 24 | "Coin": {"parent": ManMadeObject, "vocab": "aat", "id": "300037222"}, 25 | "Vessel": {"parent": ManMadeObject, "vocab": "aat", "id": "300193015"}, 26 | "PhotographPrint": {"parent": ManMadeObject, "vocab": "aat", "id": "300127104"}, 27 | "PhotographAlbum": {"parent": ManMadeObject, "vocab": "aat", "id": "300026695"}, 28 | "PhotographBook": {"parent": ManMadeObject, "vocab": "aat", "id": "300265728"} 29 | } 30 | 31 | # Jewelry 32 | # Text Book Album 33 | # Implement 34 | 35 | # Note many sub types of Vessels, including 36 | # Bowl, Flask, Beaker, Cup, Jar, Amphora, 37 | 38 | for (name,v) in ext_classes.items(): 39 | c = type(name, (v['parent'],), {}) 40 | c._p2_has_type = "http://vocab.getty.edu/%s/%s" % (v['vocab'], v['id']) 41 | globals()[name] = c 42 | 43 | aat_type_mapping = { 44 | "Painting": Painting, 45 | "Paintings": Painting, 46 | "Drawing": Drawing, 47 | "Furniture": Furniture, 48 | "Coin": Coin, 49 | "Sculpture": Sculpture, 50 | "Vessels": Vessel 51 | } 52 | 53 | # "panel": "300014657" # A wooden support 54 | 55 | aat_part_mapping = { 56 | "supports": "300014844" # The thing that is painted on 57 | } 58 | 59 | aat_material_mapping = { 60 | "watercolor": "300015045", 61 | "oil": "300015050", 62 | "tempera": "300015062", 63 | "canvas": "300014078", 64 | "oak": "300012264", 65 | "gold leaf": "300264831", 66 | "paper": "300014109", 67 | "copper": "300011020", 68 | "terracotta": "300010669", 69 | "glass": "300010797", 70 | "chalk": "300011727", 71 | "bronze": "300010957", 72 | "marble": "300011443", 73 | "albumen silver print": "300127121", 74 | "gelatin silver print": 
"300128695", 75 | "silver": "300011029" 76 | } 77 | 78 | aat_culture_mapping = { 79 | "french": "300111188", 80 | "italian": "300111198", 81 | "german": "300111192", 82 | "dutch": "300020929" 83 | } 84 | 85 | dim_type_mapping = { 86 | "height": "300055644", 87 | "width": "300055647", 88 | "depth": "300072633", 89 | "diameter": "300055624", 90 | "weight": "300056240" 91 | } 92 | 93 | 94 | # Meta 95 | class CreditLine(Right): 96 | def __init__(self, *args, **kw): 97 | super(CreditLine, self).__init__(*args, **kw) 98 | # XXX Find a good Type for this 99 | self.has_type = Type("http://example.org/ns/creditline") 100 | CreditLine._properties['value'] = {"rdf": "rdfs:value", "range": str} 101 | 102 | class SourceCreditLine(CreditLine): 103 | def __init__(self, *args, **kw): 104 | super(SourceCreditLine, self).__init__(*args, **kw) 105 | # XXX Find a good Type for this 106 | self.has_type = [] 107 | self.has_type = Type("http://example.org/ns/sourcecreditline") 108 | 109 | class Department(Group): 110 | def __init__(self, *args, **kw): 111 | super(Department, self).__init__(*args, **kw) 112 | self.is_current_or_former_member_of = Museum 113 | 114 | 115 | # Monkey patch Type's _toJSON to only emit full data if not just URI+type 116 | def typeToJSON(self, top=False): 117 | props = self.__dict__.keys() 118 | if len(props) > 3: 119 | return super(Type, self)._toJSON() 120 | else: 121 | return self.id 122 | 123 | Type._toJSON = typeToJSON 124 | Person._properties['familyName'] = {"rdf": "schema:familyName", "range": str} 125 | Person._properties['givenName'] = {"rdf": "schema:givenName", "range": str} 126 | ManMadeObject._properties['culture'] = {"rdf": "schema:genre", "range": Type} 127 | 128 | 129 | factory.base_url = "http://data.getty.edu/museum/" 130 | factory.default_lang = "en" 131 | 132 | departments = {} 133 | locations = {} 134 | 135 | GettyTrust = Group("http://vocab.getty.edu/ulan/500115987") 136 | GettyTrust.label = "J. 
Paul Getty Trust" 137 | Museum = Group("http://vocab.getty.edu/ulan/500115988") 138 | Museum.is_current_or_former_member_of = GettyTrust 139 | Museum.label = "J Paul Getty Museum" 140 | 141 | painting_on_re = re.compile("^(.+?) on (.+?)$") 142 | painting_and_re = re.compile("^(.+?) and (.+?)$") 143 | 144 | def parse_materials(materials, typ): 145 | 146 | mats = [] 147 | if typ == Painting: 148 | # Test for X on Y 149 | mat = materials.lower() 150 | m = painting_on_re.match(mat) 151 | 152 | if m: 153 | paint = m.groups()[0] 154 | 155 | # x and y 156 | m2 = painting_and_re.match(paint) 157 | if m2: 158 | paints = m2.groups() 159 | else: 160 | paints = [paint] 161 | for p in paints: 162 | if aat_material_mapping.has_key(p): 163 | mats.append(Material("http://vocab.getty.edu/aat/%s" % aat_material_mapping[p])) 164 | else: 165 | pass 166 | # print "Paint: %s" % paint 167 | 168 | support = m.groups()[1] 169 | if aat_material_mapping.has_key(support): 170 | mats.append(Material("http://vocab.getty.edu/aat/%s" % aat_material_mapping[support])) 171 | else: 172 | # look for common adjectives, ()s 173 | swords = support.split(' ') 174 | for sw in swords: 175 | if aat_material_mapping.has_key(sw): 176 | mats.append(Material("http://vocab.getty.edu/aat/%s" % aat_material_mapping[sw])) 177 | 178 | return mats 179 | 180 | 181 | fh = file('record_cache.json') 182 | data = fh.read() 183 | fh.close() 184 | cache = json.loads(data) 185 | 186 | 187 | # Load up people 188 | fh = file('500_people.json') 189 | data = fh.read() 190 | fh.close() 191 | peoplel = json.loads(data) 192 | people = {} 193 | for who in peoplel: 194 | whoid = str(who['id']) 195 | if people.has_key(whoid): 196 | continue 197 | else: 198 | wrec = {'id': whoid} 199 | wrec['type'] = who['type'] 200 | wrec['date'] = who['display_date'] 201 | wrec['name'] = who['display_name'] 202 | wrec['nationality'] = who['display_nationality'] 203 | wrec['birthplace'] = who['display_birthplace'] 204 | wrec['deathplace'] = 
who['display_deathplace'] 205 | wrec['institution'] = who['display_institution'] 206 | wrec['image'] = who['display_image'] 207 | wrec['biography'] = who['display_biography'] 208 | people[whoid] = wrec 209 | 210 | print "Processing..." 211 | 212 | #recs = cache.values() 213 | recs = [cache['645']] 214 | 215 | ldrecs = [] 216 | for rec in recs: 217 | ident = str(rec['id']) 218 | 219 | # Build a Foo type of MMO 220 | clslabel = rec['classification']['name'] 221 | clsid = str(rec['classification']['id']) 222 | ot = rec['object_types'] # {'primary': {}, '???': {}} 223 | try: 224 | otid = str(ot['primary']['id']) 225 | otlabel = ot['primary']['display_value'] 226 | except: 227 | otid = "" 228 | otlabel = "" 229 | 230 | if clslabel == "Photographs": 231 | if otlabel == "Print": 232 | obj = PhotographPrint(ident) 233 | elif otlabel == "Album": 234 | obj = PhotographAlbum(ident) 235 | elif otlabel == "Book": 236 | obj = PhotographBook(ident) 237 | elif otlabel.lower() == "cased object": 238 | # Treat as print? 
239 | obj = PhotographPrint(ident) 240 | else: 241 | print "Unknown photograph subtype: %s" % otlabel 242 | elif aat_type_mapping.has_key(otlabel): 243 | obj = aat_type_mapping[otlabel](ident) 244 | elif aat_type_mapping.has_key(clslabel): 245 | obj = aat_type_mapping[clslabel](ident) 246 | else: 247 | obj = ManMadeObject(ident) 248 | # print "ot: '%s' ; cls: '%s'" % (otlabel, clslabel) 249 | t = Type(str(clsid)) 250 | t.label = clslabel 251 | obj.has_type = t 252 | 253 | tms = TMSNumber(ident) 254 | tms.value = ident 255 | obj.is_identified_by = tms 256 | 257 | recno = rec['number'] 258 | accno = AccessionNumber(recno) 259 | accno.value = recno 260 | obj.is_identified_by = accno 261 | 262 | obj.label = rec['title'] 263 | try: 264 | obj.description = rec['description']['display']['value'] 265 | except: 266 | pass 267 | 268 | production = Production(ident) 269 | obj.was_produced_by = production 270 | ts = TimeSpan(ident) 271 | ts.description = rec['date'] 272 | # XXX Parse date string for dates 273 | production.has_timespan = ts 274 | 275 | # XXX if there are multiple makers with different roles, 276 | # create a super Production with components, and each 277 | # role gets a separate component 278 | 279 | for mk in rec['makers']: 280 | mkid = str(mk['id']) 281 | role = mk['role'] 282 | 283 | who = Person(mkid) 284 | 285 | # Find in person db or deref 286 | first = mk['name_first'] 287 | last = mk['name_last'] 288 | 289 | try: 290 | person = people[mkid] 291 | who.label = person['name'] 292 | who.description = person['biography'] 293 | who.givenName = first 294 | who.familyName = last 295 | who.birthPlace = Place() 296 | who.deathPlace = Place() 297 | who.birthDate = "" 298 | who.deathDate = "" 299 | except: 300 | pass 301 | 302 | production.carried_out_by = who 303 | # XXX Link to ULAN 304 | 305 | # found, depicted, created 306 | 307 | if rec['places'] and rec['places'].has_key('place_created'): 308 | p = rec['places']['place_created'] 309 | pid = str(p['id']) 310 | 
where = Place(pid) 311 | where.label = p['display_value'] 312 | production.took_place_at = where 313 | 314 | # XXX Check for place_depicted (find out all possible keys) 315 | 316 | m = Material(ident) 317 | m.description = rec['medium'] 318 | obj.consists_of = m 319 | mats = parse_materials(rec['medium'], obj.__class__) 320 | if mats: 321 | for mat in mats: 322 | m.defines_typical_wholes_for = mat 323 | 324 | dpt = rec['department'] 325 | dptid = dpt['id'] 326 | try: 327 | dept = departments[dptid] 328 | except: 329 | dept = Department(str(dpt['id'])) 330 | dept.label = dpt['name'] 331 | departments[dptid] = dept 332 | obj.has_current_owner = dept 333 | 334 | if rec['location']: 335 | loc = rec['location'][0] 336 | locid = str(loc['id']) 337 | try: 338 | where = locations[locid] 339 | except: 340 | where = Place(locid) 341 | where.label = loc['name'] 342 | locations[locid] = where 343 | obj.has_current_location = where 344 | 345 | culture = Type("culture") 346 | culture.label = rec['culture'] 347 | obj.culture = culture 348 | # XXX Map to AAT 349 | 350 | if rec['markings']: 351 | markings = Mark() 352 | markings.description = rec['markings'] 353 | obj.shows_visual_item = markings 354 | if rec['signature']: 355 | sig = Signature() 356 | sig.description = rec['signature'] 357 | obj.shows_visual_item = sig 358 | if rec['inscription']: 359 | insc = Inscription() 360 | insc.description = rec['inscription'] 361 | obj.shows_visual_item = insc 362 | 363 | if rec.has_key('creditline'): 364 | credit = CreditLine() 365 | credit.value = rec['creditline'] 366 | obj.is_subject_to = credit 367 | if rec['source_creditline']: 368 | srcCredit = SourceCreditLine() 369 | srcCredit.value = rec['source_creditline'] 370 | obj.is_subject_to = srcCredit 371 | 372 | if rec['bibliography']: 373 | bx = 0 374 | for bib in rec['bibliography']: 375 | bt = bib['display_source_type'] 376 | bv = bib['display_value'] 377 | doc = Document("%s/%s" % (ident, str(bx))) 378 | bx += 1 379 | doc.label = bv 
380 | doc.has_type = Type(bt) 381 | # XXX extract actual bib data and map to something sensible 382 | obj.is_documented_in = doc 383 | 384 | if rec['provenance']: 385 | for prov in rec['provenance']: 386 | date = prov['display_date'] 387 | pid = str(prov['id']) 388 | who = prov['display_constituent'] 389 | # XXX Parse constituent and map to provenance patterns 390 | 391 | 392 | if rec['related_exhibitions']: 393 | for exh in rec['related_exhibitions']: 394 | exhid = str(exh['record_identifier']) 395 | ttl = exh['display_title'] 396 | dates = exh['display_dates'] 397 | 398 | exhibition = Exhibition(exhid) 399 | exhibition.label = ttl 400 | if dates: 401 | ts = TimeSpan(exhid) 402 | ts.description = dates 403 | exhibition.has_timespan = ts 404 | # XXX parse for begin, end dates 405 | 406 | vens = exh['display_venues'] 407 | for v in vens: 408 | name = v['display_name'] 409 | loc = v['display_location'] 410 | vid = str(v['record_identifier']) 411 | vdates = v['display_dates'] 412 | 413 | venue = Activity(vid) 414 | venue.label = name 415 | if vdates: 416 | vts = TimeSpan(vid) 417 | vts.description = vdates 418 | venue.has_timespan = vts 419 | # XXX Parse for begin, end dates 420 | if loc: 421 | place = Place(vid) 422 | place.description = loc 423 | # XXX Parse location 424 | venue.took_place_at = place 425 | exhibition.consists_of = venue 426 | 427 | # XXX Catalog Number is a Document that documents the Venue or Exhibition 428 | 429 | obj.was_present_at = exhibition 430 | 431 | ldrecs.append(obj) 432 | # print factory.toString(obj, compact=False) 433 | #break 434 | 435 | 436 | -------------------------------------------------------------------------------- /examples/sales-to-lod.py: -------------------------------------------------------------------------------- 1 | 2 | from lxml import etree 3 | import json 4 | import csv 5 | import codecs 6 | import re 7 | import os 8 | import sys 9 | from dateutil.parser import parse as dateparse 10 | 11 | # for cidoc_orm, see: 
https://github.com/azaroth42/Python-CIDOC-ORM 12 | from cidoc_orm import factory, TimeSpan, Identifier, LegalBody, \ 13 | Production, Actor, Place, Group, Material, Mark, \ 14 | Activity, InformationObject, Purchase, Acquisition, MonetaryAmount, \ 15 | Currency, MeasurementUnit, Dimension, PhysicalObject, VisualItem, Title 16 | 17 | from aat_mapping import ManMadeObject, Type, Person, materialTypes, register_aat_class, \ 18 | Painting, Sculpture, Drawing, Miniature, Graphic, Enamel, Tapestry, Mosaic, \ 19 | Embroidery, Furniture, LocalNumber, dimensionUnits 20 | 21 | 22 | PhysicalObject._properties['had_starting_price'] = {"rdf": "gri:had_starting_price", "range": MonetaryAmount} 23 | 24 | cmUnit = dimensionUnits['cm'] 25 | 26 | # Cache of repeated Objects 27 | catalogO = {} 28 | placeO = {} 29 | nationalityO = {} 30 | personO = {} 31 | 32 | materialO = {} 33 | 34 | bad_price = {} 35 | bad_dates = {} 36 | bad_types = {} 37 | bad_materials = {} 38 | 39 | # XXX -- Distinguish Local from Lugt 40 | register_aat_class("LugtNumber", Identifier, "300404621") 41 | 42 | factory.base_url = "http://data.getty.edu/provenance/" 43 | factory.default_lang = "en" 44 | 45 | objTypeMap = { 46 | u'gem\xe4lde': Painting, 47 | 'skulptur': Sculpture, 48 | 'zeichnung': Drawing, 49 | 'miniatur': Miniature, 50 | 'graphik': Graphic, 51 | 'painting': Painting, 52 | 'enamel': Enamel, 53 | 'miniature': Miniature, 54 | 'sculpture': Sculpture, 55 | 'drawing': Drawing, 56 | 'tapestry': Tapestry, 57 | 'embroidery': Embroidery, 58 | 'furniture': Furniture, 59 | 'mosaic': Mosaic, 60 | 'watercolor': Painting 61 | } 62 | 63 | 64 | r = "(je|l|h|d|b|durchm|durchmesser|dm[.]?)?[ ]*(ca.|h)?[ ]*([0-9,.]+)([ ]*(cm)?[ ]*x[ ]*([0-9,.]+)[ ]*(cm)?)?" 
65 | dimre = re.compile(r) 66 | # dim1 = groups()[2], dim2 = groups()[5] 67 | 68 | 69 | def process_record(rec): 70 | recData = {} 71 | for elm in rec.getchildren(): 72 | tag = elm.tag 73 | curr = recData.get(tag, None) 74 | 75 | if elm.getchildren(): 76 | value = elm 77 | else: 78 | value = elm.text 79 | 80 | if curr is None: 81 | recData[tag] = value 82 | elif type(recData[tag]) == list: 83 | recData[tag].append(value) 84 | else: 85 | recData[tag] = [curr, value] 86 | 87 | cno = recData['Catalogue_No'] 88 | try: 89 | catalog = catalogO[cno] 90 | auction = catalog.refers_to 91 | 92 | # Try and update end of timespan 93 | if not hasattr(auction.has_timespan, 'end_of_the_end'): 94 | sed = recData.get('Sale_End_Date', '') 95 | if sed: 96 | try: 97 | dt = dateparse(sed) 98 | span.end_of_the_end = "%s-%s-%sT00:00:00 CET" % (dt.year, dt.month, dt.day) 99 | except: 100 | try: 101 | bad_dates[sed] += 1 102 | except: 103 | bad_dates[sed] = 1 104 | 105 | except: 106 | catalog = InformationObject(cno) 107 | catalogO[cno] = catalog 108 | auction = Activity(cno) 109 | catalog.refers_to = auction 110 | catalog.has_representation = VisualItem(recData['GSC_link_to_pdf']) 111 | catalog.is_identified_by = LocalNumber(cno) 112 | 113 | # Auction date 114 | span = TimeSpan(cno) 115 | sbd = recData['Sale_Begin_Date'] 116 | sed = recData.get('Sale_End_Date', '') 117 | try: 118 | dt = dateparse(sbd) 119 | span.begin_of_the_begin = "%s-%s-%sT00:00:00 CET" % (dt.year, dt.month, dt.day) 120 | except: 121 | try: 122 | bad_dates[sbd] += 1 123 | except: 124 | bad_dates[sbd] = 1 125 | if sed: 126 | try: 127 | dt = dateparse(sed) 128 | span.end_of_the_end = "%s-%s-%sT00:00:00 CET" % (dt.year, dt.month, dt.day) 129 | except: 130 | try: 131 | bad_dates[sed] += 1 132 | except: 133 | bad_dates[sed] = 1 134 | auction.has_timespan = span 135 | 136 | # Auction location 137 | city = recData['City_of_Sale'] 138 | try: 139 | cityPlace = placeO[city] 140 | except: 141 | cityPlace = Place(city) 142 | 
cityPlace.label = city 143 | placeO[city] = cityPlace 144 | auction.took_place_at = cityPlace 145 | 146 | try: 147 | country = cityPlace.falls_within 148 | except: 149 | try: 150 | country = recData['Country_Auth'] 151 | countryPlace = Place(country) 152 | countryPlace.label = country 153 | cityPlace.falls_within = countryPlace 154 | except: 155 | # No country 156 | pass 157 | 158 | try: 159 | # Auction House 160 | house = recData['Auction_House'] 161 | try: 162 | ahouse = houseO[house] 163 | except: 164 | ahouse = LegalBody(house) 165 | ahouse.label = house 166 | auction.carried_out_by = ahouse 167 | except: 168 | # No auction house? :( 169 | pass 170 | 171 | try: 172 | lno = recData["Lot_Number"] 173 | except: 174 | print "No lot number for %s" % recData['recno'] 175 | return 176 | 177 | cnolot = cno +'/'+ lno 178 | 179 | # Build an aggregation of objects for the lot 180 | lotset = PhysicalObject(cnolot + "-set") 181 | # InfoObj for the entry 182 | entry = InformationObject(cnolot) 183 | entry.refers_to = lotset 184 | catalog.is_composed_of = entry 185 | 186 | if recData.has_key('Price'): 187 | pr = recData['Price'] 188 | # Process prinfo 189 | 190 | if type(pr) == list: 191 | pr = pr[0] 192 | if not type(pr) in [str, unicode]: 193 | try: 194 | pr = pr.text 195 | except: 196 | pr = "" 197 | 198 | pr = pr.replace("[?]", "") 199 | pr = pr.replace('?', '') 200 | pr = pr.strip() 201 | 202 | if pr: 203 | pr = pr.replace('1/2', '.5') 204 | pr = pr.replace('1/4', '.25') 205 | pr = pr.replace('3/4', '.75') 206 | pr = pr.replace(' .', '.') 207 | 208 | # Unknown: x-y-z x.y.z 209 | # x"y' x=y x:y x=y-z 210 | # 211 | 212 | if pr.find(" frs") > -1: 213 | curr = Currency("francs") 214 | curr.label = "francs" 215 | pr = pr.replace(' frs', '') 216 | elif pr.find(" fl") > -1: 217 | curr = Currency("fl.s") 218 | curr.label = "fl.s" 219 | pr = pr.replace(' fl', '') 220 | elif pr.find(" livres") > -1: 221 | curr = Currency("pounds") 222 | curr.label = "pounds" 223 | pr = 
pr.replace(' livres', '') 224 | else: 225 | curr = None 226 | 227 | pr = pr.replace('[or]', 'or') 228 | oidx = pr.find(' or ') 229 | if oidx > -1: 230 | pr = pr[:oidx] 231 | pr = pr.strip() 232 | 233 | fidx = pr.find(' for ') 234 | if fidx > -1: 235 | pr = pr[:fidx] 236 | pr = pr.strip() 237 | 238 | try: 239 | p = float(pr) 240 | except: 241 | p = -1 242 | try: 243 | bad_price[pr] += 1 244 | except: 245 | bad_price[pr] = 1 246 | if p >= 0: 247 | amnt = MonetaryAmount(cnolot + "-start") 248 | amnt.has_value = p 249 | if curr: 250 | amnt.has_currency = curr 251 | lotset.had_starting_price = amnt 252 | 253 | 254 | # Build the object 255 | 256 | try: 257 | typs = recData['Object_Types'].xpath('./Object_Type/text()') 258 | ot = typs[0] 259 | cls = objTypeMap[ot] 260 | except: 261 | cls = ManMadeObject 262 | try: 263 | bad_types[ot] += 1 264 | except: 265 | bad_types[ot] = 1 266 | 267 | obj = cls(cnolot) 268 | lotset.is_composed_of = obj 269 | 270 | title = Title(cnolot) 271 | obj.has_title = title 272 | try: 273 | title.value = recData['Title'] 274 | except: 275 | title.value = "[No Title Known]" 276 | if recData.has_key('Title_Modifier'): 277 | title.has_note = unicode(recData['Title_Modifier']) 278 | 279 | if recData.has_key('Materials'): 280 | for mat in recData['Materials'].xpath('./Material/text()'): 281 | ot = mat.lower() 282 | ot = ot.replace(',', '') 283 | ot = ot.replace('#', '') 284 | ot = ot.replace('.', '') 285 | ot = ot.replace('?', '') 286 | ot = ot.replace('auf', '') 287 | ot = ot.replace('und', '') 288 | ot = ot.replace("on", " ") 289 | ot = ot.replace("and", " ") 290 | ot = ot.replace(" ", ' ') 291 | 292 | words = ot.split(' ') 293 | obj.consists_of = [] 294 | for w in words: 295 | if w: 296 | try: 297 | material = materialO[w] 298 | except: 299 | material = Material(w) 300 | materialO[w] = material 301 | material.value = w 302 | obj.consists_of = material 303 | 304 | if recData.has_key("Dimensions"): 305 | for dimtext in 
recData['Dimensions'].xpath('./Dimension_Text/text()'): 306 | m = dimre.match(dimtext) 307 | if m: 308 | d1 = m.groups()[2] 309 | d2 = m.groups()[5] 310 | 311 | dim1 = Dimension(cnolot + "_d1") 312 | dim1.has_value = d1 313 | dim1.has_unit = cmUnit 314 | obj.has_dimension = dim1 315 | 316 | if d2: 317 | dim2 = Dimension(cnolot + "_d2") 318 | dim2.has_value = d2 319 | dim2.has_unit = cmUnit 320 | obj.has_dimension = dim2 321 | 322 | else: 323 | #print "Can't handle dimension data:" 324 | #print dimtext 325 | try: 326 | bad_materials[dimtext] += 1 327 | except: 328 | bad_materials[dimtext] = 1 329 | 330 | # Artist could be modified by Attrib_Mod 331 | # e.g. zugeschrieben --> attributed (2200) 332 | # Kopie von --> copy from (1) 333 | # stil --> style [of] (24) 334 | 335 | # Artist 336 | if recData.has_key('Artist'): 337 | arts = recData['Artist'] 338 | if type(arts) != list: 339 | arts = [arts] 340 | for artist in arts: 341 | va = artist.xpath('./Verb_Artist/text()') 342 | aa = artist.xpath('./Artist_Auth/text()') 343 | natl = artist.xpath('./Nationality/text()') 344 | mod = artist.xpath('./Attrib_Mod/text()') 345 | 346 | # first try to detect non names 347 | if aa: 348 | aa = unicode(aa[0]) 349 | if aa == "NEW": 350 | # treat as if not present 351 | pass 352 | elif aa.startswith('['): 353 | # anonymous artist with some known features 354 | pass 355 | aname = aa.lower() 356 | aname = aname.replace(" ", "_") 357 | try: 358 | who = personO[aname] 359 | except: 360 | who = Person(aname) 361 | personO[aname] = who 362 | # put verbatim name somewhere 363 | # and authority name in p131 is identified by 364 | who.label = aa 365 | 366 | if natl: 367 | natl = unicode(natl[0]) 368 | try: 369 | nat = nationalityO[natl] 370 | except: 371 | nat = Group(natl) 372 | nat.label = natl 373 | who.is_current_or_former_member_of = nat 374 | 375 | # one production event per artist? 
376 | prod = Production(cnolot + aname) 377 | prod.carried_out_by = who 378 | obj.was_produced_by = prod 379 | 380 | # seller if we know 381 | seller = None 382 | if recData.has_key("Seller"): 383 | sells = recData['Seller'] 384 | if type(sells) != list: 385 | sells = [sells] 386 | sx = 0 387 | for s in sells: 388 | try: 389 | lbl = unicode(s.xpath("./Seller_Auth/text()")[0]) 390 | except: 391 | try: 392 | lbl = unicode(s.xpath("./Verb_Seller/text()")[0]) 393 | except: 394 | # ???!!! 395 | continue 396 | end = "-seller" 397 | if sx: 398 | end += "-%s" % sx 399 | seller = Actor(cnolot + end) 400 | seller.label = lbl 401 | obj.has_former_or_current_owner = seller 402 | sx += 1 403 | 404 | try: 405 | txn = recData['Transaction'] 406 | txn = txn.lower() 407 | except: 408 | txn = "unknown" 409 | txn = txn.replace('[?]', '') 410 | txn = txn.replace("unknown or ", "") 411 | txn = txn.replace(" or unknown", "") 412 | txn = txn.strip() 413 | 414 | if txn.find(" or ") > -1: 415 | # Don't know what to do with X or Y 416 | print "Not processing or for txn" 417 | return 418 | 419 | if txn in ["sold", "bought in", "passed"]: 420 | lot = Activity(cnolot) 421 | auction.consists_of = lot 422 | lot.used_specific_object = lotset 423 | 424 | span = TimeSpan(cnolot) 425 | try: 426 | date = recData['Lot_Sale_Date'] 427 | # NB: this is going to end up strange due to UK vs EU timezones 428 | try: 429 | dt = dateparse(date) 430 | begin = "%s-%s-%sT00:00:00 CET" % (dt.year, dt.month, dt.day) 431 | end = "%s-%s-%sT23:59:59 CET" % (dt.year, dt.month, dt.day) 432 | span.begin_of_the_begin = begin 433 | span.end_of_the_end = end 434 | lot.has_timespan = span 435 | except: 436 | try: 437 | bad_dates[date] += 1 438 | except: 439 | bad_dates[date] = 1 440 | except: 441 | print "No Sale Date: %s" % recData['recno'] 442 | 443 | ln = recData.get('Lot_Notes', None) 444 | if ln is not None: 445 | lot.has_note = unicode(ln) 446 | 447 | entry.refers_to = lot 448 | 449 | if txn in ['sold', 'bought in']: 
450 | # Bidding activity exists 451 | bidding = Activity(cnolot + "-bidding") 452 | lot.consists_of = bidding 453 | 454 | if txn == "sold": 455 | txn = Purchase(cnolot+"-transaction") 456 | lot.consists_of = txn 457 | bidding.occurs_before = txn 458 | 459 | acq = Purchase(cnolot + "-acquisition") 460 | txn.consists_of = acq 461 | acq.transferred_title_of = obj 462 | 463 | if seller: 464 | sellers = obj.has_former_or_current_owner 465 | if type(sellers) == list: 466 | for s in sellers: 467 | acq.transferred_title_from = s 468 | else: 469 | acq.transferred_title_from = sellers 470 | 471 | bx = 0 472 | if recData.has_key("Buyer"): 473 | buys = recData['Buyer'] 474 | if type(buys) != list: 475 | buys = [buys] 476 | for b in buys: 477 | try: 478 | lbl = unicode(b.xpath("./Buyer_Auth/text()")[0]) 479 | except: 480 | try: 481 | lbl = unicode(b.xpath("./Verb_Buyer/text()")[0]) 482 | except: 483 | # ???!!! 484 | continue 485 | 486 | end = "-buyer" 487 | if bx: 488 | end += "-%s" % bx 489 | buyer = Actor(cnolot + end) 490 | buyer.label = lbl 491 | acq.transferred_title_to = buyer 492 | bx += 1 493 | 494 | 495 | return catalog 496 | 497 | 498 | 499 | recdir = '/Users/rsanderson/Box Sync/PI_Sales/' 500 | files = ['salesdb1.xml', 'salesdb2.xml', 'salesdb3.xml', 'salesdb4.xml'] 501 | files = ['salesdb1.xml'] 502 | 503 | for fn in files: 504 | rec = [] 505 | fh = file(os.path.join(recdir, fn)) 506 | #fh = codecs.open(os.path.join(recdir, fn), 'r', 'iso-8859-1') 507 | 508 | # chomp first three lines 509 | fh.readline() ; fh.readline() ; fh.readline() 510 | 511 | header = '\n' 512 | 513 | line = fh.readline() 514 | while line != "\n": 515 | while (line.find('') == -1): 516 | rec.append(line) 517 | line = fh.readline() 518 | if not line: 519 | break 520 | rec.append(line) 521 | 522 | data = ''.join(rec) 523 | data = header + data 524 | data = data.replace("Catalogue_No.", "Catalogue_No") 525 | data = data.replace("Country_Auth.", "Country_Auth") 526 | data = data.replace('\x04', '') 
527 | data = data.replace('\x1f', '') 528 | 529 | try: 530 | dom = etree.XML(data) 531 | except: 532 | print "Invalid record data: %s" % data[:200] 533 | 534 | top = process_record(dom) 535 | # break 536 | 537 | line = fh.readline() 538 | rec = [] 539 | 540 | fh.close() 541 | 542 | -------------------------------------------------------------------------------- /cromulent/data/crm-profile.json: -------------------------------------------------------------------------------- 1 | { 2 | "E10_Transfer_of_Custody": 1, 3 | "E11_Modification": 1, 4 | "E12_Production": 1, 5 | "E13_Attribute_Assignment": 1, 6 | "E14_Condition_Assessment": 0, 7 | "E15_Identifier_Assignment": 0, 8 | "E16_Measurement": 0, 9 | "E17_Type_Assignment": 0, 10 | "E18_Physical_Thing": 0, 11 | "E19_Physical_Object": 0, 12 | "E1_CRM_Entity": 0, 13 | "E20_Biological_Object": 0, 14 | "E21_Person": 1, 15 | "E22_Human-Made_Object": 1, 16 | "E24_Physical_Human-Made_Thing": 0, 17 | "E25_Human-Made_Feature": 0, 18 | "E26_Physical_Feature": 0, 19 | "E27_Site": 0, 20 | "E28_Conceptual_Object": 0, 21 | "E29_Design_or_Procedure": 0, 22 | "E2_Temporal_Entity": 0, 23 | "E30_Right": 1, 24 | "E31_Document": 0, 25 | "E32_Authority_Document": 1, 26 | "E33_Linguistic_Object": 1, 27 | "E34_Inscription": 0, 28 | "E35_Title": 0, 29 | "E36_Visual_Item": 1, 30 | "E37_Mark": 0, 31 | "E38_Image": 0, 32 | "E39_Actor": 1, 33 | "E3_Condition_State": 0, 34 | "E40_Legal_Body": 0, 35 | "E41_Appellation": 0, 36 | "E42_Identifier": 1, 37 | "E4_Period": 1, 38 | "E52_Time-Span": 1, 39 | "E53_Place": 1, 40 | "E54_Dimension": 1, 41 | "E55_Type": 1, 42 | "E56_Language": 1, 43 | "E57_Material": 1, 44 | "E58_Measurement_Unit": 1, 45 | "E5_Event": 1, 46 | "E63_Beginning_of_Existence": 0, 47 | "E64_End_of_Existence": 0, 48 | "E65_Creation": 1, 49 | "E66_Formation": 1, 50 | "E67_Birth": 1, 51 | "E68_Dissolution": 1, 52 | "E69_Death": 1, 53 | "E6_Destruction": 1, 54 | "E70_Thing": 0, 55 | "E71_Human-Made_Thing": 0, 56 | "E72_Legal_Object": 0, 57 
| "E73_Information_Object": 1, 58 | "E74_Group": 1, 59 | "E77_Persistent_Item": 0, 60 | "E78_Curated_Holding": 0, 61 | "E79_Part_Addition": 1, 62 | "E7_Activity": 1, 63 | "E80_Part_Removal": 1, 64 | "E81_Transformation": 1, 65 | "E83_Type_Creation": 0, 66 | "E84_Information_Carrier": 0, 67 | "E85_Joining": 1, 68 | "E86_Leaving": 1, 69 | "E87_Curation_Activity": 0, 70 | "E89_Propositional_Object": 1, 71 | "E8_Acquisition": 1, 72 | "E90_Symbolic_Object": 0, 73 | "E92_Spacetime_Volume": 0, 74 | "E93_Presence": 0, 75 | "E96_Purchase": 0, 76 | "E97_Monetary_Amount": 1, 77 | "E98_Currency": 1, 78 | "E99_Product_Type": 0, 79 | "E9_Move": 1, 80 | "E33_E41_Linguistic_Appellation": 1, 81 | "dig:D1_Digital_Object": 1, 82 | "geo:SP5_Geometric_Place_Expression": 1, 83 | "geo:SP4_Spatial_Coordinate_Reference_System": 1, 84 | "geo:SP6_Declarative_Place": 1, 85 | "sci:S19_Encounter_Event": 1, 86 | "la:Phase": 0, 87 | "la:RightAcquisition": 1, 88 | "la:Payment": 1, 89 | "la:Relationship": 1, 90 | "la:Set": 1, 91 | "la:Addition": 1, 92 | "la:Removal": 1, 93 | "la:DigitalService": 1, 94 | 95 | "P100_was_death_of": [1,0], 96 | "P100i_died_in": [1,0], 97 | "P101_had_as_general_use": [1,1], 98 | "P101i_was_use_of": [0,1], 99 | "P102_has_title": [0,1], 100 | "P102i_is_title_of": [0,0], 101 | "P103_was_intended_for": [0,1], 102 | "P103i_was_intention_of": [0,1], 103 | "P104_is_subject_to": [1,1], 104 | "P104i_applies_to": [1,1], 105 | "P105_right_held_by": [0,1], 106 | "P105i_has_right_on": [0,1], 107 | "P106_is_composed_of": [1,1], 108 | "P106i_forms_part_of": [1,1], 109 | "P107_has_current_or_former_member": [1,1], 110 | "P107i_is_current_or_former_member_of": [1,1], 111 | "P108_has_produced": [1,1], 112 | "P108i_was_produced_by": [1,0], 113 | "P109_has_current_or_former_curator": [0,1], 114 | "P109i_is_current_or_former_curator_of": [0,1], 115 | "P10_falls_within": [0,1], 116 | "P10i_contains": [0,1], 117 | "P110_augmented": [1,0], 118 | "P110i_was_augmented_by": [1,1], 119 | 
"P111_added": [1,0], 120 | "P111i_was_added_by": [1,1], 121 | "P112_diminished": [1,0], 122 | "P112i_was_diminished_by": [1,1], 123 | "P113_removed": [1,0], 124 | "P113i_was_removed_by": [1,1], 125 | "P11_had_participant": [1,1], 126 | "P11i_participated_in": [1,1], 127 | "P121_overlaps_with": [0,1], 128 | "P122_borders_with": [0,1], 129 | "P123_resulted_in": [1,1], 130 | "P123i_resulted_from": [1,1], 131 | "P124_transformed": [1,1], 132 | "P124i_was_transformed_by": [1,1], 133 | "P125_used_object_of_type": [0,1], 134 | "P125i_was_type_of_object_used_in": [0,1], 135 | "P126_employed": [1,1], 136 | "P126i_was_employed_in": [0,1], 137 | "P127_has_broader_term": [0,1], 138 | "P127i_has_narrower_term": [0,1], 139 | "P128_carries": [1,1], 140 | "P128i_is_carried_by": [1,1], 141 | "P129_is_about": [1,1], 142 | "P129i_is_subject_of": [1,1], 143 | "P12_occurred_in_the_presence_of": [1,1], 144 | "P12i_was_present_at": [1,1], 145 | "P130_shows_features_of": [0,1], 146 | "P130i_features_are_also_found_on": [0,1], 147 | "P131_is_identified_by": [0,1], 148 | "P131i_identifies": [0,1], 149 | "P132_overlaps_with": [0,1], 150 | "P133_is_separated_from": [0,1], 151 | "P134_continued": [1,1], 152 | "P134i_was_continued_by": [1,1], 153 | "P135_created_type": [0,1], 154 | "P135i_was_created_by": [0,1], 155 | "P136_was_based_on": [0,1], 156 | "P136i_supported_type_creation": [0,1], 157 | "P137_exemplifies": [0,1], 158 | "P137i_is_exemplified_by": [0,1], 159 | "P138_represents": [1,1], 160 | "P138i_has_representation": [1,1], 161 | "P139_has_alternative_form": [1,1], 162 | "P13_destroyed": [1,0], 163 | "P13i_was_destroyed_by": [1,0], 164 | "P140_assigned_attribute_to": [1,0], 165 | "P140i_was_attributed_by": [1,1], 166 | "P141_assigned": [1,1], 167 | "P141i_was_assigned_by": [1,1], 168 | "P142_used_constituent": [0,1], 169 | "P142i_was_used_in": [0,1], 170 | "P143_joined": [1,0], 171 | "P143i_was_joined_by": [1,1], 172 | "P144_joined_with": [1,0], 173 | "P144i_gained_member_by": [1,1], 
174 | "P145_separated": [1,0], 175 | "P145i_left_by": [1,1], 176 | "P146_separated_from": [1,0], 177 | "P146i_lost_member_by": [1,1], 178 | "P147_curated": [0,1], 179 | "P147i_was_curated_by": [0,1], 180 | "P148_has_component": [1,1], 181 | "P148i_is_component_of": [1,1], 182 | "P149_is_identified_by": [0,1], 183 | "P149i_identifies": [0,1], 184 | "P14_carried_out_by": [1,1], 185 | "P14i_performed": [1,1], 186 | "P150_defines_typical_parts_of": [0,1], 187 | "P150i_defines_typical_wholes_for": [0,1], 188 | "P151_was_formed_from": [0,1], 189 | "P151i_participated_in": [0,1], 190 | "P152_has_parent": [0,1], 191 | "P152i_is_parent_of": [0,1], 192 | "P156_occupies": [1,1], 193 | "P156i_is_occupied_by": [1,1], 194 | "P157_is_at_rest_relative_to": [0,1], 195 | "P157i_provides_reference_space_for": [0,1], 196 | "P15_was_influenced_by": [1,1], 197 | "P15i_influenced": [1,1], 198 | "P160_has_temporal_projection": [0,1], 199 | "P161_has_spatial_projection": [0,1], 200 | "P164_during": [0,1], 201 | "P164i_was_time-span_of": [0,1], 202 | "P165_incorporates": [0,1], 203 | "P165i_is_incorporated_in": [0,1], 204 | "P166_was_a_presence_of": [0,1], 205 | "P166i_had_presence": [0,1], 206 | "P167_at": [0,1], 207 | "P167i_was_place_of": [0,1], 208 | "P168_place_is_defined_by": [1,1], 209 | "P168i_defines_place": [0,1], 210 | "P16_used_specific_object": [1,1], 211 | "P16i_was_used_for": [1,1], 212 | "P177_assigned_property_of_type": [1,0], 213 | "P179_had_sales_price": [0,1], 214 | "P179i_was_sales_price_of": [0,1], 215 | "P17_was_motivated_by": [1,1], 216 | "P17i_motivated": [1,1], 217 | "P180_has_currency": [1,0], 218 | "P180i_was_currency_of": [1,1], 219 | "P181_has_amount": [0,0], 220 | "P182_ends_before_or_with_the_start_of": [1,1], 221 | "P182i_starts_after_or_with_the_end_of": [1,1], 222 | "P183_ends_before_the_start_of": [1,1], 223 | "P183i_starts_after_the_end_of": [1,1], 224 | "P184_ends_before_or_with_the_end_of": [1,1], 225 | "P184i_ends_with_or_after_the_end_of": [1,1], 226 
| "P185_ends_before_the_end_of": [1,1], 227 | "P185i_ends_after_the_end_of": [1,1], 228 | "P189_approximates": [1,1], 229 | "P189i_is_approximated_by": [1,1], 230 | "P19_was_intended_use_of": [0,1], 231 | "P19i_was_made_for": [0,1], 232 | "P190_has_symbolic_content": [1,0], 233 | "P191_had_duration": [1,0], 234 | "P191i_was_duration_of": [1,0], 235 | "P1_is_identified_by": [1, 1], 236 | "P1i_identifies": [1, 0], 237 | "P20_had_specific_purpose": [1,1], 238 | "P20i_was_purpose_of": [1,1], 239 | "P21_had_general_purpose": [1,1], 240 | "P21i_was_purpose_of": [0,1], 241 | "P22_transferred_title_to": [1,1], 242 | "P22i_acquired_title_through": [1,1], 243 | "P23_transferred_title_from": [1,1], 244 | "P23i_surrendered_title_through": [1,1], 245 | "P24_transferred_title_of": [1,1], 246 | "P24i_changed_ownership_through": [1,1], 247 | "P25_moved": [1,1], 248 | "P25i_moved_by": [1,1], 249 | "P26_moved_to": [1,0], 250 | "P26i_was_destination_of": [1,1], 251 | "P27_moved_from": [1,0], 252 | "P27i_was_origin_of": [1,1], 253 | "P28_custody_surrendered_by": [1,1], 254 | "P28i_surrendered_custody_through": [1,1], 255 | "P29_custody_received_by": [1,1], 256 | "P29i_received_custody_through": [1,1], 257 | "P2_has_type": [1,1], 258 | "P2i_is_type_of": [0,1], 259 | "P30_transferred_custody_of": [1,1], 260 | "P30i_custody_transferred_through": [1,1], 261 | "P31_has_modified": [1,1], 262 | "P31i_was_modified_by": [1,1], 263 | "P32_used_general_technique": [1,1], 264 | "P32i_was_technique_of": [2,1], 265 | "P33_used_specific_technique": [0,1], 266 | "P33i_was_used_by": [0,1], 267 | "P34_concerned": [0,1], 268 | "P34i_was_assessed_by": [0,1], 269 | "P35_has_identified": [0,1], 270 | "P35i_was_identified_by": [0,1], 271 | "P37_assigned": [0,1], 272 | "P37i_was_assigned_by": [0,1], 273 | "P38_deassigned": [0,1], 274 | "P38i_was_deassigned_by": [0,1], 275 | "P39_measured": [0,1], 276 | "P39i_was_measured_by": [0,1], 277 | "P3_has_note": [0,1], 278 | "P40_observed_dimension": [0,1], 279 | 
"P40i_was_observed_in": [0,1], 280 | "P41_classified": [0,1], 281 | "P41i_was_classified_by": [0,1], 282 | "P42_assigned": [0,1], 283 | "P42i_was_assigned_by": [0,1], 284 | "P43_has_dimension": [1,1], 285 | "P43i_is_dimension_of": [1,0], 286 | "P44_has_condition": [0,1], 287 | "P44i_is_condition_of": [0,1], 288 | "P45_consists_of": [1,1], 289 | "P45i_is_incorporated_in": [1,1], 290 | "P46_is_composed_of": [1,1], 291 | "P46i_forms_part_of": [1,1], 292 | "P48_has_preferred_identifier": [0,1], 293 | "P48i_is_preferred_identifier_of": [0,1], 294 | "P49_has_former_or_current_keeper": [0,1], 295 | "P49i_is_former_or_current_keeper_of": [0,1], 296 | "P4_has_time-span": [1,0], 297 | "P4i_is_time-span_of": [0,1], 298 | "P50_has_current_keeper": [1,1], 299 | "P50i_is_current_keeper_of": [1,1], 300 | "P51_has_former_or_current_owner": [0,1], 301 | "P51i_is_former_or_current_owner_of": [0,1], 302 | "P52_has_current_owner": [1,1], 303 | "P52i_is_current_owner_of": [1,1], 304 | "P53_has_former_or_current_location": [0,1], 305 | "P53i_is_former_or_current_location_of": [0,1], 306 | "P54_has_current_permanent_location": [0,1], 307 | "P54i_is_current_permanent_location_of": [0,1], 308 | "P55_has_current_location": [1,0], 309 | "P55i_currently_holds": [1,1], 310 | "P56_bears_feature": [1,1], 311 | "P56i_is_found_on": [1,0], 312 | "P57_has_number_of_parts": [0,1], 313 | "P58_has_section_definition": [0,1], 314 | "P58i_defines_section": [0,1], 315 | "P59_has_section": [0,1], 316 | "P59i_is_located_on_or_within": [0,1], 317 | "P5_consists_of": [0,1], 318 | "P5i_forms_part_of": [0,1], 319 | "P62_depicts": [1,1], 320 | "P62i_is_depicted_by": [1,1], 321 | "P65_shows_visual_item": [1,1], 322 | "P65i_is_shown_by": [1,1], 323 | "P67_refers_to": [1,1], 324 | "P67i_is_referred_to_by": [1,1], 325 | "P68_foresees_use_of": [0,1], 326 | "P68i_use_foreseen_by": [0,1], 327 | "P69_is_associated_with": [0,1], 328 | "P70_documents": [0,1], 329 | "P70i_is_documented_in": [0,1], 330 | "P71_lists": [1,1], 
331 | "P71i_is_listed_in": [1,1], 332 | "P72_has_language": [1,1], 333 | "P72i_is_language_of": [1,1], 334 | "P73_has_translation": [1,1], 335 | "P73i_is_translation_of": [1,1], 336 | "P74_has_current_or_former_residence": [1,1], 337 | "P74i_is_current_or_former_residence_of": [1,1], 338 | "P75_possesses": [1,1], 339 | "P75i_is_possessed_by": [1,1], 340 | "P76_has_contact_point": [1,1], 341 | "P76i_provides_access_to": [1,1], 342 | "P78_is_identified_by": [0,1], 343 | "P78i_identifies": [0,1], 344 | "P79_beginning_is_qualified_by": [0,1], 345 | "P7_took_place_at": [1,1], 346 | "P7i_witnessed": [0,1], 347 | "P80_end_is_qualified_by": [0,1], 348 | "P81_ongoing_throughout": [0,1], 349 | "P81a_end_of_the_begin": [1,0], 350 | "P81b_begin_of_the_end": [1,0], 351 | "P82_at_some_time_within": [0,1], 352 | "P82a_begin_of_the_begin": [1,0], 353 | "P82b_end_of_the_end": [1,0], 354 | "P86_falls_within": [0,1], 355 | "P86i_contains": [0,1], 356 | "P87_is_identified_by": [0,1], 357 | "P87i_identifies": [0,1], 358 | "P89_falls_within": [1,1], 359 | "P89i_contains": [1,1], 360 | "P8_took_place_on_or_within": [0,1], 361 | "P8i_witnessed": [0,1], 362 | "P90_has_value": [1,0], 363 | "P90a_has_lower_value_limit": [1,0], 364 | "P90b_has_upper_value_limit": [1,0], 365 | "P91_has_unit": [1,0], 366 | "P91i_is_unit_of": [0,1], 367 | "P92_brought_into_existence": [0,1], 368 | "P92i_was_brought_into_existence_by": [0,0], 369 | "P93_took_out_of_existence": [0,1], 370 | "P93i_was_taken_out_of_existence_by": [0,0], 371 | "P94_has_created": [1,1], 372 | "P94i_was_created_by": [1,0], 373 | "P95_has_formed": [1,1], 374 | "P95i_was_formed_by": [1,0], 375 | "P96_by_mother": [0,0], 376 | "P96i_gave_birth": [0,1], 377 | "P97_from_father": [0,1], 378 | "P97i_was_father_for": [0,1], 379 | "P98_brought_into_life": [1,0], 380 | "P98i_was_born": [1,0], 381 | "P99_dissolved": [1,0], 382 | "P99i_was_dissolved_by": [1,0], 383 | "P9_consists_of": [1,1], 384 | "P9i_forms_part_of": [1,1], 385 | 386 | 
"P169i_spacetime_volume_is_defined_by": [0,0], 387 | "P170i_time_is_defined_by": [0,0], 388 | "P171_at_some_place_within": [0,0], 389 | "P172_contains": [0,0], 390 | "P173_starts_before_or_with_the_end_of": [1,1], 391 | "P173i_ends_after_or_with_the_start_of": [1,1], 392 | "P174_starts_before_the_end_of": [1,1], 393 | "P174i_ends_after_the_start_of": [1,1], 394 | "P175_starts_before_or_with_the_start_of": [1,1], 395 | "P175i_starts_with_or_after_the_start_of": [1,1], 396 | "P176_starts_before_the_start_of": [1,1], 397 | "P176i_starts_after_the_start_of": [1,1], 398 | "P186_produced_thing_of_product_type": [0,0], 399 | "P186i_is_produced_by": [0,0], 400 | "P187_has_production_plan": [0,0], 401 | "P187i_is_production_plan_for": [0,0], 402 | "P188_requires_production_tool": [0,0], 403 | "P188i_is_production_tool_for": [0,0], 404 | "P195_was_a_presence_of": [0,1], 405 | "P195i_had_presence": [0,1], 406 | "P196_defines": [0,1], 407 | "P196i_is_defined_by": [0,1], 408 | "P197_covered_parts_of": [0,1], 409 | "P197i_was_partially_covered_by": [0,1], 410 | "P198_holds_or_supports": [1,1], 411 | "P198i_is_held_or_supported_by": [1,1], 412 | "P199_represents_instance_of_type": [1,1], 413 | "P199i_has_instance_represented_by":[1,1], 414 | 415 | "la:paid_amount": [1,0], 416 | "la:paid_from": [1,1], 417 | "la:paid_to": [1,1], 418 | "la:establishes": [1,0], 419 | "la:established_by": [1,0], 420 | "la:invalidates": [1,1], 421 | "la:invalidated_by": [1,0], 422 | "la:relates_to": [1,0], 423 | "la:relates_from": [1,0], 424 | "la:related_to_by": [1,1], 425 | "la:related_from_by": [1,1], 426 | "la:initiated": [0,1], 427 | "la:initiated_by": [0,1], 428 | "la:terminated": [0,1], 429 | "la:terminated_by": [0,1], 430 | "la:has_phase": [0,1], 431 | "la:phase_of": [0,0], 432 | "la:related_entity": [0,0], 433 | "la:related_entity_of": [0,1], 434 | "la:relationship": [0,0], 435 | "la:has_member": [1,1], 436 | "la:member_of": [1,1], 437 | "la:added_to": [1,0], 438 | "la:added_to_by": [1,1], 439 
| "la:added_member": [1,0], 440 | "la:added_member_by": [1,1], 441 | "la:removed_from": [1,0], 442 | "la:removed_from_by": [1,1], 443 | "la:removed_member": [1,0], 444 | "la:removed_member_by": [1,1], 445 | "la:digitally_carries": [1,1], 446 | "la:digitally_carried_by": [1,1], 447 | "la:digitally_shows": [1,1], 448 | "la:digitally_shown_by": [1,1], 449 | "la:digitally_available_via": [1,1], 450 | "la:digitally_makes_available": [1,1], 451 | "la:property_classified_as": [1,1], 452 | "la:represents_instance_of_type": [1,1], 453 | "la:instance_represented_by": [1,1], 454 | "la:current_permanent_custodian": [1,0], 455 | "la:current_permanent_custodian_of": [1,1], 456 | "la:equivalent": [1,1], 457 | "la:access_point": [1,1], 458 | 459 | "dc:format": [1,0], 460 | "dcterms:conformsTo": [1,1], 461 | "dcterms:subject": [1,1], 462 | "rdf:value": [1,0], 463 | "rdfs:seeAlso": [1,1], 464 | "rdfs:label": [1,0], 465 | "skos:exactMatch": [1,1], 466 | "skos:closeMatch": [1,1], 467 | "skos:narrower": [1,1], 468 | "skos:broader": [1,1], 469 | "skos:hasTopConcept": [1,1], 470 | "skos:topConceptOf": [1,1], 471 | "skos:inScheme": [1,1], 472 | "sci:O13_triggers": [1,1], 473 | "sci:O13i_is_triggered_by": [1,1], 474 | "sci:O19_encountered_object": [1,1], 475 | "sci:O19i_was_object_encountered_at": [1,1], 476 | "archaeo:AP25_occurs_during": [1,1] 477 | } -------------------------------------------------------------------------------- /utils/data/linkedart.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | Property Classified As 13 | Record dot one properties via Attribute Assignments 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | Current Permanent Custodian 23 | This property records the normal custodian of the object at the time when the assertion was made. 
The object may have a temporary custodian, for example when it is loaned to another organization for an exhibition or between departments for conservation or storage. 24 | 25 | 26 | 27 | 28 | 29 | 30 | Current Permanent Custodian Of 31 | Inverse of Current Permanent Custodian 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | has equivalent instance 41 | Similar to skos:exactMatch, the referenced entity is an equivalent instance to the referencing entity. This would not have the same inference issue as exactMatch, whereby the domain and range become skos:Concept, which is equivalent to E55_Type ... and thus everything becomes a Type. 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | Payment 51 | Payment of Money 52 | 53 | 54 | 55 | 56 | Paid Amount 57 | The amount paid. 58 | 59 | 60 | 61 | 62 | 63 | Paid From 64 | Who the payment came from 65 | 66 | 67 | 68 | 69 | 70 | Paid To 71 | Who the payment went to 72 | 73 | 74 | 75 | 76 | 77 | Right Acquisition 78 | The acquiring or establishment of a particular E30 Right over some entity 79 | 80 | 81 | 82 | 83 | establishes 84 | The right established by a RightAcquisition 85 | 86 | 87 | 88 | 89 | 90 | established by 91 | The RightAcquisition that established this Right 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | invalidates 100 | The right which is invalidated by a RightAcquisition 101 | 102 | 103 | 104 | 105 | 106 | invalidated by 107 | The RightAcquisition that invalidated this Right 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | Phase 120 | The period of time during which an entity is in a certain phase or state of its existence. The phase can be physical (the box is open, the painting is 14 ft wide) or social (the sculpture is owned by some Actor, the building is used as a castle). 
121 | 122 | 123 | 124 | 125 | initiated 126 | Events can start or initiate Phases, such as the Modification of a painting to trim it from 16 feet to 14 ft would initiate a 14 feet phase, or the Acquisition of a sculpture by some Actor would initiate that Actor's ownership phase. This relationship links the initiating Event to the Phase. 127 | 128 | 129 | 130 | 131 | 132 | 133 | initiated by 134 | The inverse of la:initiated. 135 | 136 | 137 | 138 | 139 | 140 | 141 | terminated 142 | Events can end or terminate Phases, such as the Modification of a painting to trim it from 16 feet to 14 ft would end the 16 feet phase, or the Acquisition of a sculpture by some Actor would end the seller's ownership. This relationship links the terminating Event to the Phase. 143 | 144 | 145 | 146 | 147 | 148 | 149 | terminated by 150 | The inverse of la:terminated. 151 | 152 | 153 | 154 | 155 | 156 | 157 | has phase 158 | The relationship between an E1 Entity and one of its Phases. A painting that had two sizes, 16 feet and 14 feet wide, would have two Phases, one for each width. 159 | 160 | 161 | 162 | 163 | 164 | 165 | phase of 166 | The inverse of la:has_phase. 167 | 168 | 169 | 170 | 171 | 172 | 173 | related entity 174 | An E1 Entity that defines the nature of the Phase. For an ownership phase, this would be the owning E39 Actor. For the size of painting phase, this would be the E54 Dimension that describes the size. 175 | 176 | 177 | 178 | 179 | 180 | 181 | related entity of 182 | The inverse of la:related_entity. 183 | 184 | 185 | 186 | 187 | 188 | 189 | related by 190 | The relationship between the Entity that the phase is of, and the defining entity. For an ownership phase, this would be P52 has current owner. For the size of painting phase, this would be P43 has dimension. 
191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | Set 206 | 207 | 208 | 209 | 210 | 211 | 212 | has member 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | member of 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | Addition 229 | The addition of some entity to a Set 230 | 231 | 232 | 233 | 234 | added to 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | added to by 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | added 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | added by 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | Removal 267 | The removal of some entity from a Set 268 | 269 | 270 | 271 | 272 | removed from 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | removed from by 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | removed 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | removed by 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | digitally carries 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | digitally carried by 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | digitally shows 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | digitally shown by 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | access point 340 | From an ur- digital object to a single concrete representation. 341 | A locator as opposed to an identifier, similar to the approximated_by for Place. 
# Meta meta: local subclasses of the CIDOC-CRM classes, each tagged with a
# Getty AAT classification URI.  FIX: the original literal listed "Drawing"
# twice with identical values; duplicate keys in a dict literal are silently
# collapsed, so the redundant entry is removed.
ext_classes = {
    "LocalNumber": {"parent": Identifier, "vocab": "aat", "id": "300404621"},
    "AccessionNumber": {"parent": Identifier, "vocab": "aat", "id": "300312355"},
    "Inscription": {"parent": Mark, "vocab": "aat", "id": "300028702"},
    "Signature": {"parent": Mark, "vocab": "aat", "id": "300028705"},
    "Painting": {"parent": ManMadeObject, "vocab": "aat", "id": "300033618"},
    "Sculpture": {"parent": ManMadeObject, "vocab": "aat", "id": "300047090"},
    "Drawing": {"parent": ManMadeObject, "vocab": "aat", "id": "300033973"},
    "Miniature": {"parent": ManMadeObject, "vocab": "aat", "id": "300033936"},
    "Tapestry": {"parent": ManMadeObject, "vocab": "aat", "id": "300205002"},
    "Furniture": {"parent": ManMadeObject, "vocab": "aat", "id": "300037680"},
    "Mosaic": {"parent": ManMadeObject, "vocab": "aat", "id": "300015342"},
    "Photograph": {"parent": ManMadeObject, "vocab": "aat", "id": "300046300"},
    "Coin": {"parent": ManMadeObject, "vocab": "aat", "id": "300037222"},
    "Vessel": {"parent": ManMadeObject, "vocab": "aat", "id": "300193015"},
    "PhotographPrint": {"parent": ManMadeObject, "vocab": "aat", "id": "300127104"},
    "PhotographAlbum": {"parent": ManMadeObject, "vocab": "aat", "id": "300026695"},
    "PhotographBook": {"parent": ManMadeObject, "vocab": "aat", "id": "300265728"}
}

factory.base_url = "http://data.getty.edu/provenance/"
factory.context_uri = "http://data.getty.edu/contexts/crm_context.jsonld"

# Materialize each ext_classes entry as a real subclass carrying its AAT type,
# and install it into module scope so it can be referenced by name below.
for (name, v) in ext_classes.items():
    c = type(name, (v['parent'],), {})
    c._p2_has_type = "http://vocab.getty.edu/%s/%s" % (v['vocab'], v['id'])
    globals()[name] = c


# At the moment it's just an activity, no subtype info
class TakeInventory(Activity):
    pass


class Payment(Activity):
    """Extension activity (pi: namespace) recording money changing hands."""
    _properties = {
        "paid_amount": {"rdf": "pi:paid_amount", "range": MonetaryAmount},
        "paid_to": {"rdf": "pi:paid_to", "range": Actor},
        "paid_from": {"rdf": "pi:paid_from", "range": Actor}
    }
    _uri_segment = "Payment"
    _type = "pi:Payment"

# Cache the MRO minus `object`, matching what the ORM expects on its
# built-in classes.
Payment._classhier = inspect.getmro(Payment)[:-1]

# Object Types
# {'Pastel': 265, 'Photograph': 4, 'Clocks': 1, 'Painting [?]': 8, 'Painting': 37313,
#  '[not identified]': 2, 'Clothing': 1, 'Playing Cards': 1, 'Watercolor': 547,
#  'Maps': 1, 'Book': 35, 'Decorative Art': 9, 'Painting; Sculpture': 1, 'Print': 21,
#  'Watercolor; Painting': 1, 'Sculpture': 1817, 'Drawing': 225, 'Tapestry': 61, 'Furniture': 22}

# Days per month for expanding partial dates.  February is always 28 --
# leap years are not handled; TODO confirm that is acceptable upstream.
endOfMonths = {'01': 31, '02': 28, '03': 31, '04': 30, '05': 31, '06': 30,
               '07': 31, '08': 31, '09': 30, '10': 31, '11': 30, '12': 31}

# Source object-type string -> generated ManMadeObject subclass.
aat_type_mapping = {
    "Painting": Painting,
    "Drawing": Drawing,
    "Furniture": Furniture,
    "Sculpture": Sculpture,
    "Tapestry": Tapestry,
    "Watercolor": Painting,
    "Pastel": Painting
}

# # A wooden support
aat_part_mapping = {
    "supports": "300014844"  # The thing that is painted on
}

# Material word -> AAT identifier.
aat_material_mapping = {
    "panel": "300014657",  # Is really a support
    "watercolor": "300015045",
    "oil": "300015050",
    "tempera": "300015062",
    "canvas": "300014078",
    "oak": "300012264",
    "gold leaf": "300264831",
    "paper": "300014109",
    "copper": "300011020",
    "terracotta": "300010669",
    "glass": "300010797",
    "chalk": "300011727",
    "bronze": "300010957",
    "marble": "300011443",
    "albumen silver print": "300127121",
    "gelatin silver print": "300128695",
    "silver": "300011029"
}

# pen, pencil, card, cardboard, porcelain, wax, ceramic, plaster
# crayon, millboard, gouache, brass, stone, lead, iron, clay,
# alabaster, limestone

# Pre-built, labelled Material instances keyed by material word.
materialTypes = {}
for (k, v) in aat_material_mapping.items():
    m = Material("http://vocab.getty.edu/aat/%s" % v)
    m.label = k
    materialTypes[k] = m

aat_culture_mapping = {
    "french": "300111188",
    "italian": "300111198",
    "german": "300111192",
    "dutch": "300020929"
}

aat_genre_mapping = {
    "Abstract": "300134134",  # maybe?
    "abstract": "300134134",  # maybe?
    "Genre": "300139140",  # maybe?
    "History": "300386045",
    "Landscape": "300015636",
    "Portrait": "300015637",
    "Still Life": "300015638"
}

dim_type_mapping = {
    "height": "300055644",
    "width": "300055647",
    "depth": "300072633",
    "diameter": "300055624",
    "weight": "300056240"
}

dim_unit_mapping = {
    "inches": "300379100",
    "feet": "300379101",
    "cm": "300379098"
}

# Single shared unit instance; inches is the default unit for dimensions.
inches = MeasurementUnit("http://vocab.getty.edu/aat/%s" % dim_unit_mapping['inches'])
inches.label = "inches"
# Monkey patch Type's _toJSON to only emit full data if not just URI+type
def typeToJSON(self, top=False):
    """Serialise a Type instance: emit the full record only when it carries
    more than the basic bookkeeping attributes, otherwise collapse it to
    just its URI string."""
    # More than three instance attributes means extra state worth emitting.
    if len(self.__dict__) > 3:
        return super(Type, self)._toJSON()
    return self.id


def process_money_value(value):
    """Strip editorial/transcription noise from a monetary value string.

    Removes bracket characters and stock annotation phrases that appear
    in the transcribed ledgers, then trims whitespace.
    """
    for junk in ('[', ']', '-', '?', 'not written', 'sold at auction',
                 'x', 'Lot Price', 'See sales book'):
        value = value.replace(junk, '')
    return value.strip()
def process_materials(what, materials):
    """Normalise a free-text materials field and resolve its words to
    Material terms from the module-level materialTypes mapping.

    `what` (the object being described) is currently unused; it is kept
    for interface compatibility with the caller.  Returns a list of
    Material instances; unrecognised words are skipped.
    """
    materials = materials.lower()
    materials = materials.replace("&", "and")
    # Expand the transcribers' "abbrev [meaning]" conventions.
    materials = materials.replace("card [cardboard]", "cardboard")
    materials = materials.replace("c [canvas]", "canvas")
    materials = materials.replace("w/c [watercolor]", "watercolor")
    materials = materials.replace("[bronze]", "bronze")
    materials = materials.replace("c on p [canvas on panel]", "canvas on panel")
    materials = materials.replace("[from sales book 9, 1907-1912, f. 361]", "")
    materials = materials.replace("p [panel]", "panel")
    materials = materials.replace('terra cotta', 'terracotta')
    materials = materials.replace(',', '')
    materials = materials.replace('-', '')
    materials = materials.replace('procelain', 'porcelain')
    materials = materials.strip()
    mats = []
    for mw in materials.split(' '):
        if mw in ['on', 'and']:
            continue
        try:
            mats.append(materialTypes[mw])
        except KeyError:  # FIX: was a bare except hiding real errors
            # Material word not in the AAT mapping; skip it.
            # print("Material not found: %s" % mw)
            pass
    return mats

# "25 3/4" style mixed fractions, and "12.5 high" style value+unit-word.
divre = re.compile('^([0-9]+) ([0-9]+)/([0-9]+)( |$)')
unitre = re.compile('^([0-9.]+) (high|height|h|long|length|l)( |$)')

def process_dimensions(dims):
    """Parse a free-text dimension string into [[value, which], ...] pairs,
    where `which` is 'h' (height) or 'w' (width).

    Handles '25 x 30', mixed fractions ('10 1/2'), decimals, and explicit
    unit words ('12.5 high').  Units are assumed to be inches.  The first
    unlabelled value is taken as height, subsequent ones as width.
    Unparseable segments are dropped.
    """
    dims = dims.lower()
    # assume default of inches
    dims = dims.replace('"', '')
    dims = dims.replace('in.', '')
    dims = dims.replace('inches', '')
    dims = dims.replace('//', '/')
    dims = dims.replace('[', '')
    dims = dims.replace(']', '')
    # (the old replace('X', 'x') was dead code: lower() already ran)

    dimensions = []
    seen = 0
    for d in dims.split('x'):
        d = d.strip()
        which = ""
        m = divre.match(d)
        if m:
            (main, numr, denom, _end) = m.groups()
            ttl = int(main) + (float(numr) / float(denom))
        else:
            try:
                ttl = int(d)
            except ValueError:
                try:
                    # FIX: plain decimals ('12.5') used to be dropped
                    # because only int() was attempted.
                    ttl = float(d)
                except ValueError:
                    m = unitre.match(d)
                    if not m:
                        # print("----- %s" % d)
                        continue
                    (ttl, which, _end) = m.groups()
                    # FIX: the regex yields a string; coerce so the result
                    # list is uniformly numeric.
                    ttl = float(ttl)
                    which = 'h' if which.startswith('h') else 'w'
        if not which:
            which = "w" if seen else "h"
        seen += 1
        dimensions.append([ttl, which])
    return dimensions


def print_rec_full(rec):
    """Debug helper: print every non-empty field of a record, sorted by key."""
    # FIX: sorted(rec.items()) works on both Python 2 and 3; the original
    # rec.items(); its.sort() breaks on dict views, and the print statement
    # is Python-2-only syntax.
    for (k, v) in sorted(rec.items()):
        if v:
            print("%s: %s" % (k, v))
activity that the entry describes 336 | # 'Sold':26598,'Unsold':11824,'Exchanged':103,'Presented':246,'Transferred':310 337 | # 'Returned':533,'Unknown': 629, 338 | # 'Lost': 7, 'Voided': 16, 'Disjointed': 16, 'Cancelled': 47, 'Removed': 6 339 | 340 | # The outbound activity 341 | txn = None 342 | inv = None 343 | 344 | txnType = rec['transaction'] 345 | if txnType == "Sold": 346 | txn = Purchase(oid) 347 | elif txnType in ['Exchanged', 'Presented', 'Transferred']: 348 | if rec['price_amount']: 349 | txn = Purchase(oid) 350 | else: 351 | txn = Acquisition(oid) 352 | elif txnType in ["Unsold", "Cancelled"]: 353 | # Stock taking, or never left inventory due to no sale 354 | inv = TakeInventory(oid) 355 | elif txnType in ["Lost", "Removed"]: 356 | # Leaves inventory, but not via a transfer of ownership 357 | # E8 can represent end of ownership. There's just no new owner. 358 | txn = Acquisition(oid) 359 | elif txnType == "Voided": 360 | # Bad data; voided should be skipped (per Kelly) 361 | continue 362 | elif txnType == "Returned": 363 | # Can't tell what this actually means yet 364 | # Could be entering or leaving Knoedler stock! 
365 | continue 366 | elif txnType == "Unknown": 367 | if rec['price_amount']: 368 | txn = Purchase(oid) 369 | else: 370 | inv = TakeInventory(oid) 371 | else: 372 | # I think this is only Disjointed 373 | # print_rec(rec) 374 | continue 375 | 376 | # The inbound activity that always happens 377 | # For consistency, always generate a Payment 378 | if rec['purchase_amount']: 379 | inTxn = Purchase("purch_%s" % oid) 380 | pay = Payment("purch_%s" % oid) 381 | inTxn.consists_of = pay 382 | 383 | amnt = MonetaryAmount("purch_price_%s" % oid) 384 | value = process_money_value(rec['purchase_amount']) 385 | if value: 386 | try: 387 | amnt.has_value = float(value) 388 | except: 389 | amnt.description = value 390 | if rec['purchase_currency']: 391 | curr = Currency(rec['purchase_currency']) 392 | curr.label = rec['purchase_currency'] 393 | amnt.has_currency = curr 394 | if rec['purchase_note']: 395 | amnt.description = rec['purchase_note'] 396 | pay.paid_amount = amnt 397 | pay.paid_from = knoedler 398 | inTxn.had_sales_price = amnt 399 | 400 | else: 401 | inTxn = Acquisition("purch_%s" % oid) 402 | 403 | inTxn.transferred_title_to = knoedler 404 | if rec['seller_name'] or rec['seller_name_auth']: 405 | # Look up in authority? 
406 | seller = Actor("seller_%s" % oid) 407 | seller.label = rec['seller_name_auth'] if rec['seller_name_auth'] else rec['seller_name'] 408 | if rec['seller_loc'] or rec['seller_loc_auth']: 409 | sellerPlace = Place("seller_place_%s" % oid) 410 | sellerPlace.label = rec['seller_loc_auth'] if rec['seller_loc_auth'] else rec['seller_loc'] 411 | seller.has_current_or_former_residence = sellerPlace 412 | inTxn.transferred_title_from = seller 413 | if rec['purchase_amount']: 414 | pay.paid_to = seller 415 | 416 | # CurationPeriod 417 | curated = Activity("curated_%s" % oid) 418 | curated.is_started_by = inTxn 419 | 420 | if txn: 421 | # from 422 | txn.transferred_title_from = knoedler 423 | # to 424 | if rec['buyer_name'] or rec['buyer_name_auth']: 425 | # Look up in authority? 426 | buyer = Actor("buyer_%s" % oid) 427 | buyer.label = rec['buyer_name_auth'] if rec['buyer_name_auth'] else rec['buyer_name'] 428 | if rec['buyer_loc'] or rec['buyer_loc_auth']: 429 | buyerPlace = Place("buyer_place_%s" % oid) 430 | buyerPlace.label = rec['buyer_loc_auth'] if rec['buyer_loc_auth'] else rec['buyer_loc'] 431 | buyer.has_current_or_former_residence = buyerPlace 432 | txn.transferred_title_to = buyer 433 | 434 | # when 435 | if rec['sale_date_year']: 436 | # if year, then all. 
blank is "00" 437 | yr = rec['sale_date_year'] 438 | mt = rec['sale_date_month'] 439 | dy = rec['sale_date_day'] 440 | if dy != "00": 441 | start = "%s-%s-%s" % (yr,mt,dy) 442 | end = start 443 | elif mt != "00": 444 | start = "%s-%s-01" % (yr,mt) 445 | end = "%s-%s-%s" % (yr,mt,endOfMonths[mt]) 446 | else: 447 | start = "%s-01-01" % yr 448 | end = "%s-12-31" % yr 449 | span = TimeSpan("sale_span_%s" % oid) 450 | span.begin_of_the_begin = start 451 | span.end_of_the_end = end 452 | txn.has_timespan = span 453 | 454 | value = process_money_value(rec['price_amount']) 455 | if value: 456 | amnt = MonetaryAmount("sale_price_%s" % oid) 457 | try: 458 | amnt.has_value = float(value) 459 | except: 460 | amnt.description = value 461 | if rec['price_currency']: 462 | curr = Currency(rec['price_currency']) 463 | curr.label = rec['price_currency'] 464 | amnt.has_currency = curr 465 | if rec['price_note']: 466 | amnt.description = rec['price_note'] 467 | txn.had_sales_price = amnt 468 | 469 | # Check knoedler_share 470 | if rec['knoedler_share_amount']: 471 | 472 | value = process_money_value(rec['knoedler_share_amount']) 473 | if value: 474 | amnt = MonetaryAmount("shared_price_%s" % oid) 475 | try: 476 | amnt.has_value = float(value) 477 | except: 478 | amnt.description = value 479 | if rec['knoedler_share_currency']: 480 | curr = Currency(rec['knoedler_share_currency']) 481 | curr.label = rec['knoedler_share_currency'] 482 | amnt.has_currency = curr 483 | if rec['knoedler_share_note']: 484 | amnt.description = rec['knoedler_share_note'] 485 | 486 | pay = Payment("kshare_%s" % oid) 487 | txn.consists_of = pay 488 | pay.paid_amount = amnt 489 | pay.paid_to = knoedler 490 | if rec['buyer_name'] or rec['buyer_name_auth']: 491 | pay.paid_from = buyer 492 | 493 | else: 494 | pay = Payment("sale_%s" % oid) 495 | txn.consists_of = pay 496 | pay.paid_amount = amnt 497 | pay.paid_to = knoedler 498 | if rec['buyer_name'] or rec['buyer_name_auth']: 499 | pay.paid_from = buyer 500 | 501 
| curated.is_finished_by = txn 502 | elif inv: 503 | # Taking of Inventory as part of the curation period 504 | curated.consists_of = inv 505 | 506 | # If taking inventory, then the entry date is for that 507 | if rec['entry_date_year']: 508 | # if year, then all. blank is "00" 509 | yr = rec['entry_date_year'] 510 | mt = rec['entry_date_month'] 511 | dy = rec['entry_date_day'] 512 | if dy != "00": 513 | start = "%s-%s-%s" % (yr,mt,dy) 514 | end = start 515 | elif mt != "00": 516 | start = "%s-%s-01" % (yr,mt) 517 | end = "%s-%s-%s" % (yr,mt,endOfMonths[mt]) 518 | else: 519 | start = "%s-01-01" % yr 520 | end = "%s-12-31" % yr 521 | span = TimeSpan("sale_span_%s" % oid) 522 | span.begin_of_the_begin = start 523 | span.end_of_the_end = end 524 | inv.has_timespan = span 525 | 526 | 527 | if not inv: 528 | # entry date is for purchase 529 | if rec['entry_date_year']: 530 | # if year, then all. blank is "00" 531 | yr = rec['entry_date_year'] 532 | mt = rec['entry_date_month'] 533 | dy = rec['entry_date_day'] 534 | if dy != "00": 535 | start = "%s-%s-%s" % (yr,mt,dy) 536 | end = start 537 | elif mt != "00": 538 | start = "%s-%s-01" % (yr,mt) 539 | end = "%s-%s-%s" % (yr,mt,endOfMonths[mt]) 540 | else: 541 | start = "%s-01-01" % yr 542 | end = "%s-12-31" % yr 543 | span = TimeSpan("purch_span_%s" % oid) 544 | span.begin_of_the_begin = start 545 | span.end_of_the_end = end 546 | inTxn.has_timespan = span 547 | 548 | # create the object of the transaction 549 | 550 | objectType = rec['object_type'] 551 | try: 552 | what = aat_type_mapping[objectType](oid) 553 | except: 554 | what = ManMadeObject(oid) 555 | 556 | curated.used_specific_object = what 557 | inTxn.transferred_title_of = what 558 | entry.refers_to = inTxn 559 | if txn: 560 | txn.transferred_title_of = what 561 | entry.refers_to = txn 562 | elif txnType == "Voided": 563 | entry.refers_to = what 564 | 565 | what.label = rec['title'] 566 | 567 | idnt = AccessionNumber("knoedler_%s" % oid) 568 | idnt.value = 
rec['knoedler_id'] 569 | # No way to say it's Knoedler's number? 570 | # Could have a Creation of the Identifier performed by Knoedler :( 571 | 572 | if rec['artist_name'] or rec['artist_name_auth']: 573 | artist = Person("artist_%s" % oid) 574 | artist.label = rec['artist_name_auth'] if rec['artist_name_auth'] else rec['artist_name'] 575 | if rec['nationality']: 576 | artist.nationality = Place("artist_natl_%s" % oid) 577 | artist.nationality.label = rec['nationality'] 578 | 579 | prodn = Production("production_%s" % oid) 580 | prodn.carried_out_by = artist 581 | what.was_produced_by = prodn 582 | 583 | if rec['artist_name_2'] or rec['artist_name_auth_2']: 584 | artist = Person("artist2_%s" % oid) 585 | artist.label = rec['artist_name_auth_2'] if rec['artist_name_auth_2'] else rec['artist_name_2'] 586 | if rec['nationality_2']: 587 | artist.nationality = Place('artist_2_natl_%s' % oid) 588 | artist.nationality.label = rec['nationality_2'] 589 | prodn.carried_out_by = artist 590 | 591 | 592 | # genre 593 | if rec['genre'] and not rec['genre'] == '[not identified]': 594 | if not aat_genre_mapping.has_key(rec['genre']): 595 | print "Not found: %s" % (rec['genre']) 596 | else: 597 | what.has_type = genreTypes[rec['genre']] 598 | 599 | # subject 600 | if rec['subject']: 601 | s = rec['subject'] 602 | if s.find(';'): 603 | ss = [x.strip() for x in s.split(';')] 604 | else: 605 | ss = [s] 606 | for s in ss: 607 | # s = s.replace('Int\xe9rieurs', 'Interiors') 608 | sid = s.replace(' ', '') 609 | sid = sid.replace('(', '') 610 | sid = sid.replace(')', '') 611 | t = Type(sid) 612 | t.label = s 613 | what.depicts = t 614 | 615 | # materials 616 | if rec['materials']: 617 | # XXX Finish this 618 | process_materials(what, rec['materials']) 619 | # what.made_of = material 620 | 621 | if rec['dimensions']: 622 | # XXX Finish this too 623 | dims = process_dimensions(rec['dimensions']) 624 | for d in dims: 625 | dim = Dimension("%s_%s" % (d[0], oid)) 626 | dim.has_value = d[0] 627 
| dim.has_unit = inches 628 | if d[1] == 'h': 629 | what.height = dim 630 | else: 631 | what.width = dim 632 | 633 | collection = InformationObject("collection") 634 | for s in stock_books.values(): 635 | collection.has_fragment = s 636 | 637 | 638 | factory.full_names = True 639 | outstr = factory.toString(collection, compact=False) 640 | 641 | fh = file('knoedler.jsonld', 'w') 642 | fh.write(outstr) 643 | fh.close() 644 | 645 | # Note that these entries are really one transaction 646 | # 64699 ... 64732 647 | 648 | -------------------------------------------------------------------------------- /tests/test_model.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import sys 3 | import os 4 | import shutil 5 | import json 6 | import pickle 7 | from collections import OrderedDict 8 | from cromulent import model, vocab 9 | from cromulent.model import override_okay 10 | 11 | 12 | class TestFactorySetup(unittest.TestCase): 13 | 14 | def setUp(self): 15 | model.factory.base_url = 'http://data.getty.edu/provenance/' 16 | model.factory.base_dir = 'tests/provenance_base_dir' 17 | model.factory.default_lang = 'en' 18 | #model.factory.context_uri = 'http://www.cidoc-crm.org/cidoc-crm/' 19 | 20 | def tearDown(self): 21 | model.factory.base_url = 'http://lod.example.org/museum/' 22 | model.factory.log_stream = sys.stderr 23 | model.factory.debug_level = 'warn' 24 | 25 | def test_base_url(self): 26 | self.assertEqual(model.factory.base_url, 'http://data.getty.edu/provenance/') 27 | 28 | def test_base_dir(self): 29 | self.assertEqual(model.factory.base_dir, 'tests/provenance_base_dir') 30 | 31 | def test_default_lang(self): 32 | self.assertEqual(model.factory.default_lang, 'en') 33 | 34 | def test_set_debug_stream(self): 35 | strm = open('err_output', 'w') 36 | model.factory.set_debug_stream(strm) 37 | self.assertEqual(model.factory.log_stream, strm) 38 | 39 | def test_set_debug(self): 40 | 
model.factory.set_debug('error_on_warning') 41 | self.assertEqual(model.factory.debug_level, 'error_on_warning') 42 | self.assertRaises(model.ConfigurationError, model.factory.set_debug, 'xxx') 43 | self.assertRaises(model.MetadataError, model.factory.maybe_warn, "test") 44 | 45 | def test_load_context(self): 46 | self.assertRaises(model.ConfigurationError, model.factory.load_context, 47 | "foo", {"foo":"does_not_exist.txt"}) 48 | model.factory.load_context("foo", {"foo":"tests/test_context.json"}) 49 | self.assertEqual(model.factory.context_json, {"@context":{"id":"@id"}}) 50 | self.assertRaises(model.ConfigurationError, model.factory.load_context, "", {}) 51 | 52 | def test_pickle(self): 53 | model.factory.log_stream = sys.stderr 54 | srlz = pickle.dumps(model.factory) 55 | newfac = pickle.loads(srlz) 56 | self.assertTrue(model.factory.log_stream is newfac.log_stream) 57 | 58 | 59 | 60 | class TestFactorySerialization(unittest.TestCase): 61 | 62 | def setUp(self): 63 | self.collection = model.InformationObject('collection') 64 | self.collection._label = "Test Object" 65 | 66 | def test_broken_unicode(self): 67 | model.factory.debug_level = "error_on_warning" 68 | try: 69 | badval = b"\xFF\xFE\x02" 70 | except: 71 | badval = "\xFF\xFE\x02" 72 | badjs = {"_label": badval} 73 | self.assertRaises(model.MetadataError, model.factory._buildString, 74 | js=badjs) 75 | 76 | def test_toJSON(self): 77 | # model.factory.context_uri = 'http://lod.getty.edu/context.json' 78 | expect = OrderedDict([ 79 | ('@context', model.factory.context_uri), 80 | ('id', u'http://lod.example.org/museum/InformationObject/collection'), 81 | ('type', 'InformationObject'), ('_label', 'Test Object')]) 82 | outj = model.factory.toJSON(self.collection) 83 | self.assertEqual(expect, outj) 84 | 85 | def test_toJSON_fast(self): 86 | model.factory.json_serializer = "fast" 87 | expect = {'@context': model.factory.context_uri, 88 | 'id': 'http://lod.example.org/museum/InformationObject/collection', 89 | 
'type': 'InformationObject', 90 | '_label': 'Test Object'} 91 | outj = model.factory.toJSON(self.collection) 92 | self.assertEqual(expect, outj) 93 | model.factory.json_serializer = "normal" 94 | 95 | def test_toJSON_normal(self): 96 | expect = OrderedDict([(u'@context', model.factory.context_uri), 97 | (u'@id', u'http://lod.example.org/museum/Person/1'), (u'@type', u'crm:E21_Person'), 98 | ('rdfs:label', 'Test Person')]) 99 | model.factory.full_names = True 100 | p = model.Person("1") 101 | p._label = "Test Person" 102 | outj = model.factory.toJSON(p) 103 | self.assertEqual(expect, outj) 104 | # reset 105 | model.factory.full_names = False 106 | 107 | def test_toString(self): 108 | expect = u'{"@context":"'+model.factory.context_uri+'","id":"http://lod.example.org/museum/InformationObject/collection","type":"InformationObject","_label":"Test Object"}' 109 | outs = model.factory.toString(self.collection) 110 | self.assertEqual(expect, outs) 111 | 112 | def test_toString_fast(self): 113 | # Should only be trusted in python 3 114 | if sys.version_info.major >= 3 and sys.version_info.minor >= 6: 115 | expect = u'{"@context":"'+model.factory.context_uri+'","id":"http://lod.example.org/museum/InformationObject/collection","type":"InformationObject","_label":"Test Object"}' 116 | model.factory.json_serializer = "fast" 117 | outs = model.factory.toString(self.collection) 118 | model.factory.json_serializer = "normal" 119 | self.assertEqual(expect, outs) 120 | else: 121 | print("Skipping toString_fast test in Python 2.x") 122 | 123 | def test_toFile(self): 124 | self.assertRaises(model.ConfigurationError, model.factory.toFile, self.collection) 125 | # Test auto filename determination 126 | model.factory.base_dir = 'tests' 127 | model.factory.toFile(self.collection) 128 | self.assertTrue(os.path.isfile('tests/InformationObject/collection.json')) 129 | # Test explicit filename setting 130 | model.factory.toFile(self.collection, filename='tests/fishbat.bar') 131 | 
self.assertTrue(os.path.isfile('tests/fishbat.bar')) 132 | # Tidy up 133 | shutil.rmtree('tests/InformationObject') 134 | 135 | def test_breadth(self): 136 | x = model.TransferOfCustody() 137 | e = model.Activity() 138 | fr = model.Group() 139 | to = model.Group() 140 | w = model.HumanMadeObject() 141 | fr._label = "From" 142 | to._label = "To" 143 | x.transferred_custody_of = w 144 | x.transferred_custody_from = fr 145 | x.transferred_custody_to = to 146 | e.used_specific_object = w 147 | e.carried_out_by = to 148 | w.current_owner = fr 149 | x.specific_purpose = e 150 | js = model.factory.toJSON(x) 151 | # Okay ... if we're breadth first, then custody_from is a resource 152 | # And now it's the first in the list 153 | self.assertTrue(isinstance(js['transferred_custody_from'][0], OrderedDict)) 154 | 155 | def test_string_list(self): 156 | x = model.Activity() 157 | x._label = ["Label 1", "Label 2"] 158 | js = model.factory.toJSON(x) 159 | self.assertTrue(js['_label'] == x._label) 160 | 161 | def test_external(self): 162 | x = model.ExternalResource(ident="1") 163 | model.factory.elasticsearch_compatible = 1 164 | js = x._toJSON(done=None) 165 | self.assertTrue(type(js) == dict) 166 | model.factory.elasticsearch_compatible = 0 167 | js = x._toJSON(done=None) 168 | # testing unicode in 2, str in 3 :( 169 | self.assertTrue(type(js) != dict) 170 | 171 | def test_recursion(self): 172 | x = model.Activity() 173 | x.part = x 174 | js = model.factory.toJSON(x) 175 | # If our recursion checks have regressed, this will barf right here 176 | self.assertTrue(1) 177 | 178 | def test_pipe_scoped(self): 179 | x = model.Activity() 180 | y = model.Activity() 181 | x.part = y 182 | model.factory.pipe_scoped_contexts = True 183 | js = model.factory.toJSON(x) 184 | self.assertTrue('part|crm:P9_consists_of' in js) 185 | model.factory.pipe_scoped_contexts = False 186 | js = model.factory.toJSON(x) 187 | self.assertTrue('part|crm:P9_consists_of' not in js) 188 | self.assertTrue('part' 
in js) 189 | 190 | def test_collapse_json(self): 191 | model.factory.auto_id_type = "uuid" 192 | model.factory.base_url = "http://lod.example.org/museum/" 193 | model.factory.context_uri = "https://linked.art/ns/v1/linked-art.json" 194 | p = model.Person() 195 | p.classified_as = model.Type(ident="http://example.org/Type", label="Test") 196 | res1 = model.factory.toString(p, compact=False, collapse=60) # all new lines 197 | res2 = model.factory.toString(p, compact=False, collapse=120) # compact list of type 198 | self.assertEqual(len(res1.splitlines()), 12) 199 | self.assertEqual(len(res2.splitlines()), 6) 200 | 201 | def test_production_mode(self): 202 | 203 | # model.factory.production_mode() 204 | # Can't unset the cached hierarchy 205 | # and it causes the test for the hierarchy to fail 206 | model.factory.validate_profile = False 207 | model.factory.validate_properties = False 208 | model.factory.validate_range = False 209 | model.factory.validate_multiplicity = False 210 | 211 | p = model.Person() 212 | p.identified_by = model.Name(value="abc") 213 | p.part = model.HumanMadeObject() 214 | js = model.factory.toJSON(p) 215 | 216 | model.factory.production_mode(state=False) 217 | 218 | 219 | def test_ordering(self): 220 | p = model.Person(label="Person") 221 | p.classified_as = model.Type(ident="type-uri") 222 | p.referred_to_by = model.LinguisticObject(content="text") 223 | p.dimension = model.Dimension(value=1) 224 | 225 | outstr = model.factory.toString(p) 226 | lbl = outstr.index("_label") 227 | clsf = outstr.index("classified_as") 228 | r2b = outstr.index("referred_to_by") 229 | dim = outstr.index("dimension") 230 | self.assertTrue(lbl < clsf) 231 | self.assertTrue(clsf < r2b) 232 | self.assertTrue(r2b < dim) 233 | 234 | 235 | class TestProcessTSV(unittest.TestCase): 236 | 237 | def test_process_tsv(self): 238 | expect = {u'subs': [u'E84_Information_Carrier'], u'label': u'Human-Made Object', u'className': u'HumanMadeObject', 239 | u'subOf': 
u'E19_Physical_Object|E24_Physical_Human-Made_Thing', u'props': [], u'class': None, u'okay': u'1'} 240 | fn = 'cromulent/data/crm_vocab.tsv' 241 | vocabData = model.process_tsv(fn) 242 | man_made = vocabData['E22_Human-Made_Object'] 243 | del man_made['desc'] # too long and volatile 244 | # check subs specifically - could be coming from an extension 245 | if man_made['subs'] != expect['subs']: 246 | del man_made['subs'] 247 | del expect['subs'] 248 | self.assertEqual(expect, man_made) 249 | 250 | 251 | class TestBuildClasses(unittest.TestCase): 252 | 253 | def test_build_classes(self): 254 | tsv = "\nClassName_full\tclass\tClassName_py\tClass Label\tClass Description\t\t1\t\n" 255 | fh = open('tests/temp.tsv', 'w') 256 | fh.write(tsv) 257 | fh.close() 258 | model.build_classes("tests/temp.tsv", "ClassName_full") 259 | from cromulent.model import ClassName_py 260 | self.assertEqual('Class Description', ClassName_py.__doc__) 261 | os.remove('tests/temp.tsv') 262 | 263 | class TestBuildClass(unittest.TestCase): 264 | 265 | def test_build_class(self): 266 | tsv = "\nClassName_full\tclass\tClassName_py2\tClass Label\tClass Description\t\t1\t\n" 267 | fh = open('tests/temp.tsv', 'w') 268 | fh.write(tsv) 269 | fh.close() 270 | vocabData = model.process_tsv('tests/temp.tsv') 271 | model.build_class('ClassName_full', model.BaseResource, vocabData) 272 | from cromulent.model import ClassName_py2 273 | self.assertEqual('Class Description', ClassName_py2.__doc__) 274 | os.remove('tests/temp.tsv') 275 | 276 | class TestAutoIdentifiers(unittest.TestCase): 277 | 278 | def test_bad_autoid(self): 279 | model.factory.auto_assign_id = True 280 | model.factory.auto_id_type = "broken" 281 | self.assertRaises(model.ConfigurationError, model.factory.generate_id, 282 | "irrelevant") 283 | 284 | def test_int(self): 285 | model.factory.auto_assign_id = True 286 | model.factory.auto_id_type = "int" 287 | p = model.Person() 288 | p2 = model.Activity() 289 | self.assertEqual(int(p.id[-1]), 
int(p2.id[-1])-1) 290 | 291 | def test_int_per_type(self): 292 | model.factory.auto_assign_id = True 293 | model.factory.auto_id_type = "int-per-type" 294 | p = model.Person() 295 | p2 = model.Person() 296 | self.assertEqual(int(p.id[-1]), int(p2.id[-1])-1) 297 | p3 = model.Activity() 298 | self.assertEqual(int(p.id[-1]), int(p3.id[-1])) 299 | 300 | def test_int_per_segment(self): 301 | model.factory.auto_assign_id = True 302 | model.factory._auto_id_segments = {} 303 | model.factory.auto_id_type = "int-per-segment" 304 | model.Activity._uri_segment = model.Person._uri_segment 305 | p = model.Person() 306 | p2 = model.Activity() 307 | self.assertEqual(int(p.id[-1]), int(p2.id[-1])-1) 308 | p3 = model.TimeSpan() 309 | self.assertEqual(int(p.id[-1]), int(p3.id[-1])) 310 | 311 | def test_uuid(self): 312 | model.factory.auto_assign_id = True 313 | model.factory.auto_id_type = "uuid" 314 | p = model.Person() 315 | self.assertTrue(p.id.startswith('urn:uuid:')) 316 | 317 | def test_prefixes(self): 318 | 319 | model.factory.prefixes = {'fish':'http://example.org/ns/'} 320 | p3 = model.Person('fish:3') 321 | self.assertEqual(p3.id, 'fish:3') 322 | self.assertEqual(p3._full_id, 'http://example.org/ns/3') 323 | 324 | model.factory.prefixes = {} 325 | p4 = model.Person('fish:4') 326 | self.assertTrue(p4.id.startswith(model.factory.base_url)) 327 | 328 | def test_other_uris(self): 329 | p1 = model.Person(ident="tag:some-info-about-person") 330 | self.assertEqual(p1.id, "tag:some-info-about-person") 331 | p2 = model.Person(ident="info:ulan/500012345") 332 | self.assertEqual(p2.id, "info:ulan/500012345") 333 | p3 = model.Person(ident="some:random:thing:with:colons") 334 | self.assertFalse(p3.id == "some:random:thing:with:colons") 335 | 336 | def test_no_ident(self): 337 | 338 | model.factory.auto_assign_id = True 339 | p1 = model.Person() # auto assigned 340 | p2 = model.Person(ident=None) # auto assigned 341 | p3 = model.Person(ident="") # bnode explicitly 342 | 343 | 
self.assertTrue(p1.id.startswith('http')) 344 | self.assertTrue(p2.id.startswith('http')) 345 | self.assertEqual(p3.id, '') 346 | 347 | model.factory.auto_assign_id = False 348 | p4 = model.Person() # bnode is default 349 | p5 = model.Person(ident=None) # bnode is default 350 | p6 = model.Person(ident="") # bnode explicitly 351 | 352 | self.assertEqual(p4.id, '') 353 | self.assertEqual(p5.id, '') 354 | self.assertEqual(p6.id, '') 355 | 356 | 357 | class TestBaseResource(unittest.TestCase): 358 | 359 | def setUp(self): 360 | override_okay(model.Person, 'parent_of') 361 | self.artist = model.Person('00001', 'Jane Doe') 362 | self.son = model.Person('00002', 'John Doe') 363 | 364 | def test_init(self): 365 | self.assertEqual(self.artist.id, 'http://lod.example.org/museum/Person/00001') 366 | self.assertEqual(self.artist._type, 'crm:E21_Person') 367 | self.assertEqual(self.artist.type, 'Person') 368 | self.assertEqual(self.artist._label, 'Jane Doe') 369 | self.assertFalse(hasattr(self.artist, 'value')) 370 | self.assertFalse(hasattr(self.artist, 'has_type')) 371 | 372 | def test_check_prop(self): 373 | desc = self.artist._check_prop('_label', 'Jane Doe\'s Bio') 374 | self.assertEqual(desc, 1) 375 | parent = self.artist._check_prop('parent_of', self.son) 376 | self.assertEqual(parent, 2) 377 | 378 | def test_list_all_props(self): 379 | props = self.artist.list_all_props() 380 | props.sort() 381 | self.assertEqual(props[-1], 'witnessed') 382 | self.assertTrue('_label' in props) 383 | self.assertTrue('identified_by' in props) 384 | 385 | def test_list_my_props(self): 386 | p1 = model.Person() 387 | p1.classified_as = model.Type() 388 | props = p1.list_my_props() 389 | self.assertEqual(set(props), set(['classified_as', 'id'])) 390 | props = p1.list_my_props(filter=model.Type) 391 | self.assertEqual(props, ['classified_as']) 392 | 393 | def test_allows_multiple(self): 394 | p = model.Person() 395 | self.assertTrue(p.allows_multiple('classified_as')) 396 | 
self.assertFalse(p.allows_multiple('born')) 397 | self.assertRaises(model.DataError, p.allows_multiple, 'fish') 398 | 399 | def test_check_reference(self): 400 | self.assertTrue(self.artist._check_reference('http')) 401 | self.assertFalse(self.artist._check_reference('xxx')) 402 | self.assertTrue(self.artist._check_reference({'id': 'xxx'})) 403 | self.assertFalse(self.artist._check_reference({'xxx': 'yyy'})) 404 | self.assertTrue(self.artist._check_reference(self.son)) 405 | self.assertTrue(self.artist._check_reference(['http'])) 406 | self.assertFalse(self.artist._check_reference(['xxx', 'yyy'])) 407 | self.assertTrue(self.artist._check_reference(model.Person)) 408 | 409 | def test_multiplicity(self): 410 | model.factory.process_multiplicity = True 411 | who = model.Actor() 412 | mmo = model.HumanMadeObject() 413 | prod = model.Production() 414 | mmo.produced_by = prod 415 | who.current_owner_of = mmo 416 | mmo.current_owner = who 417 | self.assertEqual(mmo.current_owner, [who]) 418 | self.assertEqual(who.current_owner_of, [mmo]) 419 | self.assertEqual(mmo.produced_by, prod) 420 | 421 | def test_init_params(self): 422 | p1 = model.Person(ident="urn:uuid:1234") 423 | self.assertEqual(p1.id, "urn:uuid:1234") 424 | p2 = model.Person(ident="http://schema.org/Foo") 425 | self.assertEqual(p2.id, "schema:Foo") 426 | p3 = model.Name(content="Test") 427 | self.assertEqual(p3.content, "Test") 428 | c = model.MonetaryAmount(value=10) 429 | self.assertEqual(c.value, 10) 430 | n = model.Name(value="Rob") 431 | self.assertEqual(n.content, "Rob") 432 | i = model.Identifier(content="xyz123") 433 | self.assertEqual(i.content, "xyz123") 434 | i2 = model.Identifier(value="abc") 435 | self.assertEqual(i2.content, "abc") 436 | 437 | def test_dir(self): 438 | props = dir(self.artist) 439 | self.assertTrue('identified_by' in props) 440 | 441 | 442 | class TestPropertyCache(unittest.TestCase): 443 | 444 | def test_cache_hierarchy(self): 445 | o = model.HumanMadeObject() 446 | 
self.assertEqual(o._all_properties, {}) 447 | model.factory.cache_hierarchy() 448 | self.assertTrue(len(o._all_properties) > 50) 449 | 450 | 451 | class TestMagicMethods(unittest.TestCase): 452 | 453 | def setUp(self): 454 | override_okay(model.Person, 'parent_of') 455 | # model.Person._properties['parent_of']['multiple'] = 1 456 | 457 | def test_set_magic_resource(self): 458 | artist = model.Person('00001', 'Jane Doe') 459 | son = model.Person('00002', 'John Doe') 460 | daughter = model.Person('00002', 'Jenny Doe') 461 | son2 = model.Person('00002', 'Jim Doe') 462 | artist._set_magic_resource('parent_of', son) 463 | self.assertEqual(artist.parent_of, [son]) 464 | artist._set_magic_resource('parent_of', daughter) 465 | try: 466 | self.assertIn(son, artist.parent_of) 467 | self.assertIn(daughter, artist.parent_of) 468 | except: 469 | # 2.6 doesn't have assertIn 470 | self.assertTrue(son in artist.parent_of) 471 | self.assertTrue(daughter in artist.parent_of) 472 | 473 | artist._set_magic_resource('parent_of', son2) 474 | try: 475 | self.assertIn(son, artist.parent_of) 476 | self.assertIn(daughter, artist.parent_of) 477 | self.assertIn(son2, artist.parent_of) 478 | except: 479 | self.assertTrue(son in artist.parent_of) 480 | self.assertTrue(daughter in artist.parent_of) 481 | self.assertTrue(son2 in artist.parent_of) 482 | 483 | def test_set_magic_resource_inverse(self): 484 | model.factory.materialize_inverses = True 485 | artist = model.Person('00001', 'Jane Doe') 486 | son = model.Person('00002', 'John Doe') 487 | artist._set_magic_resource('parent_of', son) 488 | self.assertEqual(son.parent, [artist]) 489 | model.factory.materialize_inverses = False 490 | 491 | def test_validate_profile_off(self): 492 | model.factory.validate_profile = False 493 | ia = model.IdentifierAssignment() 494 | # If it's not turned off this should raise 495 | model.factory.validate_profile = True 496 | self.assertRaises(model.ProfileError, model.IdentifierAssignment) 497 | p1 = 
model.Person() 498 | self.assertRaises(model.ProfileError, p1.__setattr__, 'documented_in', "foo") 499 | 500 | def test_validation_unknown(self): 501 | model.factory.validate_properties = True 502 | artist = model.Person('00001', 'Jane Doe') 503 | self.assertRaises(model.DataError, artist.__setattr__, 'unknown_property', 1) 504 | 505 | def test_validation_wrong_type(self): 506 | model.factory.validate_properties = True 507 | artist = model.Person('00001', 'Jane Doe') 508 | self.assertRaises(model.DataError, artist.__setattr__, 'parent_of', 'Bad Value') 509 | 510 | def test_validation_off(self): 511 | model.factory.validate_properties = False 512 | artist = model.Person('00001', 'Jane Doe') 513 | artist.unknown_property = 1 514 | self.assertEqual(artist.unknown_property, 1) 515 | model.factory.validate_properties = True 516 | 517 | def test_validate_multiplicity(self): 518 | model.factory.validate_multiplicity = True 519 | who = model.Person() 520 | b1 = model.Birth() 521 | who.born = b1 522 | b2 = model.Birth() 523 | self.assertRaises(model.ProfileError, who.__setattr__, 'born', b2) 524 | model.factory.validate_multiplicity = False 525 | who.born = b2 526 | self.assertEqual(who.born, [b1, b2]) 527 | 528 | def test_not_multiple_instance(self): 529 | who = model.Person() 530 | n = model.Name(content="Test") 531 | who.identified_by = n 532 | 533 | model.factory.multiple_instances_per_property = "error" 534 | self.assertRaises(model.DataError, who.__setattr__, 'identified_by', n) 535 | self.assertEqual(who.identified_by, [n]) 536 | 537 | model.factory.multiple_instances_per_property = "drop" 538 | who.identified_by = n 539 | self.assertEqual(who.identified_by, [n,n]) 540 | # and check that only serialized once 541 | js = model.factory.toJSON(who) 542 | self.assertEqual(len(js['identified_by']), 1) 543 | 544 | model.factory.multiple_instances_per_property = "allow" 545 | js = model.factory.toJSON(who) 546 | self.assertEqual(len(js['identified_by']), 2) 547 | 548 | 549 | 
class TestObjectEquality(unittest.TestCase): 550 | def setUp(self): 551 | self.artist = model.Person('00001', 'Jane Doe') 552 | self.son = model.Person('00002', 'John Doe') 553 | self.daughter = model.Person('00002', 'Jenny Doe') 554 | self.son2 = model.Person('00002', 'Jim Doe') 555 | 556 | def test_eq_ident(self): 557 | self.assertEqual(self.artist, self.artist) 558 | self.assertEqual(self.son, model.Person('00002', 'John Doe')) 559 | self.assertEqual(self.son2, model.Person('00002', 'Jim Doe')) 560 | self.assertEqual(self.daughter, model.Person('00002', 'Jenny Doe')) 561 | 562 | def test_eq_value(self): 563 | self.assertEqual(self.artist, model.Person('00001', 'Jane Doe')) 564 | self.assertEqual(self.son, self.son) 565 | self.assertEqual(self.son2, self.son2) 566 | self.assertEqual(self.daughter, self.daughter) 567 | 568 | def test_in_value(self): 569 | people = ( 570 | model.Person('00001', 'Jane Doe'), # artist 571 | model.Person('00002', 'Jim Doe') # son2 572 | ) 573 | self.assertIn(self.artist, people) 574 | self.assertNotIn(self.son, people) 575 | self.assertNotIn(self.daughter, people) 576 | self.assertIn(self.son2, people) 577 | 578 | def test_neq(self): 579 | self.assertNotEqual(self.artist, self.son) 580 | self.assertNotEqual(self.artist, model.Person('00001', 'Jane')) # label differs 581 | self.assertNotEqual(self.artist, self.daughter) 582 | self.assertNotEqual(self.artist, self.son2) 583 | self.assertNotEqual(self.son, self.daughter) 584 | self.assertNotEqual(self.son, self.son2) 585 | self.assertNotEqual(self.daughter, self.son2) 586 | 587 | def nation(self, name, ident): 588 | place = vocab.Place(ident='tag:getty.edu,2019:digital:pipeline:provenance:REPLACE-WITH-UUID#PLACE-COUNTRY-' + name, label=name) 589 | nation = model.Place(ident=ident) 590 | place.classified_as = vocab.instances['nation'] 591 | place.identified_by = model.Name(ident='', content=name) 592 | return place 593 | 594 | def test_equality(self): 595 | from cromulent.model import 
factory 596 | place1 = self.nation('Belgium', 'http://vocab.getty.edu/aat/300128207') 597 | place2 = self.nation('Belgium', 'http://vocab.getty.edu/aat/300128207') 598 | self.assertEqual(place1, place2) 599 | 600 | if __name__ == '__main__': 601 | unittest.main() 602 | 603 | --------------------------------------------------------------------------------