├── .dockerignore ├── .gitignore ├── .travis.yml ├── Dockerfile ├── LICENSE.txt ├── README.md ├── orcid2vivo.py ├── orcid2vivo_app ├── __init__.py ├── affiliations.py ├── bio.py ├── fundings.py ├── utility.py ├── vivo_namespace.py ├── vivo_uri.py └── works.py ├── orcid2vivo_loader.py ├── orcid2vivo_service.py ├── requirements.txt ├── setup.py ├── templates └── crosswalk_form.html └── tests ├── __init__.py ├── app ├── __init__.py ├── test_affiliations.py ├── test_bio.py ├── test_fundings.py ├── test_utility.py ├── test_vivo_uri.py └── test_works.py ├── fixtures └── loader │ └── load_single.yaml ├── test_loader.py └── test_orcid2vivo.py /.dockerignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__ 3 | *.py[cod] 4 | 5 | env 6 | ENV 7 | 8 | #PyCharm 9 | .idea 10 | 11 | #Git 12 | .git 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | 59 | #PyCharm 60 | .idea/ -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "2.7" 4 | before_install: 5 | - "pip install pip==7.1.2 --upgrade" 6 | - "pip install setuptools>=25.2.0 --upgrade" 7 | install: "pip install -r requirements.txt" 8 | script: python -m unittest discover 9 | notifications: 10 | email: 11 | - justinlittman@gmail.com -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:2.7 2 | MAINTAINER Justin Littman 3 | 4 | #Add files 5 | ADD . 
/orcid2vivo 6 | RUN pip install -r /orcid2vivo/requirements.txt 7 | EXPOSE 5000 8 | WORKDIR /orcid2vivo 9 | CMD python orcid2vivo_service.py --endpoint $O2V_ENDPOINT --username $O2V_USERNAME --password $O2V_PASSWORD --namespace $O2V_NAMESPACE --debug 10 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012-2014 The George Washington University 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject 9 | to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 17 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR 18 | ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 19 | CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # orcid2vivo 2 | Tool for retrieving data from the ORCID API and crosswalking to VIVO-ISF. 3 | 4 | ![Build status](https://travis-ci.org/gwu-libraries/orcid2vivo.svg) 5 | 6 | ## Installation 7 | With python/pip installed: 8 | 9 | ``` 10 | pip install orcid2vivo 11 | ``` 12 | 13 | ## Commandline 14 | * Supports outputting to: 15 | * screen / stdout 16 | * file 17 | * load to VIVO instance (via SPARQL Update) 18 | * Supports multiple RDF serializations. 19 | * Allows specifying: 20 | * VIVO namespace 21 | * An id or URI for the person. 22 | * Class for the person. 23 | 24 | ``` 25 | (ENV)GLSS-F0G5RP:orcid2vivo justinlittman$ python orcid2vivo.py -h 26 | usage: orcid2vivo.py [-h] [--format {xml,n3,turtle,nt,pretty-xml,trix}] 27 | [--file FILE] [--endpoint ENDPOINT] [--username USERNAME] 28 | [--password PASSWORD] [--person-id PERSON_ID] 29 | [--person-uri PERSON_URI] [--namespace NAMESPACE] 30 | [--person-class {FacultyMember,FacultyMemberEmeritus,Librarian,LibrarianEmeritus,NonAcademic,NonFacultyAcademic,ProfessorEmeritus,Student}] 31 | [--skip-person] 32 | orcid_id 33 | 34 | positional arguments: 35 | orcid_id 36 | 37 | optional arguments: 38 | -h, --help show this help message and exit 39 | --format {xml,n3,turtle,nt,pretty-xml,trix} 40 | The RDF format for serializing. Default is turtle. 41 | --file FILE Filepath to which to serialize. 42 | --endpoint ENDPOINT Endpoint for SPARQL Update of VIVO instance,e.g., 43 | http://localhost/vivo/api/sparqlUpdate. Also provide 44 | --username and --password. 45 | --username USERNAME Username for VIVO root. 46 | --password PASSWORD Password for VIVO root. 47 | --person-id PERSON_ID 48 | Id for the person to use when constructing the 49 | person's URI. If not provided, the orcid id will be 50 | used. 
51 | --person-uri PERSON_URI 52 | A URI for the person. If not provided, one will be 53 | created from the orcid id or person id. 54 | --namespace NAMESPACE 55 | VIVO namespace. Default is 56 | http://vivo.mydomain.edu/individual/. 57 | --person-class {FacultyMember,FacultyMemberEmeritus,Librarian,LibrarianEmeritus,NonAcademic,NonFacultyAcademic,ProfessorEmeritus,Student} 58 | Class (in VIVO Core ontology) for a person. Default is 59 | a FOAF Person. 60 | --skip-person Skip adding triples declaring the person and the 61 | person's name. 62 | 63 | ``` 64 | 65 | For example: 66 | ``` 67 | (ENV)GLSS-F0G5RP:orcid2vivo justinlittman$ python orcid2vivo.py 0000-0003-1527-0030 68 | ``` 69 | 70 | ## Web application 71 | * Supports outputting to: 72 | * web page 73 | * download 74 | * load to VIVO instance (via SPARQL Update) 75 | * Also supports outputting of ORCID profile to web page. 76 | * Can be invoked from web form and http client. 77 | * Supports multiple RDF serializations. 78 | * Allows specifying: 79 | * VIVO namespace 80 | * An id or URI for the person. 81 | * Class for the person. 82 | * Allows providing various default values when starting the application. 83 | 84 | 85 | ``` 86 | (ENV)GLSS-F0G5RP:orcid2vivo justinlittman$ python orcid2vivo_service.py -h 87 | usage: orcid2vivo_service.py [-h] 88 | [--format {xml,n3,turtle,nt,pretty-xml,trix}] 89 | [--endpoint ENDPOINT] [--username USERNAME] 90 | [--password PASSWORD] [--namespace NAMESPACE] 91 | [--person-class {FacultyMember,FacultyMemberEmeritus,Librarian,LibrarianEmeritus,NonAcademic,NonFacultyAcademic,ProfessorEmeritus,Student}] 92 | [--skip-person] [--debug] [--port PORT] 93 | 94 | optional arguments: 95 | -h, --help show this help message and exit 96 | --format {xml,n3,turtle,nt,pretty-xml,trix} 97 | The RDF format for serializing. Default is turtle. 98 | --endpoint ENDPOINT Endpoint for SPARQL Update of VIVO instance,e.g., 99 | http://localhost/vivo/api/sparqlUpdate. 100 | --username USERNAME Username for VIVO root. 101 | --password PASSWORD Password for VIVO root. 102 | --namespace NAMESPACE 103 | VIVO namespace. Default is 104 | http://vivo.mydomain.edu/individual/. 105 | --person-class {FacultyMember,FacultyMemberEmeritus,Librarian,LibrarianEmeritus,NonAcademic,NonFacultyAcademic,ProfessorEmeritus,Student} 106 | Class (in VIVO Core ontology) for a person. Default is 107 | a FOAF Person. 108 | --skip-person Skip adding triples declaring the person and the 109 | person's name. 110 | --debug 111 | --port PORT The port the service should run on. Default is 5000. 112 | 113 | ``` 114 | 115 | For example, to start: 116 | ``` 117 | (ENV)GLSS-F0G5RP:orcid2vivo justinlittman$ python orcid2vivo_service.py 118 | ``` 119 | 120 | The web form will now be available at http://localhost:5000/. 121 | 122 | ### Invoke using curl 123 | 124 | ``` 125 | GLSS-F0G5RP:orcid2vivo justinlittman$ curl --data "orcid_id=0000-0003-1527-0030&format=turtle" http://localhost:5000/ 126 | ``` 127 | 128 | ### Docker 129 | 130 | The web application can be deployed to a [Docker](https://www.docker.com/) container. 131 | 132 | ``` 133 | GLSS-F0G5RP:orcid2vivo justinlittman$ docker build -t orcid2vivo . 134 | GLSS-F0G5RP:orcid2vivo justinlittman$ docker run -e "O2V_ENDPOINT=http://vivo:8080/vivo/api/sparqlUpdate" -e "O2V_USERNAME=vivo_root@mydomain.edu" -e "O2V_PASSWORD=password" -e "O2V_NAMESPACE=http://vivo.mydomain.edu/" -p "5000:5000" -d orcid2vivo 135 | ``` 136 | 137 | The web form will now be available at http://localhost:5000/. 
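The same endpoint can be called from Python as well as curl. A minimal sketch using the `requests` library (which the project already uses), assuming the service is reachable at http://localhost:5000/ and mirroring the curl invocation above:

```
import requests

# POST the same form fields that the web form and the curl example use.
response = requests.post("http://localhost:5000/",
                         data={"orcid_id": "0000-0003-1527-0030", "format": "turtle"})
response.raise_for_status()
print(response.text)  # the crosswalked RDF, serialized as turtle
```
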
(Note: If using boot2docker, use result of `boot2docker ip` instead of localhost.) 138 | 139 | ## Bulk loading 140 | * Supports loading to VIVO instance (via SPARQL Update) for multiple people. 141 | * Provides database to record a list of: 142 | * Orcid id for the person 143 | * Last load 144 | * Active flag 145 | * Id for the person 146 | * URI for the person 147 | * Class for the person 148 | * Also allows specifying: 149 | * VIVO namespace 150 | * Whether to skip creating records for a person. 151 | * Invoked with command line interface. 152 | * Maintains store of complete RDF for a person. 153 | * All loads are incremental, as determined by comparing the stored RDF for a person against the generated RDF. 154 | 155 | The general workflow would be: 156 | 157 | 1. Add records to the database. 158 | 2. Periodically perform a load, possibly limiting the load by a last load cutoff or a number limit. 159 | 3. As necessary, update the database by adding or deleting (i.e., de-activating) orcid id records. 160 | 161 | ``` 162 | (ENV)GLSS-F0G5RP:orcid2vivo justinlittman$ python orcid2vivo_loader.py -h 163 | usage: orcid2vivo_loader.py [-h] [--debug] 164 | {add,delete,delete-all,load,list} ... 165 | 166 | positional arguments: 167 | {add,delete,delete-all,load,list} 168 | add Adds or updates orcid id record. If inactive, marks 169 | active. 170 | delete Marks an orcid id record as inactive so that it will 171 | not be loaded. 172 | delete-all Marks all orcid id records as inactive. 173 | load Fetches orcid profiles, crosswalks to VIVO-ISF, loads 174 | to VIVO instance, and updates orcid id record. If 175 | loading multiple orcid ids, loads in least recent 176 | order. 177 | list Lists orcid_id records in the db. 178 | 179 | optional arguments: 180 | -h, --help show this help message and exit 181 | --debug 182 | ``` 183 | 184 | For example: 185 | 186 | ``` 187 | (ENV)GLSS-F0G5RP:orcid2vivo justinlittman$ python orcid2vivo_loader.py add 0000-0003-1527-0030 188 | Adding 0000-0003-1527-0030 189 | Done 190 | (ENV)GLSS-F0G5RP:orcid2vivo justinlittman$ python orcid2vivo_loader.py list 191 | 0000-0003-1527-0030 [active=true; last_update=None; person_uri=None; person_id=None, person_class=None] 192 | Done 193 | (ENV)GLSS-F0G5RP:orcid2vivo justinlittman$ python orcid2vivo_loader.py load http://192.168.59.103:8080/vivo/api/sparqlUpdate vivo_root@gwu.edu http://vivo.gwu.edu --password password 194 | Loading to http://192.168.59.103:8080/vivo/api/sparqlUpdate 195 | Loaded: 0000-0003-1527-0030 196 | Done 197 | ``` 198 | 199 | ##Tests 200 | 201 | ``` 202 | GLSS-F0G5RP:orcid2vivo justinlittman$ python -m unittest discover 203 | ``` 204 | 205 | ##Strategies for generating URIs and/or creating entities 206 | Approaches to generating URIs and creating entities (e.g., journals or co-authors) are abstracted into strategies. Default strategies are provided, but they can be replaced with other strategies is necessary to meet local requirements. 207 | 208 | The strategy for generating URIs is provided by a class that has the following method: 209 | 210 | ``` 211 | def to_uri(self, clazz, attrs, general_clazz=None): 212 | """ 213 | Given an RDF class and a set of attributes for an entity, produce a URI. 214 | :param clazz: the class of the entity. 215 | :param attrs: a map of identifying attributes for an entity. 216 | :param general_clazz: a superclass of the entity that can be used to group like entities. 217 | :return: URI for the entity. 
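    For example, the default HashIdentifierStrategy builds the URI in the VIVO data namespace as <prefix>-<md5 hexdigest of the attribute values>, with the prefix derived from general_clazz (or, if absent, clazz); the d:journal-... URIs in the triples below illustrate the result.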
218 | """ 219 | ``` 220 | 221 | The strategy for creating entities is provided by a class that has the following method: 222 | 223 | ``` 224 | def should_create(self, clazz, uri): 225 | """ 226 | Determine whether an entity should be created. 227 | :param clazz: Class of the entity. 228 | :param uri: URI of the entity. 229 | :return: True if the entity should be created. 230 | """ 231 | ``` 232 | 233 | It may be desirable to skip creating entities if those entities already exist in the triple store. For example, this shows the triples when the journal is created: 234 | 235 | ``` 236 | d:academicarticle-df4d61373e64c72681d74829ea92071a vivo:hasPublicationVenue d:journal-65a2d6d4d80fdbbd78268bf4e814ee01 ; 237 | 238 | d:journal-65a2d6d4d80fdbbd78268bf4e814ee01 a bibo:Journal ; 239 | rdfs:label "D-Lib Magazine" ; 240 | bibo:issn "1082-9873" . 241 | ``` 242 | 243 | and this shows the triples when it is not created: 244 | 245 | ``` 246 | d:academicarticle-df4d61373e64c72681d74829ea92071a vivo:hasPublicationVenue d:journal-65a2d6d4d80fdbbd78268bf4e814ee01 ; 247 | 248 | ``` 249 | 250 | Depending on the strategies to be implemented, it may be a useful approach to combine both strategies into a single class. 251 | 252 | ##Caveats: 253 | * All data is not cross walked to VIVO-ISF. 254 | * Password for SPARQL Update is not handled securely. 255 | 256 | ##Other: 257 | * Feedback / tickets / pull requests welcome. 258 | * Consider using with [vivo-docker](https://github.com/gwu-libraries/vivo-docker) to put together an environment for experimenting with crosswalking ORCID to VIVO. 259 | -------------------------------------------------------------------------------- /orcid2vivo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import requests 4 | import argparse 5 | import codecs 6 | from rdflib import Graph, URIRef, RDF, OWL 7 | from rdflib.namespace import Namespace 8 | from orcid2vivo_app.vivo_uri import HashIdentifierStrategy 9 | from orcid2vivo_app.vivo_namespace import VIVO, FOAF, VCARD 10 | from orcid2vivo_app.affiliations import AffiliationsCrosswalk 11 | from orcid2vivo_app.bio import BioCrosswalk 12 | from orcid2vivo_app.fundings import FundingCrosswalk 13 | from orcid2vivo_app.works import WorksCrosswalk 14 | from orcid2vivo_app.utility import sparql_insert, clean_orcid 15 | import orcid2vivo_app.vivo_namespace as ns 16 | 17 | 18 | class SimpleCreateEntitiesStrategy(): 19 | """ 20 | A minimally configurable strategy for determining if ancillary entities 21 | should be created. 22 | 23 | Except for a few configurable options, entities are always created. 24 | 25 | Also, wraps a provided identifier strategy (need to support skip person). 26 | 27 | Other implementations must implement should_create(). 28 | """ 29 | def __init__(self, identifier_strategy, skip_person=False, person_uri=None): 30 | self.skip_person = skip_person 31 | self.person_uri = person_uri 32 | self._identifier_strategy = identifier_strategy 33 | self.person_name_vcard_uri = None 34 | 35 | def should_create(self, clazz, uri): 36 | """ 37 | Determine whether an entity should be created. 38 | :param clazz: Class of the entity. 39 | :param uri: URI of the entity. 40 | :return: True if the entity should be created. 
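        When skip_person is set, returns False for the person URI and for the person's name vcard URI; all other entities are created.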
41 | """ 42 | if self.skip_person and uri in (self.person_uri, self.person_name_vcard_uri): 43 | return False 44 | return True 45 | 46 | def to_uri(self, clazz, attrs, general_clazz=None): 47 | uri = self._identifier_strategy.to_uri(clazz, attrs, general_clazz=None) 48 | # Need to remember vcard uri for this person so that can skip. 49 | if clazz == VCARD.Name and attrs.get("person_uri") == self.person_uri: 50 | self.person_name_vcard_uri = uri 51 | return uri 52 | 53 | 54 | class PersonCrosswalk(): 55 | def __init__(self, identifier_strategy, create_strategy): 56 | self.identifier_strategy = identifier_strategy 57 | self.create_strategy = create_strategy 58 | self.bio_crosswalker = BioCrosswalk(identifier_strategy, create_strategy) 59 | self.affiliations_crosswalker = AffiliationsCrosswalk(identifier_strategy, create_strategy) 60 | self.funding_crosswalker = FundingCrosswalk(identifier_strategy, create_strategy) 61 | self.works_crosswalker = WorksCrosswalk(identifier_strategy, create_strategy) 62 | 63 | def crosswalk(self, orcid_id, person_uri, person_class=None, confirmed_orcid_id=False): 64 | 65 | # Create an RDFLib Graph 66 | graph = Graph(namespace_manager=ns.ns_manager) 67 | 68 | # 0000-0003-3441-946X 69 | clean_orcid_id = clean_orcid(orcid_id) 70 | orcid_profile = fetch_orcid_profile(clean_orcid_id) 71 | 72 | # Determine the class to use for the person 73 | person_clazz = FOAF.Person 74 | if person_class: 75 | person_clazz = getattr(VIVO, person_class) 76 | 77 | # ORCID 78 | PersonCrosswalk._add_orcid_id(person_uri, clean_orcid_id, graph, confirmed_orcid_id) 79 | 80 | self.bio_crosswalker.crosswalk(orcid_profile, person_uri, graph, person_class=person_clazz) 81 | self.works_crosswalker.crosswalk(orcid_profile, person_uri, graph) 82 | self.affiliations_crosswalker.crosswalk(orcid_profile, person_uri, graph) 83 | self.funding_crosswalker.crosswalk(orcid_profile, person_uri, graph) 84 | 85 | return graph, orcid_profile, person_uri 86 | 87 | @staticmethod 88 | def _add_orcid_id(person_uri, orcid_id, graph, confirmed): 89 | orcid_id_uriref = URIRef("http://orcid.org/%s" % orcid_id) 90 | graph.add((person_uri, VIVO.orcidId, orcid_id_uriref)) 91 | graph.add((orcid_id_uriref, RDF.type, OWL.Thing)) 92 | if confirmed: 93 | graph.add((orcid_id_uriref, VIVO.confirmedOrcidId, person_uri)) 94 | 95 | 96 | def fetch_orcid_profile(orcid_id): 97 | orcid = clean_orcid(orcid_id) 98 | r = requests.get('https://pub.orcid.org/v2.0/%s' % orcid, 99 | headers={"Accept": "application/json"}) 100 | if r: 101 | return r.json() 102 | else: 103 | raise Exception("Request to fetch ORCID profile for %s returned %s" % (orcid, r.status_code)) 104 | 105 | 106 | def set_namespace(namespace=None): 107 | # Set default VIVO namespace 108 | if namespace: 109 | ns.D = Namespace(namespace) 110 | ns.ns_manager.bind('d', ns.D, replace=True) 111 | 112 | 113 | def default_execute(orcid_id, namespace=None, person_uri=None, person_id=None, skip_person=False, person_class=None, 114 | confirmed_orcid_id=False): 115 | # Set namespace 116 | set_namespace(namespace) 117 | 118 | this_identifier_strategy = HashIdentifierStrategy() 119 | this_person_uri = URIRef(person_uri) if person_uri \ 120 | else this_identifier_strategy.to_uri(FOAF.Person, {"id": person_id or orcid_id}) 121 | 122 | # this_create_strategy will implement both create strategy and identifier strategy 123 | this_create_strategy = SimpleCreateEntitiesStrategy(this_identifier_strategy, skip_person=skip_person, 124 | person_uri=this_person_uri) 125 | 126 | crosswalker = 
PersonCrosswalk(create_strategy=this_create_strategy, identifier_strategy=this_create_strategy) 127 | return crosswalker.crosswalk(orcid_id, this_person_uri, person_class=person_class, 128 | confirmed_orcid_id=confirmed_orcid_id) 129 | 130 | 131 | if __name__ == '__main__': 132 | parser = argparse.ArgumentParser() 133 | parser.add_argument("orcid_id") 134 | parser.add_argument("--format", default="turtle", choices=["xml", "n3", "turtle", "nt", "pretty-xml", "trix"], 135 | help="The RDF format for serializing. Default is turtle.") 136 | parser.add_argument("--file", help="Filepath to which to serialize.") 137 | parser.add_argument("--endpoint", dest="endpoint", 138 | help="Endpoint for SPARQL Update of VIVO instance,e.g., http://localhost/vivo/api/sparqlUpdate." 139 | " Also provide --username and --password.") 140 | parser.add_argument("--username", dest="username", help="Username for VIVO root.") 141 | parser.add_argument("--password", dest="password", 142 | help="Password for VIVO root.") 143 | parser.add_argument("--person-id", dest="person_id", help="Id for the person to use when constructing the person's " 144 | "URI. If not provided, the orcid id will be used.") 145 | parser.add_argument("--person-uri", dest="person_uri", help="A URI for the person. If not provided, one will be " 146 | "created from the orcid id or person id.") 147 | parser.add_argument("--namespace", default="http://vivo.mydomain.edu/individual/", 148 | help="VIVO namespace. Default is http://vivo.mydomain.edu/individual/.") 149 | parser.add_argument("--person-class", dest="person_class", 150 | choices=["FacultyMember", "FacultyMemberEmeritus", "Librarian", "LibrarianEmeritus", 151 | "NonAcademic", "NonFacultyAcademic", "ProfessorEmeritus", "Student"], 152 | help="Class (in VIVO Core ontology) for a person. 
Default is a FOAF Person.") 153 | parser.add_argument("--skip-person", dest="skip_person", action="store_true", 154 | help="Skip adding triples declaring the person and the person's name.") 155 | parser.add_argument("--confirmed", action="store_true", help="Mark the orcid id as confirmed.") 156 | 157 | # Parse 158 | args = parser.parse_args() 159 | 160 | # Excute with default strategies 161 | (g, p, per_uri) = default_execute(args.orcid_id, namespace=args.namespace, person_uri=args.person_uri, 162 | person_id=args.person_id, skip_person=args.skip_person, 163 | person_class=args.person_class, confirmed_orcid_id=args.confirmed) 164 | 165 | # Write to file 166 | if args.file: 167 | with codecs.open(args.file, "w") as out: 168 | g.serialize(format=args.format, destination=out) 169 | 170 | # Post to SPARQL Update 171 | if args.endpoint: 172 | if not args.username or not args.password: 173 | raise Exception("If an endpoint is specified, --username and --password must be provided.") 174 | sparql_insert(g, args.endpoint, args.username, args.password) 175 | 176 | # If not writing to file to posting to SPARQL Update then serialize to stdout 177 | if not args.file and not args.endpoint: 178 | print g.serialize(format=args.format) 179 | -------------------------------------------------------------------------------- /orcid2vivo_app/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'justinlittman' 2 | -------------------------------------------------------------------------------- /orcid2vivo_app/affiliations.py: -------------------------------------------------------------------------------- 1 | from vivo_namespace import VIVO, OBO 2 | from rdflib import RDFS, RDF, Literal 3 | from vivo_namespace import FOAF 4 | from vivo_uri import to_hash_identifier 5 | from utility import add_date, add_date_interval 6 | import orcid2vivo_app.vivo_namespace as ns 7 | 8 | 9 | class AffiliationsCrosswalk: 10 | def __init__(self, identifier_strategy, create_strategy): 11 | self.identifier_strategy = identifier_strategy 12 | self.create_strategy = create_strategy 13 | 14 | def crosswalk(self, orcid_profile, person_uri, graph): 15 | # Education 16 | if "educations" in orcid_profile["activities-summary"]: 17 | for education in orcid_profile["activities-summary"]["educations"]["education-summary"]: 18 | # Gather some values 19 | degree_name = education.get("role-title") 20 | organization_name = education["organization"]["name"] 21 | start_date_year = (education["start-date"] or {}).get("year", {}).get("value") 22 | end_date_year = (education["end-date"] or {}).get("year", {}).get("value") 23 | 24 | # Organization 25 | organization_uri = self.identifier_strategy.to_uri(FOAF.Organization, {"name": organization_name}) 26 | if self.create_strategy.should_create(FOAF.Organization, organization_uri): 27 | graph.add((organization_uri, RDF.type, FOAF.Organization)) 28 | graph.add((organization_uri, RDFS.label, Literal(organization_name))) 29 | if "address" in education["organization"]: 30 | city = education["organization"]["address"]["city"] 31 | state = education["organization"]["address"]["region"] 32 | address_uri = ns.D[to_hash_identifier("geo", (city, state))] 33 | graph.add((address_uri, RDF.type, VIVO.GeographicLocation)) 34 | graph.add((organization_uri, OBO.RO_0001025, address_uri)) 35 | graph.add((address_uri, RDFS.label, Literal("%s, %s" % (city, state)))) 36 | 37 | # Output of educational process 38 | educational_process_uri = 
self.identifier_strategy.to_uri(VIVO.EducationalProcess, 39 | {"organization_name": organization_name, 40 | "degree_name": degree_name, 41 | "start_year": start_date_year, 42 | "end_year": end_date_year}) 43 | graph.add((educational_process_uri, RDF.type, VIVO.EducationalProcess)) 44 | # Has participants 45 | graph.add((educational_process_uri, OBO.RO_0000057, organization_uri)) 46 | graph.add((educational_process_uri, OBO.RO_0000057, person_uri)) 47 | # Department 48 | if education.get("department-name"): 49 | graph.add((educational_process_uri, VIVO.departmentOrSchool, 50 | Literal(education["department-name"]))) 51 | 52 | # Interval 53 | add_date_interval(educational_process_uri, graph, self.identifier_strategy, 54 | add_date(start_date_year, graph, self.identifier_strategy), 55 | add_date(end_date_year, graph, self.identifier_strategy)) 56 | 57 | if "role-title" in education: 58 | degree_name = education["role-title"] 59 | 60 | # Awarded degree 61 | awarded_degree_uri = self.identifier_strategy.to_uri(VIVO.AwardedDegree, 62 | {"educational_process_uri": 63 | educational_process_uri}) 64 | graph.add((awarded_degree_uri, RDF.type, VIVO.AwardedDegree)) 65 | graph.add((awarded_degree_uri, RDFS.label, Literal(degree_name))) 66 | 67 | # Assigned by organization 68 | graph.add((awarded_degree_uri, VIVO.assignedBy, organization_uri)) 69 | 70 | # Related to educational process 71 | graph.add((awarded_degree_uri, OBO.RO_0002353, educational_process_uri)) 72 | 73 | # Relates to degree 74 | degree_uri = self.identifier_strategy.to_uri(VIVO.AcademicDegree, {"name": degree_name}) 75 | graph.add((awarded_degree_uri, VIVO.relates, degree_uri)) 76 | if self.create_strategy.should_create(VIVO.AcademicDegree, degree_uri): 77 | graph.add((degree_uri, RDF.type, VIVO.AcademicDegree)) 78 | graph.add((degree_uri, RDFS.label, Literal(degree_name))) 79 | 80 | # Relates to person 81 | graph.add((awarded_degree_uri, VIVO.relates, person_uri)) 82 | -------------------------------------------------------------------------------- /orcid2vivo_app/bio.py: -------------------------------------------------------------------------------- 1 | from vivo_namespace import VIVO 2 | from rdflib import RDFS, RDF, Literal, XSD 3 | from utility import join_if_not_empty 4 | from vivo_namespace import VCARD, OBO, FOAF 5 | 6 | 7 | class BioCrosswalk: 8 | def __init__(self, identifier_strategy, create_strategy): 9 | self.identifier_strategy = identifier_strategy 10 | self.create_strategy = create_strategy 11 | 12 | def crosswalk(self, orcid_profile, person_uri, graph, person_class=FOAF.Person): 13 | 14 | # Get names (for person and name vcard) 15 | given_names = None 16 | family_name = None 17 | if "name" in orcid_profile["person"]: 18 | person_details = orcid_profile["person"]["name"] 19 | given_names = person_details.get("given-names", {}).get("value") 20 | family_name = person_details.get("family-name", {}).get("value") 21 | full_name = join_if_not_empty((given_names, family_name)) 22 | 23 | # Following is non-vcard bio information 24 | 25 | # If skip_person, then don't create person and add names 26 | if full_name and self.create_strategy.should_create(person_class, person_uri): 27 | # Add person 28 | graph.add((person_uri, RDF.type, person_class)) 29 | graph.add((person_uri, RDFS.label, Literal(full_name))) 30 | 31 | # Biography 32 | if "biography" in orcid_profile["person"]: 33 | biography = orcid_profile["person"]["biography"]["content"] 34 | if biography: 35 | graph.add((person_uri, VIVO.overview, Literal(biography))) 36 | 37 
| # Other identifiers 38 | # Default VIVO-ISF only supports a limited number of identifier types. 39 | if "external-identifiers" in orcid_profile["person"]: 40 | external_identifiers = orcid_profile["person"]["external-identifiers"]["external-identifier"] 41 | for external_identifier in external_identifiers: 42 | # Scopus ID 43 | if external_identifier["external-id-type"] == "Scopus Author ID": 44 | graph.add((person_uri, VIVO.scopusId, Literal(external_identifier["external-id-value"]))) 45 | 46 | # ISI Research ID 47 | if external_identifier["external-id-type"] == "ResearcherID": 48 | graph.add((person_uri, VIVO.researcherId, Literal(external_identifier["external-id-value"]))) 49 | 50 | # Keywords 51 | if "keywords" in orcid_profile["person"]: 52 | keywords = orcid_profile["person"]["keywords"]["keyword"] 53 | for keyword in keywords: 54 | keywords_content = keyword["content"] 55 | if keywords_content: 56 | for keyword_content in keywords_content.split(", "): 57 | graph.add((person_uri, VIVO.freetextKeyword, Literal(keyword_content))) 58 | 59 | # Following is vcard bio information 60 | 61 | # Add main vcard 62 | vcard_uri = self.identifier_strategy.to_uri(VCARD.Individual, {"person_uri": person_uri}) 63 | # Will only add vcard if there is a child vcard 64 | add_main_vcard = False 65 | 66 | # Name vcard 67 | vcard_name_uri = self.identifier_strategy.to_uri(VCARD.Name, {"person_uri": person_uri}) 68 | if (given_names or family_name) and self.create_strategy.should_create(VCARD.Name, vcard_name_uri): 69 | graph.add((vcard_name_uri, RDF.type, VCARD.Name)) 70 | graph.add((vcard_uri, VCARD.hasName, vcard_name_uri)) 71 | if given_names: 72 | graph.add((vcard_name_uri, VCARD.givenName, Literal(given_names))) 73 | if family_name: 74 | graph.add((vcard_name_uri, VCARD.familyName, Literal(family_name))) 75 | add_main_vcard = True 76 | 77 | # Websites 78 | if "researcher-urls" in orcid_profile["person"]: 79 | researcher_urls = orcid_profile["person"]["researcher-urls"]["researcher-url"] 80 | for researcher_url in researcher_urls: 81 | url = researcher_url["url"]["value"] 82 | url_name = researcher_url["url-name"] 83 | vcard_website_uri = self.identifier_strategy.to_uri(VCARD.URL, {"url": url}) 84 | graph.add((vcard_website_uri, RDF.type, VCARD.URL)) 85 | graph.add((vcard_uri, VCARD.hasURL, vcard_website_uri)) 86 | graph.add((vcard_website_uri, VCARD.url, Literal(url, datatype=XSD.anyURI))) 87 | if url_name: 88 | graph.add((vcard_website_uri, RDFS.label, Literal(url_name))) 89 | add_main_vcard = True 90 | 91 | if add_main_vcard and self.create_strategy.should_create(VCARD.Individual, vcard_uri): 92 | graph.add((vcard_uri, RDF.type, VCARD.Individual)) 93 | # Contact info for 94 | graph.add((vcard_uri, OBO.ARG_2000029, person_uri)) 95 | -------------------------------------------------------------------------------- /orcid2vivo_app/fundings.py: -------------------------------------------------------------------------------- 1 | from vivo_namespace import VIVO, OBO, FOAF, VCARD 2 | from rdflib import RDF, RDFS, XSD, Literal 3 | from utility import add_date, add_date_interval 4 | 5 | 6 | class FundingCrosswalk: 7 | def __init__(self, identifier_strategy, create_strategy): 8 | self.identifier_strategy = identifier_strategy 9 | self.create_strategy = create_strategy 10 | 11 | def crosswalk(self, orcid_profile, person_uri, graph): 12 | if "fundings" in orcid_profile["activities-summary"]: 13 | # Funding 14 | for funding_group in orcid_profile["activities-summary"]["fundings"]["group"]: 15 | for funding in 
funding_group["funding-summary"]: 16 | if funding["type"] == "GRANT": 17 | 18 | title = funding["title"]["title"]["value"] 19 | grant_uri = self.identifier_strategy.to_uri(VIVO.Grant, {"title": title}) 20 | # Type 21 | graph.add((grant_uri, RDF.type, VIVO.Grant)) 22 | 23 | # Person 24 | graph.add((grant_uri, VIVO.relates, person_uri)) 25 | 26 | # Title 27 | graph.add((grant_uri, RDFS.label, Literal(title))) 28 | 29 | # Role 30 | role_uri = self.identifier_strategy.to_uri(VIVO.PrincipalInvestigatorRole, 31 | {"grant_uri": grant_uri}) 32 | graph.add((role_uri, RDF.type, VIVO.PrincipalInvestigatorRole)) 33 | # Inheres in 34 | graph.add((role_uri, OBO.RO_0000052, person_uri)) 35 | graph.add((role_uri, VIVO.relatedBy, grant_uri)) 36 | 37 | # Date interval 38 | (start_year, start_month, start_day) = FundingCrosswalk._get_date_parts("start-date", funding) 39 | (end_year, end_month, end_day) = FundingCrosswalk._get_date_parts("end-date", funding) 40 | 41 | add_date_interval(grant_uri, graph, self.identifier_strategy, 42 | add_date(start_year, graph, self.identifier_strategy, start_month, start_day), 43 | add_date(end_year, graph, self.identifier_strategy, end_month, end_day)) 44 | 45 | # Award amount 46 | funding_amount = funding.get("amount") 47 | if funding_amount is not None: 48 | value = funding_amount.get("value") 49 | if value is not None: 50 | award_amount = "${:,}".format(int(value)) 51 | graph.add((grant_uri, VIVO.totalAwardAmount, Literal(award_amount))) 52 | 53 | # Awarded by 54 | if "organization" in funding: 55 | organization_name = funding["organization"]["name"] 56 | organization_uri = self.identifier_strategy.to_uri(FOAF.Organization, 57 | {"name": organization_name}) 58 | graph.add((grant_uri, VIVO.assignedBy, organization_uri)) 59 | if self.create_strategy.should_create(FOAF.Organization, organization_uri): 60 | graph.add((organization_uri, RDF.type, FOAF.Organization)) 61 | graph.add((organization_uri, RDFS.label, Literal(organization_name))) 62 | 63 | # Identifiers 64 | if "external-ids" in funding and funding.get("external-ids"): 65 | for external_identifier in funding["external-ids"]["external-id"]: 66 | if "funding-external-identifier-value" in external_identifier: 67 | graph.add((grant_uri, VIVO.sponsorAwardId, 68 | Literal(external_identifier["external-id-value"]))) 69 | identifier_url = (external_identifier.get("external-id-url", {}) or {}).get("value") 70 | if identifier_url: 71 | vcard_uri = self.identifier_strategy.to_uri(VCARD.Kind, {"url": identifier_url}) 72 | graph.add((vcard_uri, RDF.type, VCARD.Kind)) 73 | # Has contact info 74 | graph.add((grant_uri, OBO.ARG_2000028, vcard_uri)) 75 | # Url vcard 76 | vcard_url_uri = self.identifier_strategy.to_uri(VCARD.URL, {"vcard_uri": vcard_uri}) 77 | graph.add((vcard_url_uri, RDF.type, VCARD.URL)) 78 | graph.add((vcard_uri, VCARD.hasURL, vcard_url_uri)) 79 | graph.add((vcard_url_uri, VCARD.url, Literal(identifier_url, datatype=XSD.anyURI))) 80 | 81 | @staticmethod 82 | def _get_date_parts(field_name, funding): 83 | date = funding.get(field_name, {}) or {} 84 | return (date.get("year", {}) or {}).get("value"), \ 85 | (date.get("month", {}) or {}).get("value"), \ 86 | (date.get("day", {}) or {}).get("value") 87 | -------------------------------------------------------------------------------- /orcid2vivo_app/utility.py: -------------------------------------------------------------------------------- 1 | from rdflib import RDF, RDFS, XSD, Literal 2 | from vivo_namespace import VIVO 3 | from numbers import Number 4 | from 
SPARQLWrapper import SPARQLWrapper 5 | import re 6 | 7 | 8 | def num_to_str(num): 9 | """ 10 | Converts a number to a string and removes leading 0s. 11 | 12 | If the number is already a string, then just returns. 13 | """ 14 | if isinstance(num, Number): 15 | return str(int(num)) 16 | return num.lstrip("0") 17 | 18 | 19 | def join_if_not_empty(items, sep=" "): 20 | """ 21 | Joins a list of items with a provided separator. 22 | 23 | Skips an empty item. 24 | """ 25 | joined = "" 26 | for item in items: 27 | if item and len(item) > 0: 28 | if joined != "": 29 | joined += sep 30 | joined += item 31 | return joined 32 | 33 | 34 | months = ("January", 35 | "February", 36 | "March", 37 | "April", 38 | "May", 39 | "June", 40 | "July", 41 | "August", 42 | "September", 43 | "October", 44 | "November", 45 | "December") 46 | 47 | 48 | def month_str_to_month_int(month_str): 49 | """ 50 | Converts a month name to the corresponding month number. 51 | 52 | If already a number, returns the number. 53 | 54 | Also, tries to convert the string to a number. 55 | """ 56 | if isinstance(month_str, Number): 57 | return month_str 58 | 59 | try: 60 | return int(month_str) 61 | except ValueError: 62 | pass 63 | 64 | return months.index(month_str)+1 65 | 66 | 67 | def month_int_to_month_str(month_int): 68 | if isinstance(month_int, basestring): 69 | try: 70 | month_int = int(month_int) 71 | except ValueError: 72 | return month_int 73 | 74 | return months[month_int-1] 75 | 76 | 77 | def add_date(year, g, identifier_strategy, month=None, day=None, label=None): 78 | """ 79 | Adds triples for a date. 80 | 81 | Return True if date was added. 82 | """ 83 | #Date 84 | date_uri = identifier_strategy.to_uri(VIVO.DateTimeValue, {"year": year, "month": month, "day": day}) 85 | if year: 86 | g.add((date_uri, RDF.type, VIVO.DateTimeValue)) 87 | #Day, month, and year 88 | if day and month: 89 | g.add((date_uri, VIVO.dateTimePrecision, VIVO.yearMonthDayPrecision)) 90 | g.add((date_uri, VIVO.dateTime, 91 | Literal("%s-%02d-%02dT00:00:00" % ( 92 | int(year), month_str_to_month_int(month), int(day)), 93 | datatype=XSD.dateTime))) 94 | g.add((date_uri, 95 | RDFS.label, 96 | Literal(label or "%s %s, %s" % (month_int_to_month_str(month), num_to_str(day), num_to_str(year))))) 97 | #Month and year 98 | elif month: 99 | g.add((date_uri, VIVO.dateTimePrecision, VIVO.yearMonthPrecision)) 100 | g.add((date_uri, VIVO.dateTime, 101 | Literal("%s-%02d-01T00:00:00" % ( 102 | year, month_str_to_month_int(month)), 103 | datatype=XSD.dateTime))) 104 | g.add((date_uri, 105 | RDFS.label, 106 | Literal(label or "%s %s" % (month, num_to_str(year))))) 107 | else: 108 | #Just year 109 | g.add((date_uri, VIVO.dateTimePrecision, VIVO.yearPrecision)) 110 | g.add((date_uri, VIVO.dateTime, 111 | Literal("%s-01-01T00:00:00" % ( 112 | year), 113 | datatype=XSD.dateTime))) 114 | g.add((date_uri, RDFS.label, Literal(label or num_to_str(year)))) 115 | return date_uri 116 | return None 117 | 118 | 119 | def add_date_interval(subject_uri, g, identifier_strategy, start_uri=None, end_uri=None): 120 | """ 121 | Adds triples for a date interval. 
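    Returns the interval URI, or None if neither a start nor an end date URI is provided.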
122 | """ 123 | if start_uri or end_uri: 124 | interval_uri = identifier_strategy.to_uri(VIVO.DateTimeInterval, {"subject_uri": subject_uri, 125 | "start_uri": start_uri, "end_uri": end_uri}) 126 | g.add((interval_uri, RDF.type, VIVO.DateTimeInterval)) 127 | g.add((subject_uri, VIVO.dateTimeInterval, interval_uri)) 128 | if start_uri: 129 | g.add((interval_uri, VIVO.start, start_uri)) 130 | if end_uri: 131 | g.add((interval_uri, VIVO.end, end_uri)) 132 | return interval_uri 133 | return None 134 | 135 | 136 | def sparql_insert(graph, endpoint, username, password): 137 | #Need to construct query 138 | ns_lines = [] 139 | triple_lines = [] 140 | for line in graph.serialize(format="turtle").splitlines(): 141 | if line.startswith("@prefix"): 142 | #Change from @prefix to PREFIX 143 | ns_lines.append("PREFIX" + line[7:-2]) 144 | else: 145 | triple_lines.append(line) 146 | query = "\n".join(ns_lines) 147 | query += "\nINSERT DATA { GRAPH {\n" 148 | query += "\n".join(triple_lines) 149 | query += "\n}}" 150 | sparql_update(query, endpoint, username, password) 151 | 152 | 153 | def sparql_delete(graph, endpoint, username, password): 154 | #Need to construct query 155 | ns_lines = [] 156 | triple_lines = [] 157 | for line in graph.serialize(format="turtle").splitlines(): 158 | if line.startswith("@prefix"): 159 | #Change from @prefix to PREFIX 160 | ns_lines.append("PREFIX" + line[7:-2]) 161 | else: 162 | triple_lines.append(line) 163 | query = "\n".join(ns_lines) 164 | query += "\nDELETE DATA { GRAPH {\n" 165 | query += "\n".join(triple_lines) 166 | query += "\n}}" 167 | sparql_update(query, endpoint, username, password) 168 | 169 | 170 | def sparql_update(query, endpoint, username, password): 171 | """ 172 | Perform a SPARQL Update query. 173 | 174 | :param query: the query to perform 175 | :param endpoint: the URL for SPARQL Update on the SPARQL server 176 | :param username: username for SPARQL Update 177 | :param password: password for SPARQL Update 178 | """ 179 | sparql = SPARQLWrapper(endpoint) 180 | sparql.addParameter("email", username) 181 | sparql.addParameter("password", password) 182 | sparql.setQuery(query) 183 | sparql.setMethod("POST") 184 | sparql.query() 185 | 186 | 187 | def clean_orcid(value): 188 | """ 189 | Minimal ORCID validation. Allowing for orcid.org/ 190 | """ 191 | if value.find('orcid.org/') > -1: 192 | return value.split('/')[-1] 193 | else: 194 | return value 195 | 196 | 197 | def is_valid_orcid(orcid): 198 | """ 199 | Returns true if has correct syntax for an orcid. 
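    For example, "0000-0003-1527-0030" is valid, while "orcid.org/0000-0003-1527-0030" is not (run it through clean_orcid() first).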
200 | """ 201 | # 0000-0003-1527-0030 202 | if re.match("\d\d\d\d-\d\d\d\d-\d\d\d\d-\d\d\d[0-9X]$", orcid): 203 | return True 204 | return False -------------------------------------------------------------------------------- /orcid2vivo_app/vivo_namespace.py: -------------------------------------------------------------------------------- 1 | from rdflib.namespace import Namespace, NamespaceManager 2 | from rdflib import Graph 3 | 4 | #Our data namespace 5 | D = Namespace('http://vivo.mydomain.edu/individual/') 6 | #The VIVO namespace 7 | VIVO = Namespace('http://vivoweb.org/ontology/core#') 8 | #The VCARD namespace 9 | VCARD = Namespace('http://www.w3.org/2006/vcard/ns#') 10 | #The OBO namespace 11 | OBO = Namespace('http://purl.obolibrary.org/obo/') 12 | #The BIBO namespace 13 | BIBO = Namespace('http://purl.org/ontology/bibo/') 14 | #The FOAF namespace 15 | FOAF = Namespace('http://xmlns.com/foaf/0.1/') 16 | #The SKOS namespace 17 | SKOS = Namespace('http://www.w3.org/2004/02/skos/core#') 18 | 19 | ns_manager = NamespaceManager(Graph()) 20 | ns_manager.bind('d', D) 21 | ns_manager.bind('vivo', VIVO) 22 | ns_manager.bind('vcard', VCARD) 23 | ns_manager.bind('obo', OBO) 24 | ns_manager.bind('bibo', BIBO) 25 | ns_manager.bind("foaf", FOAF) 26 | ns_manager.bind("skos", SKOS) 27 | -------------------------------------------------------------------------------- /orcid2vivo_app/vivo_uri.py: -------------------------------------------------------------------------------- 1 | import vivo_namespace as ns 2 | import hashlib 3 | import re 4 | import collections 5 | 6 | 7 | def to_hash_identifier(prefix, parts): 8 | """ 9 | Return an identifier composed of the prefix and hash of the parts. 10 | """ 11 | hash_parts = hashlib.md5("".join([unicode(part) for part in parts if part]).encode("utf-8")) 12 | return "%s-%s" % (prefix, hash_parts.hexdigest()) 13 | 14 | 15 | class HashIdentifierStrategy(): 16 | """ 17 | A strategy for constructing an identifier by creating a prefix from the 18 | class or general class and a body from a hash of the attributes. 19 | 20 | Other identifier strategies must implement to_uri(). 21 | """ 22 | pattern = re.compile("^.+/(.+?)(#(.+))?$") 23 | 24 | def __init__(self): 25 | pass 26 | 27 | def to_uri(self, clazz, attrs, general_clazz=None): 28 | """ 29 | Given an RDF class and a set of attributes for an entity, produce a URI. 30 | :param clazz: the class of the entity. 31 | :param attrs: a map of identifying attributes for an entity. 32 | :param general_clazz: a superclass of the entity that can be used to group like entities. 33 | :return: URI for the entity. 
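        For example, clazz=BIBO.Journal with attrs={"name": "D-Lib Magazine"} produces a URI of the form d:journal-<md5 hexdigest of the attribute values>.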
34 | """ 35 | return ns.D["%s-%s" % (self._class_to_prefix(general_clazz) or self._class_to_prefix(clazz), 36 | self._attrs_to_hash(attrs))] 37 | 38 | @staticmethod 39 | def _class_to_prefix(clazz): 40 | if clazz: 41 | match = HashIdentifierStrategy.pattern.search(clazz) 42 | assert match 43 | return (match.group(3) or match.group(1)).lower() 44 | return None 45 | 46 | @staticmethod 47 | def _attrs_to_hash(attrs): 48 | sorted_attrs = collections.OrderedDict(sorted(attrs.items())) 49 | hash_parts = hashlib.md5("".join([unicode(part) for part in sorted_attrs.values() if part]).encode("utf-8")) 50 | return hash_parts.hexdigest() -------------------------------------------------------------------------------- /orcid2vivo_app/works.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from rdflib import RDFS, RDF, XSD, Literal 3 | from vivo_namespace import VIVO, VCARD, OBO, BIBO, FOAF, SKOS 4 | from utility import join_if_not_empty 5 | import re 6 | import bibtexparser 7 | from bibtexparser.bparser import BibTexParser 8 | from bibtexparser.latexenc import unicode_to_latex, unicode_to_crappy_latex1, unicode_to_crappy_latex2 9 | import itertools 10 | from utility import add_date 11 | 12 | work_type_map = { 13 | "BOOK": BIBO["Book"], 14 | "BOOK_CHAPTER": BIBO["Chapter"], 15 | "BOOK_REVIEW": BIBO["Review"], 16 | "DICTIONARY_ENTRY": BIBO["DocumentPart"], 17 | "DISSERTATION": BIBO["Thesis"], 18 | "ENCYCLOPEDIA_ENTRY": BIBO["DocumentPart"], 19 | "EDITED_BOOK": BIBO["EditedBook"], 20 | "JOURNAL_ARTICLE": BIBO["AcademicArticle"], 21 | "JOURNAL_ISSUE": BIBO["Issue"], 22 | "MAGAZINE_ARTICLE": BIBO["Article"], 23 | "MANUAL": BIBO["Manual"], 24 | "ONLINE_RESOURCE": BIBO["Website"], 25 | "NEWSLETTER_ARTICLE": BIBO["Article"], 26 | "NEWSPAPER_ARTICLE": BIBO["Article"], 27 | "REPORT": BIBO["Report"], 28 | "RESEARCH_TOOL": BIBO["Document"], 29 | "SUPERVISED_STUDENT_PUBLICATION": BIBO["Article"], 30 | # test not mapped 31 | "TRANSLATION": BIBO["Document"], 32 | "WEBSITE": BIBO["Website"], 33 | "WORKING_PAPER": VIVO["WorkingPaper"], 34 | "CONFERENCE_ABSTRACT": VIVO["Abstract"], 35 | "CONFERENCE_PAPER": VIVO["ConferencePaper"], 36 | "CONFERENCE_POSTER": VIVO["ConferencePoster"], 37 | # disclosure not mapped 38 | # license not mapped 39 | "PATENT": BIBO["Patent"], 40 | # registered-copyright not mapped 41 | "ARTISTIC_PERFORMANCE": BIBO["Performance"], 42 | "DATA_SET": VIVO["Dataset"], 43 | # invention not mapped 44 | "LECTURE_SPEECH": VIVO["Speech"], 45 | "RESEARCH_TECHNIQUE": OBO["OBI_0000272"], 46 | # spin-off-company not mapped 47 | "STANDARDS_AND_POLICY": BIBO["Standard"], 48 | "OTHER": BIBO["Document"] 49 | } 50 | 51 | identifier_map = { 52 | "DOI": (BIBO.doi, "http://dx.doi.org/%s"), 53 | "ASIN": (BIBO.asin, "http://www.amazon.com/dp/%s"), 54 | "OCLC": (BIBO.oclcnum, "http://www.worldcat.org/oclc/%s"), 55 | "LCCN": (BIBO.lccn, None), 56 | "PMC": (VIVO.pmcid, "http://www.ncbi.nlm.nih.gov/pmc/articles/%s/"), 57 | "PMID": (BIBO.pmid, "http://www.ncbi.nlm.nih.gov/pubmed/%s"), 58 | "ISSN": (BIBO.issn, None) 59 | } 60 | 61 | journal_map = { 62 | "JOURNAL_ARTICLE": BIBO.Journal, 63 | "MAGAZINE_ARTICLE": BIBO.Magazine, 64 | "NEWSLETTER_ARTICLE": VIVO.Newsletter, 65 | "NEWSPAPER_ARTICLE": BIBO.Newspaper, 66 | "SUPERVISED_STUDENT_PUBLICATION": BIBO.Journal 67 | } 68 | 69 | contributor_map = { 70 | "EDITOR": VIVO.Editorship, 71 | "CHAIR_OR_TRANSLATOR": "TRANSLATOR" 72 | } 73 | 74 | bibtex_type_map = { 75 | "article": BIBO["Article"], 76 | "book": BIBO["Book"], 77 | 
"conference": VIVO["ConferencePaper"], 78 | "manual": BIBO["Manual"], 79 | "mastersthesis": BIBO["Thesis"], 80 | "phdthesis": BIBO["Thesis"], 81 | "proceedings": VIVO["ConferencePaper"], 82 | "techreport": BIBO["Report"] 83 | } 84 | 85 | 86 | class WorksCrosswalk: 87 | def __init__(self, identifier_strategy, create_strategy): 88 | self.identifier_strategy = identifier_strategy 89 | self.create_strategy = create_strategy 90 | 91 | def crosswalk(self, orcid_profile, person_uri, graph): 92 | # Work metadata may be available from the orcid profile, bibtex contained in the orcid profile, and/or crossref 93 | # record. The preferred order (in general) for getting metadata is crossref, bibtex, orcid. 94 | 95 | # Note that datacite records were considered, but not found to have additional/better metadata. 96 | 97 | person_surname = orcid_profile.get("person", {}).get("name", {}).get("family-name", {}).get("value", "") 98 | 99 | # Publications 100 | if "works" in orcid_profile["activities-summary"]: 101 | for work_group in orcid_profile["activities-summary"]["works"]["group"]: 102 | for work in work_group["work-summary"]: 103 | self.crosswalk_work(self._fetch_work(work["path"]), person_uri, person_surname, graph) 104 | 105 | @staticmethod 106 | def _fetch_work(path): 107 | r = requests.get('https://pub.orcid.org/v2.0%s' % path, 108 | headers={"Accept": "application/json"}) 109 | if r: 110 | return r.json() 111 | else: 112 | raise Exception("Request to fetch %s returned %s" % (path, r.status_code)) 113 | 114 | def crosswalk_work(self, work, person_uri, person_surname, graph): 115 | # Work metadata may be available from the orcid profile, bibtex contained in the orcid profile, and/or crossref 116 | # record. The preferred order (in general) for getting metadata is crossref, bibtex, orcid. 117 | 118 | # Note that datacite records were considered, but not found to have additional/better metadata. 
119 | 120 | # Work Type 121 | work_type = work["type"] 122 | if work_type in work_type_map: 123 | # Extract 124 | # Get external identifiers so that can get DOI 125 | external_identifiers = WorksCrosswalk._get_work_identifiers(work) 126 | doi = external_identifiers.get("DOI") 127 | crossref_record = WorksCrosswalk._fetch_crossref_doi(doi) if doi else {} 128 | 129 | # Bibtex 130 | bibtex = WorksCrosswalk._parse_bibtex(work) 131 | # Get title so that can construct work uri 132 | title = WorksCrosswalk._get_crossref_title(crossref_record) or bibtex.get( 133 | "title") or WorksCrosswalk._get_orcid_title(work) 134 | 135 | # Work-type 136 | work_class = work_type_map[work_type] 137 | if work_type == "TRANSLATION" and bibtex and bibtex["ENTRYTYPE"] in bibtex_type_map: 138 | work_class = bibtex_type_map[bibtex["ENTRYTYPE"]] 139 | 140 | # Construct work uri 141 | work_uri = self.identifier_strategy.to_uri(work_class, {"name": title}) 142 | 143 | graph.add((work_uri, RDF.type, work_class)) 144 | 145 | # Title 146 | graph.add((work_uri, RDFS.label, Literal(title))) 147 | 148 | # Publication date 149 | (publication_year, publication_month, publication_day) = \ 150 | WorksCrosswalk._get_crossref_publication_date(crossref_record) \ 151 | or WorksCrosswalk._get_orcid_publication_date(work) \ 152 | or WorksCrosswalk._get_bibtext_publication_date(bibtex) or (None, None, None) 153 | date_uri = add_date(publication_year, graph, self.identifier_strategy, 154 | publication_month, publication_day) 155 | if date_uri: 156 | graph.add((work_uri, VIVO.dateTimeValue, date_uri)) 157 | 158 | # Subjects 159 | subjects = crossref_record["subject"] if crossref_record and "subject" in crossref_record else None 160 | if subjects: 161 | for subject in subjects: 162 | subject_uri = self.identifier_strategy.to_uri(SKOS.Concept, {"name": subject}) 163 | graph.add((work_uri, VIVO.hasSubjectArea, subject_uri)) 164 | if self.create_strategy.should_create(SKOS.Concept, subject_uri): 165 | graph.add((subject_uri, RDF.type, SKOS.Concept)) 166 | graph.add((subject_uri, RDFS.label, Literal(subject))) 167 | 168 | # Contributors (an array of (first_name, surname, VIVO type, e.g., VIVO.Authorship)) 169 | bibtex_contributors = [] 170 | bibtex_contributors.extend(WorksCrosswalk._get_bibtex_authors(bibtex)) 171 | bibtex_contributors.extend(WorksCrosswalk._get_bibtex_editors(bibtex)) 172 | # Orcid is better for translations because has translator role 173 | if work_type == "TRANSLATION": 174 | contributors = WorksCrosswalk._get_orcid_contributors(work) 175 | else: 176 | contributors = WorksCrosswalk._get_crossref_authors(crossref_record) or bibtex_contributors \ 177 | or WorksCrosswalk._get_orcid_contributors(work) 178 | if not contributors: 179 | # Add person as author or editor. 180 | # None, None means this person. 181 | if work_type in ("EDITED_BOOK",): 182 | contributors.append((None, None, VIVO.Editorship)) 183 | elif work_type == "TRANSLATION": 184 | # Translator is a predicate, not a -ship class. 
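                    # (Handled below by adding BIBO.translator from the contributor to the work.)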
185 | contributors.append((None, None, "TRANSLATOR")) 186 | else: 187 | contributors.append((None, None, VIVO.Authorship)) 188 | 189 | for (first_name, surname, vivo_type) in contributors: 190 | if not surname or person_surname.lower() == surname.lower(): 191 | contributor_uri = person_uri 192 | else: 193 | contributor_uri = self.identifier_strategy.to_uri(FOAF.Person, {"first_name": first_name, 194 | "surname": surname}) 195 | if self.create_strategy.should_create(FOAF.Person, contributor_uri): 196 | graph.add((contributor_uri, RDF.type, FOAF.Person)) 197 | full_name = join_if_not_empty((first_name, surname)) 198 | graph.add((contributor_uri, RDFS.label, Literal(full_name))) 199 | 200 | # Translation is a special case 201 | if vivo_type == "TRANSLATOR": 202 | graph.add((contributor_uri, BIBO.translator, work_uri)) 203 | # So is patent assignee 204 | elif work_type == "PATENT": 205 | graph.add((contributor_uri, VIVO.assigneeFor, work_uri)) 206 | else: 207 | contributorship_uri = self.identifier_strategy.to_uri(vivo_type, 208 | {"contributor_uri": contributor_uri, 209 | "work_uri": work_uri}) 210 | graph.add((contributorship_uri, RDF.type, vivo_type)) 211 | graph.add((contributorship_uri, VIVO.relates, work_uri)) 212 | graph.add((contributorship_uri, VIVO.relates, contributor_uri)) 213 | 214 | # Publisher 215 | publisher = crossref_record.get("publisher") or bibtex.get("publisher") 216 | if publisher: 217 | publisher_uri = self.identifier_strategy.to_uri(FOAF.Organization, {"name": publisher}) 218 | graph.add((work_uri, VIVO.publisher, publisher_uri)) 219 | if self.create_strategy.should_create(FOAF.Organization, publisher_uri): 220 | graph.add((publisher_uri, RDF.type, FOAF.Organization)) 221 | graph.add((publisher_uri, RDFS.label, Literal(publisher))) 222 | 223 | # Volume 224 | volume = crossref_record.get("volume") or bibtex.get("volume") 225 | if volume: 226 | graph.add((work_uri, BIBO.volume, Literal(volume))) 227 | 228 | # Issue 229 | issue = crossref_record.get("issue") or bibtex.get("number") 230 | if issue: 231 | graph.add((work_uri, BIBO.issue, Literal(issue))) 232 | 233 | # Pages 234 | pages = crossref_record.get("page") or bibtex.get("pages") 235 | start_page = None 236 | end_page = None 237 | if pages and "-" in pages: 238 | (start_page, end_page) = re.split(" *-+ *", pages, maxsplit=2) 239 | if start_page: 240 | graph.add((work_uri, BIBO.pageStart, Literal(start_page))) 241 | if end_page: 242 | graph.add((work_uri, BIBO.pageEnd, Literal(end_page))) 243 | 244 | # Identifiers 245 | # Add doi in bibtex, but not orcid profile 246 | if bibtex and "doi" in bibtex and "DOI" not in external_identifiers: 247 | external_identifiers["DOI"] = bibtex["doi"] 248 | # Add isbn in bibtex, but not orcid profile 249 | if bibtex and "isbn" in bibtex and "ISBN" not in external_identifiers: 250 | external_identifiers["ISBN"] = bibtex["isbn"] 251 | 252 | for identifier_type, identifier in external_identifiers.iteritems(): 253 | identifier_url = None 254 | if identifier_type in ("PAT", "OTHER-ID") and work_type == "PATENT": 255 | identifier_predicate = VIVO.patentNumber 256 | elif identifier_type == "ISBN": 257 | clean_isbn = identifier.replace("-", "") 258 | if len(clean_isbn) <= 10: 259 | identifier_predicate = BIBO.isbn10 260 | else: 261 | identifier_predicate = BIBO.isbn13 262 | else: 263 | (identifier_predicate, url_template) = identifier_map.get(identifier_type, (None, None)) 264 | if url_template: 265 | identifier_url = url_template % identifier 266 | 267 | if identifier_predicate: 268 | 
graph.add((work_uri, identifier_predicate, Literal(identifier))) 269 | if identifier_url: 270 | self._add_work_url(identifier_url, work_uri, graph) 271 | 272 | orcid_url = (work.get("url", {}) or {}).get("value") 273 | if orcid_url and WorksCrosswalk._use_url(orcid_url): 274 | self._add_work_url(orcid_url, work_uri, graph) 275 | bibtex_url = bibtex.get("link") 276 | if bibtex_url and WorksCrosswalk._use_url(bibtex_url) and orcid_url != bibtex_url: 277 | self._add_work_url(bibtex_url, work_uri, graph) 278 | 279 | # Series 280 | series = bibtex.get("series") 281 | # TODO: Figure out how to model series in VIVO-ISF. 282 | 283 | # Journal 284 | # If Crossref has a journal use it 285 | journal = WorksCrosswalk._get_crossref_journal(crossref_record) 286 | issns = [] 287 | if journal: 288 | issns = crossref_record.get("ISSN", []) 289 | # Otherwise, only use for some work types. 290 | elif work_type in journal_map: 291 | journal = bibtex.get("journal") 292 | if journal: 293 | if "issn" in bibtex: 294 | issns = [bibtex["issn"]] 295 | else: 296 | journal = (work.get("journal-title", {}) or {}).get("value") 297 | 298 | if journal: 299 | journal_class = journal_map.get(work_type, BIBO.Journal) 300 | journal_uri = self.identifier_strategy.to_uri(journal_class, {"name": journal}) 301 | graph.add((work_uri, VIVO.hasPublicationVenue, journal_uri)) 302 | if self.create_strategy.should_create(journal_class, journal_uri): 303 | graph.add((journal_uri, RDF.type, journal_class)) 304 | graph.add((journal_uri, RDFS.label, Literal(journal))) 305 | for issn in issns: 306 | graph.add((journal_uri, BIBO.issn, Literal(issn))) 307 | 308 | if work_type in ("BOOK_CHAPTER",): 309 | book_title = bibtex.get("booktitle") 310 | if book_title: 311 | book_uri = self.identifier_strategy.to_uri(BIBO.Book, {"name": book_title}) 312 | graph.add((work_uri, VIVO.hasPublicationVenue, book_uri)) 313 | if self.create_strategy.should_create(BIBO.Book, book_uri): 314 | graph.add((book_uri, RDF.type, BIBO.Book)) 315 | graph.add((book_uri, RDFS.label, Literal(book_title))) 316 | 317 | if work_type in ("CONFERENCE_PAPER",): 318 | proceeding = bibtex.get("journal") or (work.get("journal-title", {}) or {}).get("value") 319 | if proceeding: 320 | proceeding_uri = self.identifier_strategy.to_uri(BIBO.Proceedings, {"name": proceeding}) 321 | graph.add((work_uri, VIVO.hasPublicationVenue, proceeding_uri)) 322 | if self.create_strategy.should_create(BIBO.Proceedings, proceeding_uri): 323 | graph.add((proceeding_uri, RDF.type, BIBO.Proceedings)) 324 | graph.add((proceeding_uri, RDFS.label, Literal(proceeding))) 325 | 326 | @staticmethod 327 | def _fetch_crossref_doi(doi): 328 | # curl 'http://api.crossref.org/works/10.1177/1049732304268657' -L -i 329 | r = requests.get('http://api.crossref.org/works/%s' % doi) 330 | if r.status_code == 404: 331 | # Not a crossref DOI. 
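            # Fall back to bibtex and ORCID metadata for this work.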
332 | return {} 333 | if r: 334 | return r.json()["message"] 335 | else: 336 | raise Exception("Request to fetch DOI %s returned %s" % (doi, r.status_code)) 337 | 338 | @staticmethod 339 | def _parse_bibtex(work): 340 | bibtex = {} 341 | if work and (work.get("citation", {}) or {}).get("citation-type") == "BIBTEX": 342 | citation = work["citation"]["citation-value"] 343 | # Need to add \n for bibtexparser to work 344 | curly_level = 0 345 | new_citation = "" 346 | for c in citation: 347 | if c == "{": 348 | curly_level += 1 349 | elif c == "}": 350 | curly_level -= 1 351 | new_citation += c 352 | if (curly_level == 1 and c == ",") or (curly_level == 0 and c == "}"): 353 | new_citation += "\n" 354 | parser = BibTexParser() 355 | parser.customization = WorksCrosswalk._bibtex_customizations 356 | bibtex = bibtexparser.loads(new_citation, parser=parser).entries[0] 357 | return bibtex 358 | 359 | @staticmethod 360 | def _get_crossref_title(crossref_record): 361 | if "title" in crossref_record and crossref_record["title"]: 362 | return crossref_record["title"][0] 363 | return None 364 | 365 | @staticmethod 366 | def _get_orcid_title(work): 367 | return join_if_not_empty((work["title"]["title"]["value"], 368 | (work["title"].get("subtitle") or {}).get("value")), ": ") 369 | 370 | @staticmethod 371 | def _get_orcid_publication_date(work): 372 | year = None 373 | month = None 374 | day = None 375 | publication_date = work.get("publication-date") 376 | if publication_date: 377 | year = publication_date["year"]["value"] if publication_date.get("year") else None 378 | month = publication_date["month"]["value"] if publication_date.get("month") else None 379 | day = publication_date["day"]["value"] if publication_date.get("day") else None 380 | if not year and not month and not day: 381 | return None 382 | return year, month, day 383 | 384 | @staticmethod 385 | def _get_bibtext_publication_date(bibtex): 386 | year = bibtex.get("year") 387 | if year and not re.match("\d{4}", year): 388 | year = None 389 | # Not going to try to parse month and day 390 | if not year: 391 | return None 392 | return year, None, None 393 | 394 | @staticmethod 395 | def _get_crossref_publication_date(doi_record): 396 | if "issued" in doi_record and "date-parts" in doi_record["issued"]: 397 | date_parts = doi_record["issued"]["date-parts"][0] 398 | return date_parts[0], date_parts[1] if len(date_parts) > 1 else None, date_parts[2] if len( 399 | date_parts) > 2 else None 400 | return None 401 | 402 | @staticmethod 403 | def _get_work_identifiers(work): 404 | ids = {} 405 | external_identifiers = work.get("external-ids") 406 | if external_identifiers: 407 | for external_identifier in (external_identifiers.get("external-id") or []): 408 | if external_identifier["external-id-value"] is not None: 409 | ids[external_identifier["external-id-type"].upper()] = \ 410 | external_identifier["external-id-value"] 411 | return ids 412 | 413 | @staticmethod 414 | def _get_crossref_authors(doi_record): 415 | authors = [] 416 | for author in doi_record.get("author", []): 417 | authors.append((author["given"], author["family"], VIVO.Authorship)) 418 | return authors 419 | 420 | @staticmethod 421 | def _get_orcid_contributors(work): 422 | contributors = [] 423 | for contributor in (work.get("contributors") or {}).get("contributor", []): 424 | # Last name, first name 425 | credit_name = (contributor.get("credit-name") or {}).get("value") 426 | # Some entries will not have a credit name, meaning the entry is for the person. 
427 | # Using None, None to indicate the person. 428 | first_name = None 429 | surname = None 430 | if credit_name: 431 | # Normalize with BibtexParser's getnames() 432 | clean_name = bibtexparser.customization.getnames([credit_name])[0] 433 | (first_name, surname) = WorksCrosswalk._parse_reversed_name(clean_name) 434 | role = (contributor.get("contributor-attributes", {}) or {}).get("contributor-role") 435 | contributors.append((first_name, surname, contributor_map.get(role, VIVO.Authorship))) 436 | return contributors 437 | 438 | @staticmethod 439 | def _get_bibtex_authors(bibtex): 440 | authors = [] 441 | for name in bibtex.get("author", []): 442 | (first_name, surname) = WorksCrosswalk._parse_reversed_name(name) 443 | authors.append((first_name, surname, VIVO.Authorship)) 444 | return authors 445 | 446 | @staticmethod 447 | def _get_bibtex_editors(bibtex): 448 | editors = [] 449 | for editor in bibtex.get("editor", {}): 450 | (first_name, surname) = WorksCrosswalk._parse_reversed_name(editor["name"]) 451 | editors.append((first_name, surname, VIVO.Editorship)) 452 | return editors 453 | 454 | @staticmethod 455 | def _parse_reversed_name(name): 456 | if name: 457 | split_name = name.split(", ", 2) 458 | if len(split_name) == 2: 459 | return split_name[1], split_name[0] 460 | else: 461 | return None, name 462 | 463 | @staticmethod 464 | def _bibtex_customizations(record): 465 | record = WorksCrosswalk._bibtex_convert_to_unicode(record) 466 | record = bibtexparser.customization.author(record) 467 | record = bibtexparser.customization.editor(record) 468 | return record 469 | 470 | @staticmethod 471 | def _bibtex_convert_to_unicode(record): 472 | for val in record: 473 | if '\\' in record[val] or '{' in record[val]: 474 | for k, v in itertools.chain(unicode_to_crappy_latex1, unicode_to_latex): 475 | if v in record[val]: 476 | record[val] = record[val].replace(v, k) 477 | # Try without space 478 | elif v.rstrip() in record[val]: 479 | record[val] = record[val].replace(v.rstrip(), k) 480 | 481 | # If there is still very crappy items 482 | if '\\' in record[val]: 483 | for k, v in unicode_to_crappy_latex2: 484 | if v in record[val]: 485 | parts = record[val].split(str(v)) 486 | for key, record[val] in enumerate(parts): 487 | if key + 1 < len(parts) and len(parts[key + 1]) > 0: 488 | # Change order to display accents 489 | parts[key] = parts[key] + parts[key + 1][0] 490 | parts[key + 1] = parts[key + 1][1:] 491 | record[val] = k.join(parts) 492 | 493 | # Also replace {\\&} 494 | if '{\\&}' in record[val]: 495 | record[val] = record[val].replace('{\\&}', '&') 496 | return record 497 | 498 | def _add_work_url(self, url, work_uri, graph): 499 | vcard_uri = self.identifier_strategy.to_uri(VCARD.Kind, {"url": url}) 500 | graph.add((vcard_uri, RDF.type, VCARD.Kind)) 501 | # Has contact info 502 | graph.add((work_uri, OBO.ARG_2000028, vcard_uri)) 503 | # Url vcard 504 | vcard_url_uri = self.identifier_strategy.to_uri(VCARD.URL, {"vcard_uri": vcard_uri}) 505 | graph.add((vcard_url_uri, RDF.type, VCARD.URL)) 506 | graph.add((vcard_uri, VCARD.hasURL, vcard_url_uri)) 507 | graph.add((vcard_url_uri, VCARD.url, Literal(url, datatype=XSD.anyURI))) 508 | 509 | @staticmethod 510 | def _use_url(url): 511 | # Use url if it does not match one of the patterns in identifier_map 512 | for (identifier_predicate, url_template) in identifier_map.itervalues(): 513 | if url_template: 514 | base_url = url_template[:url_template.index("%s")] 515 | if url.startswith(base_url): 516 | return False 517 | return True 518 | 519 
| @staticmethod 520 | def _get_crossref_journal(crossref_record): 521 | journal = None 522 | # May be multiple container titles. Take the longest. 523 | for j in crossref_record.get("container-title", []): 524 | if not journal or len(j) > len(journal): 525 | journal = j 526 | return journal 527 | -------------------------------------------------------------------------------- /orcid2vivo_loader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import sqlite3 5 | import os 6 | import logging 7 | import codecs 8 | from datetime import datetime 9 | from rdflib import Graph 10 | from rdflib.compare import graph_diff 11 | from orcid2vivo import default_execute 12 | from orcid2vivo_app.vivo_namespace import ns_manager 13 | from orcid2vivo_app.utility import sparql_insert, sparql_delete 14 | 15 | log = logging.getLogger(__name__) 16 | 17 | DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S" 18 | 19 | 20 | class Store: 21 | def __init__(self, data_path): 22 | self.db_filepath = os.path.join(data_path, "orcid2vivo.db") 23 | log.debug("Db filepath is %s", self.db_filepath) 24 | create_db = not os.path.exists(self.db_filepath) 25 | self._conn = sqlite3.connect(self.db_filepath) 26 | if create_db: 27 | self._create_db() 28 | 29 | def _create_db(self): 30 | logging.info("Creating db") 31 | c = self._conn.cursor() 32 | 33 | # Creating a new table 34 | c.execute(""" 35 | create table orcid_ids (orcid_id primary key, active, last_update, person_uri, person_id, person_class, 36 | confirmed); 37 | """) 38 | 39 | self._conn.commit() 40 | 41 | def __contains__(self, orcid_id): 42 | """ 43 | Returns True if there is a record for the orcid id and it is active. 44 | """ 45 | 46 | return self.contains(orcid_id, True) 47 | 48 | def contains(self, orcid_id, active=None): 49 | """ 50 | Returns True if there is a record for the orcid id. 51 | """ 52 | 53 | c = self._conn.cursor() 54 | if active is None: 55 | c.execute(""" 56 | select orcid_id from orcid_ids where orcid_id=? 57 | """, (orcid_id,)) 58 | else: 59 | c.execute(""" 60 | select orcid_id from orcid_ids where orcid_id=? and active=? 61 | """, (orcid_id, 1 if active else 0)) 62 | if c.fetchone(): 63 | return True 64 | return False 65 | 66 | def __getitem__(self, orcid_id): 67 | """ 68 | Returns orcid_id, active, last_update, person_uri, person_id, person_class, confirmed for orcid id. 69 | """ 70 | c = self._conn.cursor() 71 | c.execute(""" 72 | select orcid_id, active, last_update, person_uri, person_id, person_class, confirmed from orcid_ids where 73 | orcid_id=? 74 | """, (orcid_id,)) 75 | row = c.fetchone() 76 | if not row: 77 | raise IndexError 78 | return row 79 | 80 | def __delitem__(self, orcid_id): 81 | """ 82 | Marks an orcid id as inactive. 83 | """ 84 | c = self._conn.cursor() 85 | 86 | c.execute(""" 87 | update orcid_ids set active=0 where orcid_id=? 88 | """, (orcid_id,)) 89 | 90 | self._conn.commit() 91 | 92 | def add(self, orcid_id, person_uri=None, person_id=None, person_class=None, confirmed=False): 93 | """ 94 | Adds orcid id or updates existing orcid id and marks as active. 95 | """ 96 | c = self._conn.cursor() 97 | 98 | if self.contains(orcid_id): 99 | #Make update 100 | log.info("Updating %s", orcid_id) 101 | c.execute(""" 102 | update orcid_ids set active=1, person_uri=?, person_id=?, person_class=?, confirmed=? where orcid_id=? 
103 | """, (person_uri, person_id, person_class, confirmed, orcid_id)) 104 | else: 105 | #Add 106 | log.info("Adding %s", orcid_id) 107 | c.execute(""" 108 | insert into orcid_ids (orcid_id, active, person_uri, person_id, person_class, confirmed) 109 | values (?, 1, ?, ?, ?, ?) 110 | """, (orcid_id, person_uri, person_id, person_class, confirmed)) 111 | 112 | self._conn.commit() 113 | 114 | def get_least_recent(self, limit=None, before_datetime=None): 115 | """ 116 | Returns least recently updated active orcid ids as list of 117 | orcid_id, person_uri, person_id, person_class, confirmed. 118 | """ 119 | c = self._conn.cursor() 120 | sql = """ 121 | select orcid_id, person_uri, person_id, person_class, confirmed from orcid_ids where active=1 122 | """ 123 | if before_datetime: 124 | sql += " and (last_update < '%s' or last_update is null)" % before_datetime.strftime(DATETIME_FORMAT) 125 | 126 | sql += " order by last_update asc" 127 | 128 | if limit: 129 | sql += " limit %s" % limit 130 | 131 | c.execute(sql) 132 | return c.fetchall() 133 | 134 | def touch(self, orcid_id): 135 | """ 136 | Set last update for orcid id. 137 | """ 138 | c = self._conn.cursor() 139 | 140 | c.execute(""" 141 | update orcid_ids set last_update=CURRENT_TIMESTAMP where orcid_id=? and active=1 142 | """, (orcid_id,)) 143 | 144 | self._conn.commit() 145 | 146 | def __iter__(self): 147 | c = self._conn.cursor() 148 | c.execute(""" 149 | select orcid_id, active, last_update, person_uri, person_id, person_class, confirmed from orcid_ids 150 | """) 151 | 152 | return iter(c.fetchall()) 153 | 154 | def delete_all(self): 155 | c = self._conn.cursor() 156 | c.execute(""" 157 | update orcid_ids set active=0 158 | """) 159 | self._conn.commit() 160 | 161 | # Methods to make this a Context Manager. This is necessary to make sure the connection is closed properly. 
162 | def __enter__(self): 163 | return self 164 | 165 | def __exit__(self, exc_type, exc_val, exc_tb): 166 | self._conn.close() 167 | 168 | 169 | def load_single(orcid_id, person_uri, person_id, person_class, data_path, endpoint, username, password, 170 | namespace=None, skip_person=False, confirmed_orcid_id=False): 171 | with Store(data_path) as store: 172 | # Crosswalk 173 | (graph, profile, person_uri) = default_execute(orcid_id, namespace=namespace, person_uri=person_uri, 174 | person_id=person_id, skip_person=skip_person, 175 | person_class=person_class, confirmed_orcid_id=confirmed_orcid_id) 176 | 177 | graph_filepath = os.path.join(data_path, "%s.ttl" % orcid_id.lower()) 178 | previous_graph = Graph(namespace_manager=ns_manager) 179 | # Load last graph 180 | if os.path.exists(graph_filepath): 181 | log.debug("Loading previous graph %s", graph_filepath) 182 | previous_graph.parse(graph_filepath, format="turtle") 183 | 184 | # Diff against last graph 185 | (both_graph, delete_graph, add_graph) = graph_diff(previous_graph, graph) 186 | 187 | # SPARQL Update 188 | log.info("Adding %s, deleting %s triples for %s", len(add_graph), len(delete_graph), orcid_id) 189 | sparql_delete(delete_graph, endpoint, username, password) 190 | sparql_insert(add_graph, endpoint, username, password) 191 | 192 | # Save new last graph 193 | log.debug("Saving new graph %s", graph_filepath) 194 | with codecs.open(graph_filepath, "w") as out: 195 | graph.serialize(format="turtle", destination=out) 196 | 197 | # Touch 198 | store.touch(orcid_id) 199 | 200 | return graph, add_graph, delete_graph 201 | 202 | 203 | def load(data_path, endpoint, username, password, limit=None, before_datetime=None, namespace=None, skip_person=False): 204 | orcid_ids = [] 205 | failed_orcid_ids = [] 206 | with Store(data_path) as store: 207 | # Get the orcid ids to update 208 | results = store.get_least_recent(limit=limit, before_datetime=before_datetime) 209 | for (orcid_id, person_uri, person_id, person_class, confirmed) in results: 210 | try: 211 | load_single(orcid_id, person_uri, person_id, person_class, data_path, endpoint, username, password, 212 | namespace, skip_person, confirmed) 213 | orcid_ids.append(orcid_id) 214 | except Exception: 215 | failed_orcid_ids.append(orcid_id) 216 | return orcid_ids, failed_orcid_ids 217 | 218 | if __name__ == "__main__": 219 | parser = argparse.ArgumentParser() 220 | 221 | parser.add_argument("--debug", action="store_true") 222 | 223 | orcid_id_parent_parser = argparse.ArgumentParser(add_help=False) 224 | orcid_id_parent_parser.add_argument("orcid_id") 225 | data_path_parent_parser = argparse.ArgumentParser(add_help=False) 226 | data_path_parent_parser.add_argument("--data-path", dest="data_path", help="Path where db and ttl files will be " 227 | "stored. Default is ./data.", 228 | default="./data") 229 | 230 | subparsers = parser.add_subparsers(dest="command") 231 | 232 | add_parser = subparsers.add_parser("add", help="Adds or updates orcid id record. If inactive, marks active.", 233 | parents=[orcid_id_parent_parser, data_path_parent_parser]) 234 | add_parser.add_argument("--person-id", dest="person_id", help="Id for the person to use when constructing the " 235 | "person's URI. If not provided, the orcid id will be " 236 | "used.") 237 | add_parser.add_argument("--person-uri", dest="person_uri", help="A URI for the person. 
If not provided, one will " 238 | "be created from the orcid id or person id.") 239 | add_parser.add_argument("--person-class", dest="person_class", 240 | choices=["FacultyMember", "FacultyMemberEmeritus", "Librarian", "LibrarianEmeritus", 241 | "NonAcademic", "NonFacultyAcademic", "ProfessorEmeritus", "Student"], 242 | help="Class (in VIVO Core ontology) for a person. Default is a FOAF Person.") 243 | add_parser.add_argument("--confirmed", action="store_true", help="Mark the orcid id as confirmed.") 244 | 245 | delete_parser = subparsers.add_parser("delete", help="Marks an orcid id record as inactive so that it will not be " 246 | "loaded.", 247 | parents=[orcid_id_parent_parser, data_path_parent_parser]) 248 | 249 | delete_all_parser = subparsers.add_parser("delete-all", help="Marks all orcid id records as inactive.", 250 | parents=[data_path_parent_parser]) 251 | 252 | load_parser = subparsers.add_parser("load", help="Fetches orcid profiles, crosswalks to VIVO-ISF, loads to VIVO " 253 | "instance, and updates orcid id record. If loading multiple " 254 | "orcid ids, loads in least recent order.", 255 | parents=[data_path_parent_parser]) 256 | load_parser.add_argument("endpoint", help="Endpoint for SPARQL Update of VIVO instance, e.g., " 257 | "http://localhost/vivo/api/sparqlUpdate.") 258 | load_parser.add_argument("username", help="Username for VIVO root.") 259 | load_parser.add_argument("--namespace", default="http://vivo.mydomain.edu/individual/", help="VIVO namespace. Default is http://vivo.mydomain.edu/individual/.") 260 | load_parser.add_argument("--password", help="Password for VIVO root. Alternatively, provide in " 261 | "environment variable VIVO_ROOT_PASSWORD.") 262 | load_parser.add_argument("--orcid_id", help="Orcid id of person to load.") 263 | load_parser.add_argument("--limit", type=int, help="Maximum number of orcid ids to load.") 264 | load_parser.add_argument("--before", help="Orcid ids that were loaded before this date or never loaded.
Format is " 265 | "YYYY-MM-DD HH:MM:SS in UTC.") 266 | load_parser.add_argument("--skip-person", dest="skip_person", action="store_true", 267 | help="Skip adding triples declaring the person and the person's name.") 268 | 269 | list_parser = subparsers.add_parser("list", help="Lists orcid_id records in the db.", 270 | parents=[data_path_parent_parser]) 271 | 272 | # Parse 273 | args = parser.parse_args() 274 | 275 | if args.debug: 276 | logging.basicConfig(level=logging.DEBUG) 277 | 278 | if not os.path.exists(args.data_path): 279 | raise IOError("%s does not exists" % args.data_path) 280 | 281 | with Store(args.data_path) as main_store: 282 | if args.command == "add": 283 | print "Adding %s" % args.orcid_id 284 | main_store.add(args.orcid_id, person_uri=args.person_uri, person_id=args.person_id, 285 | person_class=args.person_class, confirmed=args.confirmed) 286 | elif args.command == "delete": 287 | print "Deleting %s" % args.orcid_id 288 | del main_store[args.orcid_id] 289 | elif args.command == "delete-all": 290 | print "Deleting all" 291 | main_store.delete_all() 292 | elif args.command == "list": 293 | for main_orcid_id, main_active, main_last_update, main_person_uri, \ 294 | main_person_id, main_person_class, main_confirmed in main_store: 295 | print "%s [active=%s; last_update=%s; person_uri=%s; person_id=%s, person_class=%s, confirmed=%s]" % ( 296 | main_orcid_id, 297 | "true" if main_active else "false", 298 | main_last_update, 299 | main_person_uri, 300 | main_person_id, 301 | main_person_class, 302 | main_confirmed 303 | ) 304 | 305 | if args.command == "load": 306 | main_password = args.password or os.environ["VIVO_ROOT_PASSWORD"] 307 | if args.orcid_id: 308 | with Store(args.data_path) as main_store: 309 | if args.orcid_id not in main_store: 310 | raise ValueError("%s not in db. Add person to db first." 
% args.orcid_id) 311 | main_orcid_id, main_active, main_last_update, main_person_uri, main_person_id, \ 312 | main_person_class, main_confirmed = main_store[args.orcid_id] 313 | print "Loading %s to %s" % (args.orcid_id, args.endpoint) 314 | load_single(main_orcid_id, main_person_uri, main_person_id, main_person_class, args.data_path, 315 | args.endpoint, args.username, main_password, 316 | namespace=args.namespace, skip_person=args.skip_person) 317 | else: 318 | main_before_datetime = datetime.strptime(args.before, DATETIME_FORMAT) if args.before else None 319 | print "Loading to %s" % args.endpoint 320 | main_orcid_ids, main_failed_orcid_ids = load(args.data_path, args.endpoint, args.username, 321 | main_password, limit=args.limit, 322 | before_datetime=main_before_datetime, 323 | namespace=args.namespace, 324 | skip_person=args.skip_person) 325 | print "Loaded: %s" % ", ".join(main_orcid_ids) 326 | print "Failed: %s" % ", ".join(main_failed_orcid_ids) 327 | 328 | print "Done" 329 | -------------------------------------------------------------------------------- /orcid2vivo_service.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from flask import Flask, render_template, request, session, Response, flash, Markup 4 | import argparse 5 | import json 6 | import urllib 7 | from orcid2vivo import default_execute 8 | import orcid2vivo_app.utility as utility 9 | 10 | app = Flask(__name__) 11 | def_format = None 12 | def_endpoint = None 13 | def_username = None 14 | def_password = None 15 | def_namespace = None 16 | def_person_class = "Person" 17 | def_skip_person = False 18 | def_output = "serialize" 19 | def_output_html = True 20 | def_output_profile = False 21 | def_confirmed = False 22 | 23 | content_types = { 24 | "xml": "application/rdf+xml", 25 | "n3": "text/rdf+n3", 26 | "turtle": "application/x-turtle", 27 | "nt": "text/plain", 28 | "pretty-xml": "application/rdf+xml", 29 | "trix": "application/rdf+xml" 30 | } 31 | 32 | @app.route('/', methods=["GET"]) 33 | def crosswalk_form(rdf=None, orcid_profile=None): 34 | return render_template("crosswalk_form.html", 35 | format=session.get("format") or def_format, 36 | endpoint=session.get("endpoint") or def_endpoint, 37 | username=session.get("username") or def_username, 38 | password=session.get("password") or def_password, 39 | namespace=session.get("namespace") or def_namespace, 40 | person_class=session.get("person_class") or def_person_class, 41 | skip_person=session.get("skip_person") or def_skip_person, 42 | confirmed=session.get("confirmed") or def_confirmed, 43 | output=session.get("output") or def_output, 44 | output_html=session.get("output_html") or def_output_html, 45 | output_profile=session.get("output_profile") or def_output_profile, 46 | rdf=rdf.decode("utf-8") if rdf else None, 47 | orcid_profile=json.dumps(orcid_profile, indent=3) if orcid_profile else None) 48 | 49 | @app.route('/', methods=["POST"]) 50 | def crosswalk(): 51 | session["format"] = request.form.get("format") 52 | endpoint = request.form.get("endpoint") 53 | session["endpoint"] = endpoint 54 | session["username"] = request.form.get("username") 55 | session["password"] = request.form.get("password") 56 | person_class = request.form.get("person_class") 57 | session["person_class"] = person_class 58 | session["skip_person"] = True if "skip_person" in request.form else False 59 | session["confirmed"] = True if "confirmed" in request.form else False 60 | session["output"] = 
request.form.get("output") 61 | session["output_html"] = True if "output_html" in request.form else False 62 | session["output_profile"] = True if "output_profile" in request.form else False 63 | 64 | #Excute with default strategies 65 | (g, p, per_uri) = default_execute(request.form["orcid_id"], 66 | namespace=request.form["namespace"], 67 | person_uri=request.form["person_uri"], 68 | person_id=request.form["person_id"], 69 | skip_person=True if "skip_person" in request.form else False, 70 | person_class=person_class if person_class != "Person" else None, 71 | confirmed_orcid_id=True if "confirmed" in request.form else False) 72 | 73 | if "output" in request.form and request.form["output"] == "vivo": 74 | utility.sparql_insert(g, endpoint, request.form["username"], request.form["password"]) 75 | msg = "Loaded to VIVO" 76 | if endpoint.endswith("api/sparqlUpdate"): 77 | vivo_profile_url = "%s/individual?%s" % (endpoint[:-17], urllib.urlencode({"uri": per_uri})) 78 | msg += ". Try %s." % (vivo_profile_url, vivo_profile_url) 79 | flash(Markup(msg)) 80 | return crosswalk_form() 81 | else: 82 | #Serialize 83 | rdf = g.serialize(format=request.form['format'], encoding="utf-8") 84 | if "output_html" in request.form or "output_profile" in request.form: 85 | return crosswalk_form(rdf=rdf if "output_html" in request.form else None, 86 | orcid_profile=p if "output_profile" in request.form else None) 87 | else: 88 | return Response(rdf, content_type=content_types[request.form['format']]) 89 | 90 | 91 | if __name__ == "__main__": 92 | parser = argparse.ArgumentParser() 93 | parser.add_argument("--format", default="turtle", choices=["xml", "n3", "turtle", "nt", "pretty-xml", "trix"], 94 | help="The RDF format for serializing. Default is turtle.") 95 | parser.add_argument("--endpoint", dest="endpoint", 96 | help="Endpoint for SPARQL Update of VIVO instance,e.g., http://localhost/vivo/api/sparqlUpdate.") 97 | parser.add_argument("--username", dest="username", help="Username for VIVO root.") 98 | parser.add_argument("--password", dest="password", 99 | help="Password for VIVO root.") 100 | parser.add_argument("--namespace", default="http://vivo.mydomain.edu/individual/", 101 | help="VIVO namespace. Default is http://vivo.mydomain.edu/individual/.") 102 | parser.add_argument("--person-class", dest="person_class", 103 | choices=["FacultyMember", "FacultyMemberEmeritus", "Librarian", "LibrarianEmeritus", 104 | "NonAcademic", "NonFacultyAcademic", "ProfessorEmeritus", "Student"], 105 | help="Class (in VIVO Core ontology) for a person. Default is a FOAF Person.") 106 | parser.add_argument("--skip-person", dest="skip_person", action="store_true", 107 | help="Skip adding triples declaring the person and the person's name.") 108 | parser.add_argument("--confirmed", action="store_true", help="Mark the orcid id as confirmed.") 109 | parser.add_argument("--debug", action="store_true") 110 | parser.add_argument("--port", type=int, default="5000", help="The port the service should run on. 
Default is 5000.") 111 | 112 | #Parse 113 | args = parser.parse_args() 114 | 115 | def_format = args.format 116 | def_endpoint = args.endpoint 117 | def_username = args.username 118 | def_password = args.password 119 | def_namespace = args.namespace 120 | def_person_class = args.person_class 121 | def_skip_person = args.skip_person 122 | def_confirmed = args.confirmed 123 | 124 | app.debug = args.debug 125 | app.secret_key = "orcid2vivo" 126 | app.run(host="0.0.0.0", port=args.port) -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | rdflib==4.2.0 2 | requests==2.7.0 3 | bibtexparser==0.6.1 4 | flask==0.10.1 5 | vcrpy==1.7.0 6 | mock==1.3.0 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name='orcid2vivo', 5 | version='0.10.0', 6 | url='https://github.com/gwu-libraries/orcid2vivo', 7 | author='Justin Littman', 8 | author_email='justinlittman@gmail.com', 9 | py_modules=['orcid2vivo', 'orcid2vivo_loader', 'orcid2vivo_service'], 10 | packages=['orcid2vivo_app', ], 11 | scripts=['orcid2vivo.py', 'orcid2vivo_loader.py', 'orcid2vivo_service.py'], 12 | description="For retrieving data from the ORCID API and crosswalking to VIVO-ISF.", 13 | platforms=['POSIX'], 14 | test_suite='tests', 15 | install_requires=['rdflib>=4.2.0', 16 | 'requests>=2.7.0', 17 | 'bibtexparser>=0.6.1', 18 | 'flask>=0.10.1'], 19 | tests_require=['vcrpy>=1.7.0', 20 | 'mock>=1.3.0'], 21 | classifiers=[ 22 | 'Intended Audience :: Developers', 23 | 'Topic :: Software Development :: Libraries :: Python Modules', 24 | 'Programming Language :: Python :: 2.7', 25 | 'Development Status :: 4 - Beta', 26 | 'Framework :: Flask', 27 | ], 28 | ) 29 | -------------------------------------------------------------------------------- /templates/crosswalk_form.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Orcid to VIVO 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 |
18 | ...
23 | {% with messages = get_flashed_messages() %}
24 | {% if messages %}
25 | {% for message in messages %}
26 | ...
27 | {% endfor %}
28 | {% endif %}
29 | {% endwith %}
30 |
31 | {% if rdf %}
32 | RDF
33 | {{ rdf }}
34 | {% endif %}
35 |
36 | {% if orcid_profile %}
37 | Orcid profile
38 | {{ orcid_profile }}
39 | {% endif %}
40 |
41 | ...
119 | If an id is not provided, the Orcid Id will be used.
120 | ...
127 | A URI for the person. If not provided, the Person Id or Orcid Id will be used.
128 | ...
161 | Be patient.
162 | ...
165 |
166 | 167 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import unittest 3 | import os 4 | 5 | FIXTURE_PATH = os.path.join( 6 | os.path.dirname(os.path.realpath(__file__)), 'fixtures' 7 | ) 8 | 9 | 10 | class TestCase(unittest.TestCase): 11 | logging.basicConfig(level=logging.DEBUG) -------------------------------------------------------------------------------- /tests/app/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gwu-libraries/orcid2vivo/14c4c8ebb828d862261324a13616aad1f2f0c721/tests/app/__init__.py -------------------------------------------------------------------------------- /tests/app/test_affiliations.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | import json 3 | from orcid2vivo_app.affiliations import AffiliationsCrosswalk 4 | import orcid2vivo_app.vivo_namespace as ns 5 | from rdflib import Graph 6 | from orcid2vivo_app.vivo_uri import HashIdentifierStrategy 7 | from orcid2vivo import SimpleCreateEntitiesStrategy 8 | 9 | 10 | class TestAffiliations(TestCase): 11 | 12 | def setUp(self): 13 | self.graph = Graph(namespace_manager=ns.ns_manager) 14 | self.person_uri = ns.D["test"] 15 | self.create_strategy = SimpleCreateEntitiesStrategy(HashIdentifierStrategy(), person_uri=self.person_uri) 16 | self.crosswalker = AffiliationsCrosswalk(identifier_strategy=self.create_strategy, 17 | create_strategy=self.create_strategy) 18 | 19 | def test_no_affiliations(self): 20 | orcid_profile = json.loads(""" 21 | { 22 | "activities-summary": { 23 | } 24 | } 25 | """) 26 | self.crosswalker.crosswalk(orcid_profile, self.person_uri, self.graph) 27 | self.assertEqual(0, len(self.graph)) 28 | 29 | def test_no_education(self): 30 | orcid_profile = json.loads(""" 31 | { 32 | "activities-summary": { 33 | "educations": { 34 | "education-summary": [] 35 | } 36 | } 37 | } 38 | """) 39 | self.crosswalker.crosswalk(orcid_profile, self.person_uri, self.graph) 40 | self.assertEqual(0, len(self.graph)) 41 | 42 | def test_education(self): 43 | orcid_profile = json.loads(""" 44 | { 45 | "activities-summary": { 46 | "educations": { 47 | "last-modified-date": { 48 | "value": 1486085029078 49 | }, 50 | "education-summary": [ 51 | { 52 | "created-date": { 53 | "value": 1385568459467 54 | }, 55 | "last-modified-date": { 56 | "value": 1486085026897 57 | }, 58 | "source": { 59 | "source-orcid": { 60 | "uri": "http://orcid.org/0000-0001-5109-3700", 61 | "path": "0000-0001-5109-3700", 62 | "host": "orcid.org" 63 | }, 64 | "source-client-id": null, 65 | "source-name": { 66 | "value": "Laurel L Haak" 67 | } 68 | }, 69 | "department-name": "Neurosciences", 70 | "role-title": "PhD", 71 | "start-date": { 72 | "year": { 73 | "value": "1995" 74 | }, 75 | "month": null, 76 | "day": null 77 | }, 78 | "end-date": { 79 | "year": { 80 | "value": "1997" 81 | }, 82 | "month": null, 83 | "day": null 84 | }, 85 | "organization": { 86 | "name": "Stanford University School of Medicine", 87 | "address": { 88 | "city": "Stanford", 89 | "region": "California", 90 | "country": "US" 91 | }, 92 | "disambiguated-organization": null 93 | }, 94 | "visibility": "PUBLIC", 95 | "put-code": 1006, 96 | "path": "/0000-0001-5109-3700/education/1006" 97 | } 98 | ], 99 | "path": "/0000-0001-5109-3700/educations" 100 | } 101 | } 102 | } 103 | 
""") 104 | # Changed start date to 1995 105 | self.crosswalker.crosswalk(orcid_profile, self.person_uri, self.graph) 106 | self.assertTrue(bool(self.graph.query(""" 107 | ask where { 108 | ?awdgre a vivo:AwardedDegree . 109 | ?awdgre rdfs:label "PhD" . 110 | ?awdgre obo:RO_0002353 ?awdgreproc . 111 | ?awdgre vivo:assignedBy ?org . 112 | ?awdgre vivo:relates d:test, ?dgre . 113 | ?org a foaf:Organization . 114 | ?org rdfs:label "Stanford University School of Medicine" . 115 | ?org obo:RO_0001025 ?geo . 116 | ?geo rdfs:label "Stanford, California" . 117 | ?awdgreproc a vivo:EducationalProcess . 118 | ?awdgreproc obo:RO_0000057 ?org, d:test . 119 | ?awdgreproc vivo:dateTimeInterval ?awdgreprocint . 120 | ?awdgreproc vivo:departmentOrSchool "Neurosciences" . 121 | ?awdgreprocint a vivo:DateTimeInterval . 122 | ?awdgreprocint vivo:end ?awdgreprocintend . 123 | ?awdgreprocintend a vivo:DateTimeValue . 124 | ?awdgreprocintend rdfs:label "1997" . 125 | ?awdgreprocintend vivo:dateTime "1997-01-01T00:00:00"^^xsd:dateTime . 126 | ?awdgreprocintend vivo:dateTimePrecision vivo:yearPrecision . 127 | ?awdgreprocint vivo:start ?awdgreprocintstart . 128 | ?awdgreprocintstart a vivo:DateTimeValue . 129 | ?awdgreprocintstart rdfs:label "1995" . 130 | ?awdgreprocintstart vivo:dateTime "1995-01-01T00:00:00"^^xsd:dateTime . 131 | ?awdgreprocintstart vivo:dateTimePrecision vivo:yearPrecision . 132 | } 133 | """))) 134 | 135 | def test_education_minimal(self): 136 | orcid_profile = json.loads(""" 137 | { 138 | "activities-summary": { 139 | "educations": { 140 | "last-modified-date": { 141 | "value": 1486085029078 142 | }, 143 | "education-summary": [ 144 | { 145 | "created-date": { 146 | "value": 1385568459467 147 | }, 148 | "last-modified-date": { 149 | "value": 1486085026897 150 | }, 151 | "source": { 152 | "source-orcid": { 153 | "uri": "http://orcid.org/0000-0001-5109-3700", 154 | "path": "0000-0001-5109-3700", 155 | "host": "orcid.org" 156 | }, 157 | "source-client-id": null, 158 | "source-name": { 159 | "value": "Laurel L Haak" 160 | } 161 | }, 162 | "department-name": null, 163 | "role-title": "PhD", 164 | "start-date": null, 165 | "end-date": null, 166 | "organization": { 167 | "name": "Stanford University School of Medicine", 168 | "address": { 169 | "city": "Stanford", 170 | "region": "California", 171 | "country": "US" 172 | }, 173 | "disambiguated-organization": null 174 | }, 175 | "visibility": "PUBLIC", 176 | "put-code": 1006, 177 | "path": "/0000-0001-5109-3700/education/1006" 178 | } 179 | ], 180 | "path": "/0000-0001-5109-3700/educations" 181 | } 182 | } 183 | } 184 | """) 185 | self.crosswalker.crosswalk(orcid_profile, self.person_uri, self.graph) 186 | self.assertTrue(bool(self.graph.query(""" 187 | ask where { 188 | ?org a foaf:Organization . 189 | ?org rdfs:label "Stanford University School of Medicine" . 190 | ?org obo:RO_0001025 ?geo . 191 | ?geo rdfs:label "Stanford, California" . 192 | ?awdgreproc a vivo:EducationalProcess . 193 | ?awdgreproc obo:RO_0000057 ?org, d:test . 194 | filter not exists { 195 | ?awdgreproc vivo:departmentOrSchool ?awdgredept . 
196 | } 197 | } 198 | """))) 199 | -------------------------------------------------------------------------------- /tests/app/test_bio.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from unittest import TestCase 5 | import json 6 | from orcid2vivo_app.bio import BioCrosswalk 7 | import orcid2vivo_app.vivo_namespace as ns 8 | from rdflib import Literal, Graph, RDF, RDFS 9 | from orcid2vivo_app.vivo_namespace import D, VIVO, FOAF 10 | from orcid2vivo_app.vivo_uri import HashIdentifierStrategy 11 | from orcid2vivo import SimpleCreateEntitiesStrategy 12 | 13 | 14 | class TestBio(TestCase): 15 | def setUp(self): 16 | self.graph = Graph(namespace_manager=ns.ns_manager) 17 | self.person_uri = ns.D["test"] 18 | self.create_strategy = SimpleCreateEntitiesStrategy(HashIdentifierStrategy(), person_uri=self.person_uri) 19 | self.crosswalker = BioCrosswalk(identifier_strategy=self.create_strategy, 20 | create_strategy=self.create_strategy) 21 | 22 | def test_no_external_identifiers(self): 23 | orcid_profile = json.loads(""" 24 | { 25 | "person": { 26 | "external-identifiers": { 27 | "last-modified-date": null, 28 | "external-identifier": [], 29 | "path": "/0000-0003-4507-4735/external-identifiers" 30 | }, 31 | "path": "/0000-0003-4507-4735/person" 32 | } 33 | } 34 | """) 35 | self.crosswalker.crosswalk(orcid_profile, self.person_uri, self.graph) 36 | self.assertEqual(0, len(self.graph)) 37 | 38 | def test_external_identifiers(self): 39 | orcid_profile = json.loads(""" 40 | { 41 | "person": { 42 | "external-identifiers": { 43 | "last-modified-date": { 44 | "value": 1390435480189 45 | }, 46 | "external-identifier": [ 47 | { 48 | "created-date": { 49 | "value": 1379686803951 50 | }, 51 | "last-modified-date": { 52 | "value": 1379686803951 53 | }, 54 | "source": { 55 | "source-orcid": null, 56 | "source-client-id": { 57 | "uri": "http://orcid.org/client/0000-0002-5982-8983", 58 | "path": "0000-0002-5982-8983", 59 | "host": "orcid.org" 60 | }, 61 | "source-name": { 62 | "value": "Scopus to ORCID" 63 | } 64 | }, 65 | "external-id-type": "Scopus Author ID", 66 | "external-id-value": "6602258586", 67 | "external-id-url": { 68 | "value": "http://www.scopus.com/inward/authorDetails.url?authorID=6602258586&partnerID=MN8TOARS" 69 | }, 70 | "external-id-relationship": "SELF", 71 | "visibility": "PUBLIC", 72 | "path": "/0000-0001-5109-3700/external-identifiers/142173", 73 | "put-code": 142173, 74 | "display-index": 0 75 | }, 76 | { 77 | "created-date": { 78 | "value": 1379686803951 79 | }, 80 | "last-modified-date": { 81 | "value": 1379686803951 82 | }, 83 | "source": { 84 | "source-orcid": { 85 | "uri": "http://orcid.org/0000-0001-7707-4137", 86 | "path": "0000-0001-7707-4137", 87 | "host": "orcid.org" 88 | }, 89 | "source-client-id": null, 90 | "source-name": { 91 | "value": "Clarivate Analytics" 92 | } 93 | }, 94 | "external-id-type": "ResearcherID", 95 | "external-id-value": "C-4986-2008", 96 | "external-id-url": { 97 | "value": "http://www.researcherid.com/rid/C-4986-2008" 98 | }, 99 | "external-id-relationship": "SELF", 100 | "visibility": "PUBLIC", 101 | "path": "/0000-0001-5109-3700/external-identifiers/38181", 102 | "put-code": 38181, 103 | "display-index": 0 104 | }, 105 | { 106 | "created-date": { 107 | "value": 1390435480189 108 | }, 109 | "last-modified-date": { 110 | "value": 1390435480189 111 | }, 112 | "source": { 113 | "source-orcid": null, 114 | "source-client-id": { 115 | "uri": 
"http://orcid.org/client/0000-0003-0412-1857", 116 | "path": "0000-0003-0412-1857", 117 | "host": "orcid.org" 118 | }, 119 | "source-name": { 120 | "value": "ISNI2ORCID search and link" 121 | } 122 | }, 123 | "external-id-type": "ISNI", 124 | "external-id-value": "0000000138352317", 125 | "external-id-url": { 126 | "value": "http://isni.org/isni/0000000138352317" 127 | }, 128 | "external-id-relationship": "SELF", 129 | "visibility": "PUBLIC", 130 | "path": "/0000-0001-5109-3700/external-identifiers/187639", 131 | "put-code": 187639, 132 | "display-index": 0 133 | } 134 | ], 135 | "path": "/0000-0001-5109-3700/external-identifiers" 136 | }, 137 | "path": "/0000-0001-5109-3700/person" 138 | } 139 | } 140 | """) 141 | self.create_strategy.skip_person = True 142 | self.crosswalker.crosswalk(orcid_profile, self.person_uri, self.graph) 143 | self.assertEqual(2, len(self.graph)) 144 | # ScopusID is added. 145 | self.assertTrue(self.graph[D["test"]: VIVO["scopusId"]: Literal("6602258586")]) 146 | # ResearcherId is added. 147 | self.assertTrue(self.graph[D["test"]: VIVO["researcherId"]: Literal("C-4986-2008")]) 148 | 149 | def test_name(self): 150 | orcid_profile = json.loads(u""" 151 | { 152 | "person": { 153 | "name": { 154 | "created-date": { 155 | "value": 1460753221409 156 | }, 157 | "last-modified-date": { 158 | "value": 1460753221409 159 | }, 160 | "given-names": { 161 | "value": "Laurel" 162 | }, 163 | "family-name": { 164 | "value": "Haak" 165 | }, 166 | "credit-name": { 167 | "value": "Laurel L Haak" 168 | }, 169 | "source": null, 170 | "visibility": "PUBLIC", 171 | "path": "0000-0001-5109-3700" 172 | }, 173 | "other-names": { 174 | "last-modified-date": { 175 | "value": 1461191605426 176 | }, 177 | "other-name": [ 178 | { 179 | "created-date": { 180 | "value": 1461191605416 181 | }, 182 | "last-modified-date": { 183 | "value": 1461191605416 184 | }, 185 | "source": { 186 | "source-orcid": { 187 | "uri": "http://orcid.org/0000-0001-5109-3700", 188 | "path": "0000-0001-5109-3700", 189 | "host": "orcid.org" 190 | }, 191 | "source-client-id": null, 192 | "source-name": { 193 | "value": "Laurel L Haak" 194 | } 195 | }, 196 | "content": " L. L. 
Haak", 197 | "visibility": "PUBLIC", 198 | "path": "/0000-0001-5109-3700/other-names/721941", 199 | "put-code": 721941, 200 | "display-index": 0 201 | }, 202 | { 203 | "created-date": { 204 | "value": 1461191605425 205 | }, 206 | "last-modified-date": { 207 | "value": 1461191605425 208 | }, 209 | "source": { 210 | "source-orcid": { 211 | "uri": "http://orcid.org/0000-0001-5109-3700", 212 | "path": "0000-0001-5109-3700", 213 | "host": "orcid.org" 214 | }, 215 | "source-client-id": null, 216 | "source-name": { 217 | "value": "Laurel L Haak" 218 | } 219 | }, 220 | "content": "L Haak", 221 | "visibility": "PUBLIC", 222 | "path": "/0000-0001-5109-3700/other-names/721942", 223 | "put-code": 721942, 224 | "display-index": 0 225 | }, 226 | { 227 | "created-date": { 228 | "value": 1461191605426 229 | }, 230 | "last-modified-date": { 231 | "value": 1461191605426 232 | }, 233 | "source": { 234 | "source-orcid": { 235 | "uri": "http://orcid.org/0000-0001-5109-3700", 236 | "path": "0000-0001-5109-3700", 237 | "host": "orcid.org" 238 | }, 239 | "source-client-id": null, 240 | "source-name": { 241 | "value": "Laurel L Haak" 242 | } 243 | }, 244 | "content": "Laure Haak", 245 | "visibility": "PUBLIC", 246 | "path": "/0000-0001-5109-3700/other-names/721943", 247 | "put-code": 721943, 248 | "display-index": 0 249 | }, 250 | { 251 | "created-date": { 252 | "value": 1461191605426 253 | }, 254 | "last-modified-date": { 255 | "value": 1461191605426 256 | }, 257 | "source": { 258 | "source-orcid": { 259 | "uri": "http://orcid.org/0000-0001-5109-3700", 260 | "path": "0000-0001-5109-3700", 261 | "host": "orcid.org" 262 | }, 263 | "source-client-id": null, 264 | "source-name": { 265 | "value": "Laurel L Haak" 266 | } 267 | }, 268 | "content": "Laurela L Hāka", 269 | "visibility": "PUBLIC", 270 | "path": "/0000-0001-5109-3700/other-names/721944", 271 | "put-code": 721944, 272 | "display-index": 0 273 | } 274 | ], 275 | "path": "/0000-0001-5109-3700/other-names" 276 | } 277 | } 278 | } 279 | """) 280 | self.crosswalker.crosswalk(orcid_profile, self.person_uri, self.graph) 281 | # Laurel is a person 282 | self.assertTrue(self.graph[D["test"]: RDF.type: FOAF.Person]) 283 | # with a label 284 | self.assertTrue(self.graph[D["test"]: RDFS.label: Literal("Laurel Haak")]) 285 | 286 | # vcard test 287 | self.assertTrue(bool(self.graph.query(""" 288 | ask where { 289 | ?vcn a vcard:Name . 290 | ?vcn vcard:familyName "Haak" . 291 | ?vcn vcard:givenName "Laurel" . 292 | ?vc obo:ARG_2000029 d:test . 293 | ?vc vcard:hasName ?vcn . 294 | } 295 | """))) 296 | 297 | def test_biography(self): 298 | orcid_profile = json.loads(""" 299 | { 300 | "person": { 301 | "biography": { 302 | "created-date": { 303 | "value": 1460753221411 304 | }, 305 | "last-modified-date": { 306 | "value": 1487932762756 307 | }, 308 | "content": "Laurel L. Haak, PhD, is the Executive Director of ORCID, an international and interdisciplinary non-profit organization dedicated to providing the technical infrastructure to generate and maintain unique and persistent identifiers for researchers and scholars.", 309 | "visibility": "PUBLIC", 310 | "path": "/0000-0001-5109-3700/biography" 311 | } 312 | } 313 | } 314 | """) 315 | 316 | self.crosswalker.crosswalk(orcid_profile, self.person_uri, self.graph) 317 | 318 | # Has a biography 319 | self.assertTrue(self.graph[D["test"]: VIVO["overview"]: Literal("Laurel L. 
Haak, PhD, is the Executive " 320 | "Director of ORCID, an international and " 321 | "interdisciplinary non-profit organization " 322 | "dedicated to providing the technical " 323 | "infrastructure to generate and maintain " 324 | "unique and persistent identifiers for " 325 | "researchers and scholars.")]) 326 | 327 | def test_no_biography(self): 328 | orcid_profile = json.loads(""" 329 | { 330 | "person": { 331 | "biography": { 332 | "created-date": { 333 | "value": 1460766291133 334 | }, 335 | "last-modified-date": { 336 | "value": 1460766291133 337 | }, 338 | "content": null, 339 | "visibility": "PUBLIC", 340 | "path": "/0000-0003-4507-4735/biography" 341 | } 342 | } 343 | } 344 | """) 345 | 346 | self.crosswalker.crosswalk(orcid_profile, self.person_uri, self.graph) 347 | 348 | # Has a biography 349 | self.assertEqual(0, len(self.graph)) 350 | 351 | def test_websites(self): 352 | orcid_profile = json.loads(""" 353 | { 354 | "person": { 355 | "researcher-urls": { 356 | "last-modified-date": { 357 | "value": 1463003428816 358 | }, 359 | "researcher-url": [ 360 | { 361 | "created-date": { 362 | "value": 1461191605427 363 | }, 364 | "last-modified-date": { 365 | "value": 1463003428816 366 | }, 367 | "source": { 368 | "source-orcid": { 369 | "uri": "http://orcid.org/0000-0001-5109-3700", 370 | "path": "0000-0001-5109-3700", 371 | "host": "orcid.org" 372 | }, 373 | "source-client-id": null, 374 | "source-name": { 375 | "value": "Laurel L Haak" 376 | } 377 | }, 378 | "url-name": "LinkedIn", 379 | "url": { 380 | "value": "http://www.linkedin.com/pub/laurel-haak/3/1b/4a3/" 381 | }, 382 | "visibility": "PUBLIC", 383 | "path": "/0000-0001-5109-3700/researcher-urls/714700", 384 | "put-code": 714700, 385 | "display-index": 0 386 | }, 387 | { 388 | "created-date": { 389 | "value": 1461191605427 390 | }, 391 | "last-modified-date": { 392 | "value": 1463003428816 393 | }, 394 | "source": { 395 | "source-orcid": { 396 | "uri": "http://orcid.org/0000-0001-5109-3700", 397 | "path": "0000-0001-5109-3700", 398 | "host": "orcid.org" 399 | }, 400 | "source-client-id": null, 401 | "source-name": { 402 | "value": "Laurel L Haak" 403 | } 404 | }, 405 | "url-name": null, 406 | "url": { 407 | "value": "https://www.researchgate.net/profile/Laurel_Haak" 408 | }, 409 | "visibility": "PUBLIC", 410 | "path": "/0000-0001-5109-3700/researcher-urls/714701", 411 | "put-code": 714701, 412 | "display-index": 0 413 | } 414 | ], 415 | "path": "/0000-0001-5109-3700/researcher-urls" 416 | } 417 | } 418 | } 419 | """) 420 | # Set ResearchGate url-name to null. 421 | 422 | self.crosswalker.crosswalk(orcid_profile, self.person_uri, self.graph) 423 | # LinkedIn 424 | self.assertTrue(bool(self.graph.query(""" 425 | ask where { 426 | ?vcw a vcard:URL . 427 | ?vcw vcard:url "http://www.linkedin.com/pub/laurel-haak/3/1b/4a3/"^^xsd:anyURI . 428 | ?vcw rdfs:label "LinkedIn" . 429 | ?vc a vcard:Individual . 430 | ?vc vcard:hasURL ?vcw . 431 | } 432 | """))) 433 | 434 | # ResearchGate 435 | self.assertTrue(bool(self.graph.query(""" 436 | ask where { 437 | ?vcw a vcard:URL . 438 | ?vcw vcard:url "https://www.researchgate.net/profile/Laurel_Haak"^^xsd:anyURI . 439 | ?vc a vcard:Individual . 440 | ?vc vcard:hasURL ?vcw . 441 | filter not exists { 442 | ?vcw rdfs:label ?label . 
443 | } 444 | } 445 | """))) 446 | 447 | def test_no_websites(self): 448 | orcid_profile = json.loads(""" 449 | { 450 | "person": { 451 | "researcher-urls": { 452 | "last-modified-date": null, 453 | "researcher-url": [], 454 | "path": "/0000-0003-4507-4735/researcher-urls" 455 | } 456 | } 457 | } 458 | """) 459 | 460 | self.crosswalker.crosswalk(orcid_profile, self.person_uri, self.graph) 461 | 462 | # Has a biography 463 | self.assertEqual(0, len(self.graph)) 464 | 465 | def test_no_keywords(self): 466 | orcid_profile = json.loads(""" 467 | { 468 | "person": { 469 | "keywords": { 470 | "last-modified-date": null, 471 | "keyword": [], 472 | "path": "/0000-0003-4507-4735/keywords" 473 | } 474 | } 475 | } 476 | """) 477 | 478 | self.crosswalker.crosswalk(orcid_profile, self.person_uri, self.graph) 479 | 480 | # Has a biography 481 | self.assertEqual(0, len(self.graph)) 482 | 483 | def test_keywords(self): 484 | orcid_profile = json.loads(""" 485 | { 486 | "person": { 487 | "keywords": { 488 | "last-modified-date": { 489 | "value": 1464800983143 490 | }, 491 | "keyword": [ 492 | { 493 | "created-date": { 494 | "value": 1461191605415 495 | }, 496 | "last-modified-date": { 497 | "value": 1464800983143 498 | }, 499 | "source": { 500 | "source-orcid": { 501 | "uri": "http://orcid.org/0000-0001-5109-3700", 502 | "path": "0000-0001-5109-3700", 503 | "host": "orcid.org" 504 | }, 505 | "source-client-id": null, 506 | "source-name": { 507 | "value": "Laurel L Haak" 508 | } 509 | }, 510 | "content": "persistent identifiers, research policy, science workforce, program evaluation, neuroscience, calcium imaging, oligodendrocytes, circadian rhythms", 511 | "visibility": "PUBLIC", 512 | "path": "/0000-0001-5109-3700/keywords/419740", 513 | "put-code": 419740, 514 | "display-index": 0 515 | } 516 | ], 517 | "path": "/0000-0001-5109-3700/keywords" 518 | } 519 | } 520 | } 521 | """) 522 | 523 | self.crosswalker.crosswalk(orcid_profile, self.person_uri, self.graph) 524 | self.assertEqual(8, len(self.graph)) 525 | 526 | self.assertTrue(bool(self.graph.query(""" 527 | ask where { 528 | d:test vivo:freetextKeyword "persistent identifiers" . 529 | d:test vivo:freetextKeyword "research policy" . 530 | d:test vivo:freetextKeyword "science workforce" . 
531 | } 532 | """))) 533 | -------------------------------------------------------------------------------- /tests/app/test_fundings.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from unittest import TestCase 5 | from orcid2vivo_app.fundings import FundingCrosswalk 6 | import orcid2vivo_app.vivo_namespace as ns 7 | from orcid2vivo_app.vivo_uri import HashIdentifierStrategy 8 | from orcid2vivo import SimpleCreateEntitiesStrategy 9 | 10 | from rdflib import Graph, RDFS 11 | import json 12 | 13 | 14 | class TestFundings(TestCase): 15 | 16 | def setUp(self): 17 | self.graph = Graph(namespace_manager=ns.ns_manager) 18 | self.person_uri = ns.D["test"] 19 | self.create_strategy = SimpleCreateEntitiesStrategy(HashIdentifierStrategy(), person_uri=self.person_uri) 20 | self.crosswalker = FundingCrosswalk(identifier_strategy=self.create_strategy, 21 | create_strategy=self.create_strategy) 22 | 23 | def test_no_funding(self): 24 | orcid_profile = json.loads(""" 25 | { 26 | "activities-summary": { 27 | "fundings": { 28 | "group": [] 29 | } 30 | } 31 | } 32 | """) 33 | self.crosswalker.crosswalk(orcid_profile, self.person_uri, self.graph) 34 | # Assert no triples in graph 35 | self.assertTrue(len(self.graph) == 0) 36 | 37 | def test_with_funding(self): 38 | orcid_profile = json.loads(""" 39 | { 40 | "activities-summary": { 41 | "fundings": { 42 | "last-modified-date": { 43 | "value": 1444208097475 44 | }, 45 | "group": [ 46 | { 47 | "last-modified-date": { 48 | "value": 1437078970386 49 | }, 50 | "external-ids": { 51 | "external-id": [ 52 | { 53 | "external-id-type": "grant_number", 54 | "external-id-value": "0536999", 55 | "external-id-url": { 56 | "value": "http://www.nsf.gov/awardsearch/showAward?AWD_ID=0536999&HistoricalAwards=false" 57 | }, 58 | "external-id-relationship": "SELF" 59 | } 60 | ] 61 | }, 62 | "funding-summary": [ 63 | { 64 | "created-date": { 65 | "value": 1427825460988 66 | }, 67 | "last-modified-date": { 68 | "value": 1437078970386 69 | }, 70 | "source": { 71 | "source-orcid": null, 72 | "source-client-id": { 73 | "uri": "http://orcid.org/client/0000-0003-2174-0924", 74 | "path": "0000-0003-2174-0924", 75 | "host": "orcid.org" 76 | }, 77 | "source-name": { 78 | "value": "ÜberWizard for ORCID" 79 | } 80 | }, 81 | "title": { 82 | "title": { 83 | "value": "ADVANCE Leadership Award: Women in Science and Engineering: A Guide to Maximizing their Potential" 84 | }, 85 | "translated-title": null 86 | }, 87 | "external-ids": { 88 | "external-id": [ 89 | { 90 | "external-id-type": "grant_number", 91 | "external-id-value": "0536999", 92 | "external-id-url": { 93 | "value": "http://www.nsf.gov/awardsearch/showAward?AWD_ID=0536999&HistoricalAwards=false" 94 | }, 95 | "external-id-relationship": "SELF" 96 | }, 97 | { 98 | "external-id-type": "grant_number", 99 | "external-id-value": "0536999", 100 | "external-id-url": { 101 | "value": "http://grants.uberresearch.com/100000081/0536999/ADVANCE-Leadership-Award-Women-in-Science-and-Engineering-A-Guide-to-Maximizing-their-Potential" 102 | }, 103 | "external-id-relationship": "SELF" 104 | } 105 | ] 106 | }, 107 | "type": "GRANT", 108 | "start-date": { 109 | "year": { 110 | "value": "2006" 111 | }, 112 | "month": { 113 | "value": "04" 114 | }, 115 | "day": { 116 | "value": "01" 117 | } 118 | }, 119 | "end-date": { 120 | "year": { 121 | "value": "2007" 122 | }, 123 | "month": { 124 | "value": "03" 125 | }, 126 | "day": { 127 | "value": "31" 128 | } 129 | 
}, 130 | "organization": { 131 | "name": "National Science Foundation - Directorate for Education and Human Resources", 132 | "address": { 133 | "city": "n/a", 134 | "region": null, 135 | "country": "US" 136 | }, 137 | "disambiguated-organization": null 138 | }, 139 | "visibility": "PUBLIC", 140 | "put-code": 74458, 141 | "path": "/0000-0001-5109-3700/funding/74458", 142 | "display-index": "0" 143 | } 144 | ] 145 | }, 146 | { 147 | "last-modified-date": { 148 | "value": 1440583684368 149 | }, 150 | "external-ids": { 151 | "external-id": [ 152 | { 153 | "external-id-type": "grant_number", 154 | "external-id-value": "0305602", 155 | "external-id-url": { 156 | "value": "http://www.nsf.gov/awardsearch/showAward?AWD_ID=0305602&HistoricalAwards=false" 157 | }, 158 | "external-id-relationship": "SELF" 159 | } 160 | ] 161 | }, 162 | "funding-summary": [ 163 | { 164 | "created-date": { 165 | "value": 1440583684368 166 | }, 167 | "last-modified-date": { 168 | "value": 1440583684368 169 | }, 170 | "source": { 171 | "source-orcid": null, 172 | "source-client-id": { 173 | "uri": "http://orcid.org/client/0000-0003-2174-0924", 174 | "path": "0000-0003-2174-0924", 175 | "host": "orcid.org" 176 | }, 177 | "source-name": { 178 | "value": "ÜberWizard for ORCID" 179 | } 180 | }, 181 | "title": { 182 | "title": { 183 | "value": "Postdoc Network Annual Policy Meeting; Berkeley, CA, March 15-17, 2003" 184 | }, 185 | "translated-title": null 186 | }, 187 | "external-ids": { 188 | "external-id": [ 189 | { 190 | "external-id-type": "grant_number", 191 | "external-id-value": "0305602", 192 | "external-id-url": { 193 | "value": "http://www.nsf.gov/awardsearch/showAward?AWD_ID=0305602&HistoricalAwards=false" 194 | }, 195 | "external-id-relationship": "SELF" 196 | }, 197 | { 198 | "external-id-type": "grant_number", 199 | "external-id-value": "0305602", 200 | "external-id-url": { 201 | "value": "http://grants.uberresearch.com/100000076/0305602/Postdoc-Network-Annual-Policy-Meeting-Berkeley-CA-March-15-17-2003" 202 | }, 203 | "external-id-relationship": "SELF" 204 | } 205 | ] 206 | }, 207 | "type": "GRANT", 208 | "start-date": { 209 | "year": { 210 | "value": "2003" 211 | }, 212 | "month": { 213 | "value": "03" 214 | }, 215 | "day": { 216 | "value": "01" 217 | } 218 | }, 219 | "end-date": { 220 | "year": { 221 | "value": "2004" 222 | }, 223 | "month": { 224 | "value": "02" 225 | }, 226 | "day": { 227 | "value": "29" 228 | } 229 | }, 230 | "organization": { 231 | "name": "National Science Foundation - Directorate for Biological Sciences", 232 | "address": { 233 | "city": "n/a", 234 | "region": null, 235 | "country": "US" 236 | }, 237 | "disambiguated-organization": null 238 | }, 239 | "visibility": "PUBLIC", 240 | "put-code": 105986, 241 | "path": "/0000-0001-5109-3700/funding/105986", 242 | "display-index": "0" 243 | } 244 | ] 245 | }, 246 | { 247 | "last-modified-date": { 248 | "value": 1440583684380 249 | }, 250 | "external-ids": { 251 | "external-id": [ 252 | { 253 | "external-id-type": "grant_number", 254 | "external-id-value": "0342159", 255 | "external-id-url": { 256 | "value": "http://www.nsf.gov/awardsearch/showAward?AWD_ID=0342159&HistoricalAwards=false" 257 | }, 258 | "external-id-relationship": "SELF" 259 | } 260 | ] 261 | }, 262 | "funding-summary": [ 263 | { 264 | "created-date": { 265 | "value": 1440583684380 266 | }, 267 | "last-modified-date": { 268 | "value": 1440583684380 269 | }, 270 | "source": { 271 | "source-orcid": null, 272 | "source-client-id": { 273 | "uri": 
"http://orcid.org/client/0000-0003-2174-0924", 274 | "path": "0000-0003-2174-0924", 275 | "host": "orcid.org" 276 | }, 277 | "source-name": { 278 | "value": "ÜberWizard for ORCID" 279 | } 280 | }, 281 | "title": { 282 | "title": { 283 | "value": "Policy Implications of International Graduate Students and Postdocs in the United States" 284 | }, 285 | "translated-title": null 286 | }, 287 | "external-ids": { 288 | "external-id": [ 289 | { 290 | "external-id-type": "grant_number", 291 | "external-id-value": "0342159", 292 | "external-id-url": { 293 | "value": "http://www.nsf.gov/awardsearch/showAward?AWD_ID=0342159&HistoricalAwards=false" 294 | }, 295 | "external-id-relationship": "SELF" 296 | }, 297 | { 298 | "external-id-type": "grant_number", 299 | "external-id-value": "0342159", 300 | "external-id-url": { 301 | "value": "http://grants.uberresearch.com/100000179/0342159/Policy-Implications-of-International-Graduate-Students-and-Postdocs-in-the-United-States" 302 | }, 303 | "external-id-relationship": "SELF" 304 | } 305 | ] 306 | }, 307 | "type": "GRANT", 308 | "start-date": { 309 | "year": { 310 | "value": "2004" 311 | }, 312 | "month": { 313 | "value": "03" 314 | }, 315 | "day": { 316 | "value": "01" 317 | } 318 | }, 319 | "end-date": { 320 | "year": { 321 | "value": "2006" 322 | }, 323 | "month": { 324 | "value": "02" 325 | }, 326 | "day": { 327 | "value": "28" 328 | } 329 | }, 330 | "organization": { 331 | "name": "National Science Foundation - Office of the Director", 332 | "address": { 333 | "city": "n/a", 334 | "region": null, 335 | "country": "US" 336 | }, 337 | "disambiguated-organization": null 338 | }, 339 | "visibility": "PUBLIC", 340 | "put-code": 105988, 341 | "path": "/0000-0001-5109-3700/funding/105988", 342 | "display-index": "0" 343 | } 344 | ] 345 | }, 346 | { 347 | "last-modified-date": { 348 | "value": 1444208097475 349 | }, 350 | "external-ids": { 351 | "external-id": [ 352 | { 353 | "external-id-type": "grant_number", 354 | "external-id-value": "5F31MH010500-03", 355 | "external-id-url": { 356 | "value": "http://projectreporter.nih.gov/project_info_description.cfm?aid=2241697" 357 | }, 358 | "external-id-relationship": "SELF" 359 | } 360 | ] 361 | }, 362 | "funding-summary": [ 363 | { 364 | "created-date": { 365 | "value": 1444208097475 366 | }, 367 | "last-modified-date": { 368 | "value": 1444208097475 369 | }, 370 | "source": { 371 | "source-orcid": null, 372 | "source-client-id": { 373 | "uri": "http://orcid.org/client/0000-0003-2174-0924", 374 | "path": "0000-0003-2174-0924", 375 | "host": "orcid.org" 376 | }, 377 | "source-name": { 378 | "value": "ÜberWizard for ORCID" 379 | } 380 | }, 381 | "title": { 382 | "title": { 383 | "value": "CELLULAR BASIS OF CIRCADIAN CLOCK IN SCN" 384 | }, 385 | "translated-title": null 386 | }, 387 | "external-ids": { 388 | "external-id": [ 389 | { 390 | "external-id-type": "grant_number", 391 | "external-id-value": "5F31MH010500-03", 392 | "external-id-url": { 393 | "value": "http://projectreporter.nih.gov/project_info_description.cfm?aid=2241697" 394 | }, 395 | "external-id-relationship": "SELF" 396 | }, 397 | { 398 | "external-id-type": "grant_number", 399 | "external-id-value": "5F31MH010500-03", 400 | "external-id-url": { 401 | "value": "http://grants.uberresearch.com/100000025/F31MH010500/CELLULAR-BASIS-OF-CIRCADIAN-CLOCK-IN-SCN" 402 | }, 403 | "external-id-relationship": "SELF" 404 | } 405 | ] 406 | }, 407 | "type": "GRANT", 408 | "start-date": { 409 | "year": { 410 | "value": "1994" 411 | }, 412 | "month": { 413 | "value": 
"10" 414 | }, 415 | "day": { 416 | "value": "01" 417 | } 418 | }, 419 | "end-date": null, 420 | "organization": { 421 | "name": "National Institute of Mental Health", 422 | "address": { 423 | "city": "Bethesda", 424 | "region": null, 425 | "country": "US" 426 | }, 427 | "disambiguated-organization": null 428 | }, 429 | "visibility": "PUBLIC", 430 | "put-code": 116401, 431 | "path": "/0000-0001-5109-3700/funding/116401", 432 | "display-index": "0" 433 | } 434 | ] 435 | } 436 | ], 437 | "path": "/0000-0001-5109-3700/fundings" 438 | } 439 | } 440 | } 441 | """) 442 | self.crosswalker.crosswalk(orcid_profile, self.person_uri, self.graph) 443 | # Verify a grant exists. 444 | grant_uri = ns.D['grant-9ea22d7c992375778b4a3066f5142624'] 445 | self.assertEqual( 446 | self.graph.value(grant_uri, RDFS.label).toPython(), 447 | u"Policy Implications of International Graduate Students and Postdocs in the United States" 448 | ) 449 | # Verify three PI roles related to grants for this person uri. 450 | pi_roles = [guri for guri in self.graph.subjects(predicate=ns.OBO['RO_0000052'], object=self.person_uri)] 451 | self.assertEqual(len(pi_roles), 4) 452 | 453 | def test_with_funding(self): 454 | orcid_profile = json.loads(""" 455 | { 456 | "activities-summary": { 457 | "fundings": { 458 | "last-modified-date": { 459 | "value": 1449261003455 460 | }, 461 | "group": [ 462 | { 463 | "last-modified-date": { 464 | "value": 1449261003455 465 | }, 466 | "external-ids": { 467 | "external-id": [] 468 | }, 469 | "funding-summary": [ 470 | { 471 | "created-date": { 472 | "value": 1449261003455 473 | }, 474 | "last-modified-date": { 475 | "value": 1449261003455 476 | }, 477 | "source": { 478 | "source-orcid": { 479 | "uri": "http://orcid.org/0000-0003-3844-5120", 480 | "path": "0000-0003-3844-5120", 481 | "host": "orcid.org" 482 | }, 483 | "source-client-id": null, 484 | "source-name": { 485 | "value": "Ira Lurie" 486 | } 487 | }, 488 | "title": { 489 | "title": { 490 | "value": "The Utility of Ultra High Performance Supercritical Fluid Chromatography for the Analysis of Seized Drugs: Application to Synthetic Cannabinoids and Bath Salts " 491 | }, 492 | "translated-title": null 493 | }, 494 | "external-ids": null, 495 | "type": "GRANT", 496 | "start-date": { 497 | "year": { 498 | "value": "2015" 499 | }, 500 | "month": { 501 | "value": "01" 502 | }, 503 | "day": null 504 | }, 505 | "end-date": { 506 | "year": { 507 | "value": "2016" 508 | }, 509 | "month": { 510 | "value": "12" 511 | }, 512 | "day": null 513 | }, 514 | "organization": { 515 | "name": "National Institute of Justice", 516 | "address": { 517 | "city": "DC", 518 | "region": "DC", 519 | "country": "US" 520 | }, 521 | "disambiguated-organization": { 522 | "disambiguated-organization-identifier": "http://dx.doi.org/10.13039/100005289", 523 | "disambiguation-source": "FUNDREF" 524 | } 525 | }, 526 | "visibility": "PUBLIC", 527 | "put-code": 132761, 528 | "path": "/0000-0003-3844-5120/funding/132761", 529 | "display-index": "0" 530 | } 531 | ] 532 | } 533 | ], 534 | "path": "/0000-0003-3844-5120/fundings" 535 | } 536 | } 537 | } 538 | """) 539 | self.crosswalker.crosswalk(orcid_profile, self.person_uri, self.graph) 540 | # Verify a grant exists. 
541 | grant_uri = ns.D['grant-742228eecfbdacf092bf482f84151082'] 542 | self.assertEqual( 543 | self.graph.value(grant_uri, RDFS.label).toPython(), 544 | u"The Utility of Ultra High Performance Supercritical Fluid Chromatography for the Analysis of Seized " 545 | u"Drugs: Application to Synthetic Cannabinoids and Bath Salts " 546 | ) 547 | self.assertEqual(0, len(list(self.graph[grant_uri : ns.VIVO.sponsorAwardId : ]))) 548 | -------------------------------------------------------------------------------- /tests/app/test_utility.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | from orcid2vivo_app.utility import clean_orcid, is_valid_orcid 3 | 4 | 5 | class TestUtility(TestCase): 6 | def test_clean_orcid(self): 7 | orcid = '0000-0003-1527-0030' 8 | 9 | # Test with orcid.org prefix. 10 | self.assertEqual(clean_orcid('orcid.org/' + orcid), orcid) 11 | 12 | # Test with http://orcid.org prefix. 13 | self.assertEqual(clean_orcid('http://orcid.org/' + orcid), orcid) 14 | 15 | # Test without prefix. 16 | self.assertEqual(clean_orcid(orcid), orcid) 17 | 18 | def test_is_valid_orcid(self): 19 | self.assertTrue(is_valid_orcid("0000-0003-1527-0030")) 20 | self.assertTrue(is_valid_orcid("0000-0003-1527-003X")) 21 | self.assertFalse(is_valid_orcid("0000-0003-1527-00301")) 22 | self.assertFalse(is_valid_orcid("0000-0003-1527-003")) 23 | -------------------------------------------------------------------------------- /tests/app/test_vivo_uri.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | from orcid2vivo_app.vivo_uri import HashIdentifierStrategy 3 | from orcid2vivo_app.vivo_namespace import VIVO, OBO 4 | 5 | 6 | class TestHashIdentifierStrategy(TestCase): 7 | def setUp(self): 8 | self.strategy = HashIdentifierStrategy() 9 | 10 | def test_to_identifier(self): 11 | uri = "http://vivo.mydomain.edu/individual/grant-3c73b079585811b9cbb23c3253a0796a" 12 | self.assertEqual(uri, str(self.strategy.to_uri(VIVO.Grant, {"foo": "My Foo", "bar": "My Bar"}))) 13 | #Switch order 14 | self.assertEqual(uri, str(self.strategy.to_uri(VIVO.Grant, {"bar": "My Bar", "foo": "My Foo"}))) 15 | #Add a none 16 | self.assertEqual(uri, str(self.strategy.to_uri(VIVO.Grant, 17 | {"foo": "My Foo", "bar": "My Bar", "foobar": None}))) 18 | #General class trumps class 19 | self.assertEqual(uri, str(self.strategy.to_uri(VIVO.AnotherClazz, 20 | {"foo": "My Foo", "bar": "My Bar", "foobar": None}, 21 | general_clazz=VIVO.Grant))) 22 | 23 | #Different class 24 | self.assertNotEqual(uri, str(self.strategy.to_uri(VIVO.NotAGrant, {"foo": "My Foo", "bar": "My Bar"}))) 25 | #General class 26 | self.assertNotEqual(uri, str(self.strategy.to_uri(VIVO.Grant, {"foo": "My Foo", "bar": "My Bar"}, 27 | general_clazz=VIVO.AnotherClass))) 28 | #Changed attr 29 | self.assertNotEqual(uri, str(self.strategy.to_uri(VIVO.Grant, {"foo": "Not My Foo", "bar": "My Bar"}))) 30 | #Additional attr 31 | self.assertNotEqual(uri, str(self.strategy.to_uri(VIVO.Grant, 32 | {"foo": "My Foo", "bar": "My Bar", 33 | "foobar": "My FooBar"}))) 34 | 35 | def test_class_to_prefix(self): 36 | self.assertEqual("grant", HashIdentifierStrategy._class_to_prefix(VIVO.Grant)) 37 | self.assertEqual("ro_0000052", HashIdentifierStrategy._class_to_prefix(OBO.RO_0000052)) 38 | self.assertIsNone(HashIdentifierStrategy._class_to_prefix(None)) 39 | -------------------------------------------------------------------------------- /tests/test_loader.py: 
-------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import tempfile 3 | import shutil 4 | from orcid2vivo_loader import Store, load_single 5 | import os 6 | import tests 7 | import time 8 | import datetime 9 | import vcr 10 | from mock import patch, call 11 | from rdflib.compare import to_isomorphic 12 | 13 | my_vcr = vcr.VCR( 14 | cassette_library_dir=tests.FIXTURE_PATH, 15 | ) 16 | 17 | 18 | class TestStore(tests.TestCase): 19 | def setUp(self): 20 | self.data_path = tempfile.mkdtemp() 21 | self.db_filepath = os.path.join(self.data_path, "orcid2vivo.db") 22 | 23 | def tearDown(self): 24 | shutil.rmtree(self.data_path, ignore_errors=True) 25 | 26 | def test_persist(self): 27 | self.assertFalse(os.path.exists(self.db_filepath)) 28 | with Store(self.data_path) as store: 29 | self.assertEqual(self.db_filepath, store.db_filepath) 30 | # Created 31 | self.assertTrue(os.path.exists(self.db_filepath)) 32 | # Add 33 | store.add("0000-0003-1527-0030") 34 | 35 | # Still exists after close 36 | self.assertTrue(os.path.exists(self.db_filepath)) 37 | with Store(self.data_path) as store: 38 | self.assertTrue("0000-0003-1527-0030" in store) 39 | 40 | def test_contains(self): 41 | with Store(self.data_path) as store: 42 | # Add 43 | store.add("0000-0003-1527-0030") 44 | self.assertTrue("0000-0003-1527-0030" in store) 45 | self.assertFalse("X000-0003-1527-0030" in store) 46 | self.assertTrue(store.contains("0000-0003-1527-0030")) 47 | self.assertTrue(store.contains("0000-0003-1527-0030", True)) 48 | self.assertFalse(store.contains("0000-0003-1527-0030", False)) 49 | 50 | def test_add_item(self): 51 | with Store(self.data_path) as store: 52 | # Insert 53 | store.add("0000-0003-1527-0030") 54 | (orcid_id, active, last_update, person_uri, person_id, person_class, confirmed) = \ 55 | store["0000-0003-1527-0030"] 56 | self.assertTrue(active) 57 | self.assertIsNone(person_uri) 58 | self.assertIsNone(person_id) 59 | self.assertIsNone(person_class) 60 | self.assertIsNone(last_update) 61 | self.assertFalse(confirmed) 62 | # Update 63 | store.add("0000-0003-1527-0030", person_uri="http://me", person_id="me", person_class="Librarian", 64 | confirmed=True) 65 | (orcid_id, active, last_update, person_uri, person_id, person_class, confirmed) = \ 66 | store["0000-0003-1527-0030"] 67 | self.assertTrue(active) 68 | self.assertEqual("http://me", person_uri) 69 | self.assertEqual("me", person_id) 70 | self.assertEqual("Librarian", person_class) 71 | self.assertIsNone(last_update) 72 | self.assertTrue(confirmed) 73 | 74 | def test_add_deleted_item(self): 75 | with Store(self.data_path) as store: 76 | # Insert 77 | store.add("0000-0003-1527-0030") 78 | (orcid_id, active, last_update, person_uri, person_id, person_class, confirmed) = \ 79 | store["0000-0003-1527-0030"] 80 | self.assertTrue(active) 81 | self.assertIsNone(person_uri) 82 | self.assertIsNone(person_id) 83 | self.assertIsNone(person_class) 84 | self.assertIsNone(last_update) 85 | self.assertFalse(confirmed) 86 | # Delete 87 | del store["0000-0003-1527-0030"] 88 | # Add again 89 | store.add("0000-0003-1527-0030") 90 | (orcid_id, active, last_update, person_uri, person_id, person_class, confirmed) = \ 91 | store["0000-0003-1527-0030"] 92 | self.assertTrue(active) 93 | self.assertIsNone(person_uri) 94 | self.assertIsNone(person_id) 95 | self.assertIsNone(person_class) 96 | self.assertIsNone(last_update) 97 | 98 | def test_del(self): 99 | with Store(self.data_path) as store: 100 | 
store.add("0000-0003-1527-0030") 101 | self.assertTrue("0000-0003-1527-0030" in store) 102 | del store["0000-0003-1527-0030"] 103 | self.assertFalse("0000-0003-1527-0030" in store) 104 | 105 | def test_touch(self): 106 | with Store(self.data_path) as store: 107 | store.add("0000-0003-1527-0030") 108 | (orcid_id, active, last_update, person_uri, person_id, person_class, confirmed) = \ 109 | store["0000-0003-1527-0030"] 110 | time.sleep(1) 111 | store.touch("0000-0003-1527-0030") 112 | (orcid_id, active, new_last_update, person_uri, person_id, person_class, confirmed) = \ 113 | store["0000-0003-1527-0030"] 114 | self.assertNotEqual(last_update, new_last_update) 115 | 116 | def test_get_least_recent(self): 117 | with Store(self.data_path) as store: 118 | store.add("0000-0003-1527-0030") 119 | store.add("0000-0003-1527-0031") 120 | store.add("0000-0003-1527-0032") 121 | # Deactivate one to make sure not returned. 122 | del store["0000-0003-1527-0032"] 123 | # Touch first 124 | time.sleep(1) 125 | t = datetime.datetime.utcnow() 126 | time.sleep(1) 127 | store.touch("0000-0003-1527-0030") 128 | results = list(store.get_least_recent()) 129 | self.assertEqual(2, len(results)) 130 | self.assertEqual("0000-0003-1527-0031", results[0][0]) 131 | self.assertEqual("0000-0003-1527-0030", results[1][0]) 132 | 133 | # With limit 134 | results = list(store.get_least_recent(limit=1)) 135 | self.assertEqual(1, len(results)) 136 | self.assertEqual("0000-0003-1527-0031", results[0][0]) 137 | 138 | # Before 139 | results = list(store.get_least_recent(before_datetime=t)) 140 | self.assertEqual(1, len(results)) 141 | self.assertEqual("0000-0003-1527-0031", results[0][0]) 142 | 143 | def test_iter(self): 144 | with Store(self.data_path) as store: 145 | store.add("0000-0003-1527-0030") 146 | store.add("0000-0003-1527-0031") 147 | for orcid_id, active, new_last_update, person_uri, person_id, person_class, confirmed in store: 148 | self.assertTrue(orcid_id in ("0000-0003-1527-0030", "0000-0003-1527-0031")) 149 | self.assertEqual(2, len(list(store))) 150 | 151 | def test_delete_all(self): 152 | with Store(self.data_path) as store: 153 | store.add("0000-0003-1527-0030") 154 | store.add("0000-0003-1527-0031") 155 | self.assertTrue("0000-0003-1527-0030" in store) 156 | self.assertTrue("0000-0003-1527-0031" in store) 157 | store.delete_all() 158 | self.assertFalse("0000-0003-1527-0030" in store) 159 | self.assertFalse("0000-0003-1527-0031" in store) 160 | 161 | 162 | class TestLoad(tests.TestCase): 163 | def setUp(self): 164 | self.data_path = tempfile.mkdtemp() 165 | 166 | @my_vcr.use_cassette('loader/load_single.yaml') 167 | @patch("orcid2vivo_loader.sparql_insert") 168 | @patch("orcid2vivo_loader.sparql_delete") 169 | def test_load_single(self, mock_sparql_delete, mock_sparql_insert): 170 | with Store(self.data_path) as store: 171 | store.add("0000-0003-1527-0030") 172 | (orcid_id, active, last_update, person_uri, person_id, person_class, confirmed) = \ 173 | store["0000-0003-1527-0030"] 174 | self.assertIsNone(last_update) 175 | 176 | graph1, add_graph1, delete_graph1 = load_single("0000-0003-1527-0030", None, None, None, self.data_path, 177 | "http://vivo.mydomain.edu/sparql", "vivo@mydomain.edu", 178 | "password") 179 | 180 | self.assertEqual(319, len(add_graph1)) 181 | self.assertEqual(0, len(delete_graph1)) 182 | 183 | self.assertEqual(to_isomorphic(graph1), to_isomorphic(add_graph1)) 184 | 185 | with Store(self.data_path) as store: 186 | # Last update now set 187 | (orcid_id, active, last_update, person_uri, 
person_id, person_class, confirmed) = \ 188 | store["0000-0003-1527-0030"] 189 | self.assertIsNotNone(last_update) 190 | 191 | # Make sure turtle file created 192 | self.assertTrue(os.path.exists(os.path.join(self.data_path, "0000-0003-1527-0030.ttl"))) 193 | 194 | # Now change a fact and run again. Changed fact is provided by vcr recording. 195 | # Changed year of Amherst degree. 196 | # Had to rig the Accept-Encoding to create the vcr recording with: 197 | # r = requests.get('https://pub.orcid.org/v2.0/%s' % orcid, 198 | # headers={"Accept": "application/json", "Accept-Encoding": "identity"}) 199 | 200 | graph2, add_graph2, delete_graph2 = load_single("0000-0003-1527-0030", None, None, None, 201 | self.data_path, "http://vivo.mydomain.edu/sparql", 202 | "vivo@mydomain.edu", "password") 203 | 204 | self.assertEqual(319, len(graph2)) 205 | self.assertEqual(17, len(add_graph2)) 206 | self.assertEqual(17, len(delete_graph2)) 207 | 208 | mock_sparql_insert.assert_has_calls([ 209 | call(add_graph1, "http://vivo.mydomain.edu/sparql", "vivo@mydomain.edu", "password"), 210 | call(add_graph2, "http://vivo.mydomain.edu/sparql", "vivo@mydomain.edu", "password")]) 211 | mock_sparql_delete.assert_has_calls([ 212 | call(delete_graph1, "http://vivo.mydomain.edu/sparql", "vivo@mydomain.edu", "password"), 213 | call(delete_graph2, "http://vivo.mydomain.edu/sparql", "vivo@mydomain.edu", "password")]) 214 | -------------------------------------------------------------------------------- /tests/test_orcid2vivo.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | from rdflib import Graph, URIRef, RDF, OWL 3 | import orcid2vivo_app.vivo_namespace as ns 4 | from orcid2vivo import PersonCrosswalk 5 | from orcid2vivo_app.vivo_namespace import VIVO 6 | 7 | 8 | class TestPersonCrosswalk(TestCase): 9 | def setUp(self): 10 | self.graph = Graph(namespace_manager=ns.ns_manager) 11 | self.person_uri = ns.D["test"] 12 | self.orcid_id = "0000-0003-1527-0030" 13 | self.orcid_id_uriref = URIRef("http://orcid.org/{}".format(self.orcid_id)) 14 | 15 | def test_add_orcid_id(self): 16 | PersonCrosswalk._add_orcid_id(self.person_uri, self.orcid_id, self.graph, False) 17 | self.assertEqual(2, len(self.graph)) 18 | 19 | self.assertTrue((self.person_uri, VIVO.orcidId, self.orcid_id_uriref) in self.graph) 20 | self.assertTrue((self.orcid_id_uriref, RDF.type, OWL.Thing) in self.graph) 21 | 22 | def test_add_orcid_id_confirmed(self): 23 | PersonCrosswalk._add_orcid_id(self.person_uri, self.orcid_id, self.graph, True) 24 | self.assertEqual(3, len(self.graph)) 25 | 26 | self.assertTrue((self.orcid_id_uriref, VIVO.confirmedOrcidId, self.person_uri) in self.graph) 27 | --------------------------------------------------------------------------------