├── .dockerignore ├── .gitignore ├── .travis.yml ├── Dockerfile ├── LICENSE.txt ├── README.md ├── orcid2vivo.py ├── orcid2vivo_app ├── __init__.py ├── affiliations.py ├── bio.py ├── fundings.py ├── utility.py ├── vivo_namespace.py ├── vivo_uri.py └── works.py ├── orcid2vivo_loader.py ├── orcid2vivo_service.py ├── requirements.txt ├── setup.py ├── templates └── crosswalk_form.html └── tests ├── __init__.py ├── app ├── __init__.py ├── test_affiliations.py ├── test_bio.py ├── test_fundings.py ├── test_utility.py ├── test_vivo_uri.py └── test_works.py ├── fixtures └── loader │ └── load_single.yaml ├── test_loader.py └── test_orcid2vivo.py /.dockerignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__ 3 | *.py[cod] 4 | 5 | env 6 | ENV 7 | 8 | #PyCharm 9 | .idea 10 | 11 | #Git 12 | .git 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | 59 | #PyCharm 60 | .idea/ -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "2.7" 4 | before_install: 5 | - "pip install pip==7.1.2 --upgrade" 6 | - "pip install setuptools>=25.2.0 --upgrade" 7 | install: "pip install -r requirements.txt" 8 | script: python -m unittest discover 9 | notifications: 10 | email: 11 | - justinlittman@gmail.com -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:2.7 2 | MAINTAINER Justin Littman 3 | 4 | #Add files 5 | ADD . 
/orcid2vivo 6 | RUN pip install -r /orcid2vivo/requirements.txt 7 | EXPOSE 5000 8 | WORKDIR /orcid2vivo 9 | CMD python orcid2vivo_service.py --endpoint $O2V_ENDPOINT --username $O2V_USERNAME --password $O2V_PASSWORD --namespace $O2V_NAMESPACE --debug 10 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012-2014 The George Washington University 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject 9 | to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 17 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR 18 | ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 19 | CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # orcid2vivo 2 | Tool for retrieving data from the ORCID API and crosswalking to VIVO-ISF. 3 | 4 | ![Build status](https://travis-ci.org/gwu-libraries/orcid2vivo.svg) 5 | 6 | ## Installation 7 | With python/pip installed: 8 | 9 | ``` 10 | pip install orcid2vivo 11 | ``` 12 | 13 | ## Commandline 14 | * Supports outputting to: 15 | * screen / stdout 16 | * file 17 | * load to VIVO instance (via SPARQL Update) 18 | * Supports multiple RDF serializations. 19 | * Allows specifying: 20 | * VIVO namespace 21 | * An id or URI for the person. 22 | * Class for the person. 23 | 24 | ``` 25 | (ENV)GLSS-F0G5RP:orcid2vivo justinlittman$ python orcid2vivo.py -h 26 | usage: orcid2vivo.py [-h] [--format {xml,n3,turtle,nt,pretty-xml,trix}] 27 | [--file FILE] [--endpoint ENDPOINT] [--username USERNAME] 28 | [--password PASSWORD] [--person-id PERSON_ID] 29 | [--person-uri PERSON_URI] [--namespace NAMESPACE] 30 | [--person-class {FacultyMember,FacultyMemberEmeritus,Librarian,LibrarianEmeritus,NonAcademic,NonFacultyAcademic,ProfessorEmeritus,Student}] 31 | [--skip-person] 32 | orcid_id 33 | 34 | positional arguments: 35 | orcid_id 36 | 37 | optional arguments: 38 | -h, --help show this help message and exit 39 | --format {xml,n3,turtle,nt,pretty-xml,trix} 40 | The RDF format for serializing. Default is turtle. 41 | --file FILE Filepath to which to serialize. 42 | --endpoint ENDPOINT Endpoint for SPARQL Update of VIVO instance,e.g., 43 | http://localhost/vivo/api/sparqlUpdate. Also provide 44 | --username and --password. 45 | --username USERNAME Username for VIVO root. 46 | --password PASSWORD Password for VIVO root. 47 | --person-id PERSON_ID 48 | Id for the person to use when constructing the 49 | person's URI. If not provided, the orcid id will be 50 | used. 
51 | --person-uri PERSON_URI 52 | A URI for the person. If not provided, one will be 53 | created from the orcid id or person id. 54 | --namespace NAMESPACE 55 | VIVO namespace. Default is 56 | http://vivo.mydomain.edu/individual/. 57 | --person-class {FacultyMember,FacultyMemberEmeritus,Librarian,LibrarianEmeritus,NonAcademic,NonFacultyAcademic,ProfessorEmeritus,Student} 58 | Class (in VIVO Core ontology) for a person. Default is 59 | a FOAF Person. 60 | --skip-person Skip adding triples declaring the person and the 61 | person's name. 62 | 63 | ``` 64 | 65 | For example: 66 | ``` 67 | (ENV)GLSS-F0G5RP:orcid2vivo justinlittman$ python orcid2vivo.py 0000-0003-1527-0030 68 | ``` 69 | 70 | ## Web application 71 | * Supports outputting to: 72 | * web page 73 | * download 74 | * load to VIVO instance (via SPARQL Update) 75 | * Also supports outputting of ORCID profile to web page. 76 | * Can be invoked from web form and http client. 77 | * Supports multiple RDF serializations. 78 | * Allows specifying: 79 | * VIVO namespace 80 | * An id or URI for the person. 81 | * Class for the person. 82 | * Allows providing various default values when starting the application. 83 | 84 | 85 | ``` 86 | (ENV)GLSS-F0G5RP:orcid2vivo justinlittman$ python orcid2vivo_service.py -h 87 | usage: orcid2vivo_service.py [-h] 88 | [--format {xml,n3,turtle,nt,pretty-xml,trix}] 89 | [--endpoint ENDPOINT] [--username USERNAME] 90 | [--password PASSWORD] [--namespace NAMESPACE] 91 | [--person-class {FacultyMember,FacultyMemberEmeritus,Librarian,LibrarianEmeritus,NonAcademic,NonFacultyAcademic,ProfessorEmeritus,Student}] 92 | [--skip-person] [--debug] [--port PORT] 93 | 94 | optional arguments: 95 | -h, --help show this help message and exit 96 | --format {xml,n3,turtle,nt,pretty-xml,trix} 97 | The RDF format for serializing. Default is turtle. 98 | --endpoint ENDPOINT Endpoint for SPARQL Update of VIVO instance,e.g., 99 | http://localhost/vivo/api/sparqlUpdate. 100 | --username USERNAME Username for VIVO root. 101 | --password PASSWORD Password for VIVO root. 102 | --namespace NAMESPACE 103 | VIVO namespace. Default is 104 | http://vivo.mydomain.edu/individual/. 105 | --person-class {FacultyMember,FacultyMemberEmeritus,Librarian,LibrarianEmeritus,NonAcademic,NonFacultyAcademic,ProfessorEmeritus,Student} 106 | Class (in VIVO Core ontology) for a person. Default is 107 | a FOAF Person. 108 | --skip-person Skip adding triples declaring the person and the 109 | person's name. 110 | --debug 111 | --port PORT The port the service should run on. Default is 5000. 112 | 113 | ``` 114 | 115 | For example, to start: 116 | ``` 117 | (ENV)GLSS-F0G5RP:orcid2vivo justinlittman$ python orcid2vivo_service.py 118 | ``` 119 | 120 | The web form will now be available at http://localhost:5000/. 121 | 122 | ### Invoke using curl 123 | 124 | ``` 125 | GLSS-F0G5RP:orcid2vivo justinlittman$ curl --data "orcid_id=0000-0003-1527-0030&format=turtle" http://localhost:5000/ 126 | ``` 127 | 128 | ### Docker 129 | 130 | The web application can be deployed to a [Docker](https://www.docker.com/) container. 131 | 132 | ``` 133 | GLSS-F0G5RP:orcid2vivo justinlittman$ docker build -t orcid2vivo . 134 | GLSS-F0G5RP:orcid2vivo justinlittman$ docker run -e "O2V_ENDPOINT=http://vivo:8080/vivo/api/sparqlUpdate" -e "O2V_USERNAME=vivo_root@mydomain.edu" -e "O2V_PASSWORD=password" -e "O2V_NAMESPACE=http://vivo.mydomain.edu/" -p "5000:5000" -d orcid2vivo 135 | ``` 136 | 137 | The web form will now be available at http://localhost:5000/. 
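The same endpoint can be called from Python as well as curl. A minimal sketch using the `requests` library (which the project already uses), assuming the service is reachable at http://localhost:5000/ and mirroring the curl invocation above:

```
import requests

# POST the same form fields that the web form and the curl example use.
response = requests.post("http://localhost:5000/",
                         data={"orcid_id": "0000-0003-1527-0030", "format": "turtle"})
response.raise_for_status()
print(response.text)  # the crosswalked RDF, serialized as turtle
```
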
(Note: If using boot2docker, use result of `boot2docker ip` instead of localhost.) 138 | 139 | ## Bulk loading 140 | * Supports loading to VIVO instance (via SPARQL Update) for multiple people. 141 | * Provides database to record a list of: 142 | * Orcid id for the person 143 | * Last load 144 | * Active flag 145 | * Id for the person 146 | * URI for the person 147 | * Class for the person 148 | * Also allows specifying: 149 | * VIVO namespace 150 | * Whether to skip creating records for a person. 151 | * Invoked with command line interface. 152 | * Maintains store of complete RDF for a person. 153 | * All loads are incremental, as determined by comparing the stored RDF for a person against the generated RDF. 154 | 155 | The general workflow would be: 156 | 157 | 1. Add records to the database. 158 | 2. Periodically perform a load, possibly limiting the load by a last load cutoff or a number limit. 159 | 3. As necessary, update the database by adding or deleting (i.e., de-activating) orcid id records. 160 | 161 | ``` 162 | (ENV)GLSS-F0G5RP:orcid2vivo justinlittman$ python orcid2vivo_loader.py -h 163 | usage: orcid2vivo_loader.py [-h] [--debug] 164 | {add,delete,delete-all,load,list} ... 165 | 166 | positional arguments: 167 | {add,delete,delete-all,load,list} 168 | add Adds or updates orcid id record. If inactive, marks 169 | active. 170 | delete Marks an orcid id record as inactive so that it will 171 | not be loaded. 172 | delete-all Marks all orcid id records as inactive. 173 | load Fetches orcid profiles, crosswalks to VIVO-ISF, loads 174 | to VIVO instance, and updates orcid id record. If 175 | loading multiple orcid ids, loads in least recent 176 | order. 177 | list Lists orcid_id records in the db. 178 | 179 | optional arguments: 180 | -h, --help show this help message and exit 181 | --debug 182 | ``` 183 | 184 | For example: 185 | 186 | ``` 187 | (ENV)GLSS-F0G5RP:orcid2vivo justinlittman$ python orcid2vivo_loader.py add 0000-0003-1527-0030 188 | Adding 0000-0003-1527-0030 189 | Done 190 | (ENV)GLSS-F0G5RP:orcid2vivo justinlittman$ python orcid2vivo_loader.py list 191 | 0000-0003-1527-0030 [active=true; last_update=None; person_uri=None; person_id=None, person_class=None] 192 | Done 193 | (ENV)GLSS-F0G5RP:orcid2vivo justinlittman$ python orcid2vivo_loader.py load http://192.168.59.103:8080/vivo/api/sparqlUpdate vivo_root@gwu.edu http://vivo.gwu.edu --password password 194 | Loading to http://192.168.59.103:8080/vivo/api/sparqlUpdate 195 | Loaded: 0000-0003-1527-0030 196 | Done 197 | ``` 198 | 199 | ##Tests 200 | 201 | ``` 202 | GLSS-F0G5RP:orcid2vivo justinlittman$ python -m unittest discover 203 | ``` 204 | 205 | ##Strategies for generating URIs and/or creating entities 206 | Approaches to generating URIs and creating entities (e.g., journals or co-authors) are abstracted into strategies. Default strategies are provided, but they can be replaced with other strategies is necessary to meet local requirements. 207 | 208 | The strategy for generating URIs is provided by a class that has the following method: 209 | 210 | ``` 211 | def to_uri(self, clazz, attrs, general_clazz=None): 212 | """ 213 | Given an RDF class and a set of attributes for an entity, produce a URI. 214 | :param clazz: the class of the entity. 215 | :param attrs: a map of identifying attributes for an entity. 216 | :param general_clazz: a superclass of the entity that can be used to group like entities. 217 | :return: URI for the entity. 
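    For example, the default HashIdentifierStrategy builds the URI in the VIVO data namespace as <prefix>-<md5 hexdigest of the attribute values>, with the prefix derived from general_clazz (or, if absent, clazz); the d:journal-... URIs in the triples below illustrate the result.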
218 | """ 219 | ``` 220 | 221 | The strategy for creating entities is provided by a class that has the following method: 222 | 223 | ``` 224 | def should_create(self, clazz, uri): 225 | """ 226 | Determine whether an entity should be created. 227 | :param clazz: Class of the entity. 228 | :param uri: URI of the entity. 229 | :return: True if the entity should be created. 230 | """ 231 | ``` 232 | 233 | It may be desirable to skip creating entities if those entities already exist in the triple store. For example, this shows the triples when the journal is created: 234 | 235 | ``` 236 | d:academicarticle-df4d61373e64c72681d74829ea92071a vivo:hasPublicationVenue d:journal-65a2d6d4d80fdbbd78268bf4e814ee01 ; 237 | 238 | d:journal-65a2d6d4d80fdbbd78268bf4e814ee01 a bibo:Journal ; 239 | rdfs:label "D-Lib Magazine" ; 240 | bibo:issn "1082-9873" . 241 | ``` 242 | 243 | and this shows the triples when it is not created: 244 | 245 | ``` 246 | d:academicarticle-df4d61373e64c72681d74829ea92071a vivo:hasPublicationVenue d:journal-65a2d6d4d80fdbbd78268bf4e814ee01 ; 247 | 248 | ``` 249 | 250 | Depending on the strategies to be implemented, it may be a useful approach to combine both strategies into a single class. 251 | 252 | ##Caveats: 253 | * All data is not cross walked to VIVO-ISF. 254 | * Password for SPARQL Update is not handled securely. 255 | 256 | ##Other: 257 | * Feedback / tickets / pull requests welcome. 258 | * Consider using with [vivo-docker](https://github.com/gwu-libraries/vivo-docker) to put together an environment for experimenting with crosswalking ORCID to VIVO. 259 | -------------------------------------------------------------------------------- /orcid2vivo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import requests 4 | import argparse 5 | import codecs 6 | from rdflib import Graph, URIRef, RDF, OWL 7 | from rdflib.namespace import Namespace 8 | from orcid2vivo_app.vivo_uri import HashIdentifierStrategy 9 | from orcid2vivo_app.vivo_namespace import VIVO, FOAF, VCARD 10 | from orcid2vivo_app.affiliations import AffiliationsCrosswalk 11 | from orcid2vivo_app.bio import BioCrosswalk 12 | from orcid2vivo_app.fundings import FundingCrosswalk 13 | from orcid2vivo_app.works import WorksCrosswalk 14 | from orcid2vivo_app.utility import sparql_insert, clean_orcid 15 | import orcid2vivo_app.vivo_namespace as ns 16 | 17 | 18 | class SimpleCreateEntitiesStrategy(): 19 | """ 20 | A minimally configurable strategy for determining if ancillary entities 21 | should be created. 22 | 23 | Except for a few configurable options, entities are always created. 24 | 25 | Also, wraps a provided identifier strategy (need to support skip person). 26 | 27 | Other implementations must implement should_create(). 28 | """ 29 | def __init__(self, identifier_strategy, skip_person=False, person_uri=None): 30 | self.skip_person = skip_person 31 | self.person_uri = person_uri 32 | self._identifier_strategy = identifier_strategy 33 | self.person_name_vcard_uri = None 34 | 35 | def should_create(self, clazz, uri): 36 | """ 37 | Determine whether an entity should be created. 38 | :param clazz: Class of the entity. 39 | :param uri: URI of the entity. 40 | :return: True if the entity should be created. 
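        When skip_person is set, returns False for the person URI and for the person's name vcard URI; all other entities are created.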
41 | """ 42 | if self.skip_person and uri in (self.person_uri, self.person_name_vcard_uri): 43 | return False 44 | return True 45 | 46 | def to_uri(self, clazz, attrs, general_clazz=None): 47 | uri = self._identifier_strategy.to_uri(clazz, attrs, general_clazz=None) 48 | # Need to remember vcard uri for this person so that can skip. 49 | if clazz == VCARD.Name and attrs.get("person_uri") == self.person_uri: 50 | self.person_name_vcard_uri = uri 51 | return uri 52 | 53 | 54 | class PersonCrosswalk(): 55 | def __init__(self, identifier_strategy, create_strategy): 56 | self.identifier_strategy = identifier_strategy 57 | self.create_strategy = create_strategy 58 | self.bio_crosswalker = BioCrosswalk(identifier_strategy, create_strategy) 59 | self.affiliations_crosswalker = AffiliationsCrosswalk(identifier_strategy, create_strategy) 60 | self.funding_crosswalker = FundingCrosswalk(identifier_strategy, create_strategy) 61 | self.works_crosswalker = WorksCrosswalk(identifier_strategy, create_strategy) 62 | 63 | def crosswalk(self, orcid_id, person_uri, person_class=None, confirmed_orcid_id=False): 64 | 65 | # Create an RDFLib Graph 66 | graph = Graph(namespace_manager=ns.ns_manager) 67 | 68 | # 0000-0003-3441-946X 69 | clean_orcid_id = clean_orcid(orcid_id) 70 | orcid_profile = fetch_orcid_profile(clean_orcid_id) 71 | 72 | # Determine the class to use for the person 73 | person_clazz = FOAF.Person 74 | if person_class: 75 | person_clazz = getattr(VIVO, person_class) 76 | 77 | # ORCID 78 | PersonCrosswalk._add_orcid_id(person_uri, clean_orcid_id, graph, confirmed_orcid_id) 79 | 80 | self.bio_crosswalker.crosswalk(orcid_profile, person_uri, graph, person_class=person_clazz) 81 | self.works_crosswalker.crosswalk(orcid_profile, person_uri, graph) 82 | self.affiliations_crosswalker.crosswalk(orcid_profile, person_uri, graph) 83 | self.funding_crosswalker.crosswalk(orcid_profile, person_uri, graph) 84 | 85 | return graph, orcid_profile, person_uri 86 | 87 | @staticmethod 88 | def _add_orcid_id(person_uri, orcid_id, graph, confirmed): 89 | orcid_id_uriref = URIRef("http://orcid.org/%s" % orcid_id) 90 | graph.add((person_uri, VIVO.orcidId, orcid_id_uriref)) 91 | graph.add((orcid_id_uriref, RDF.type, OWL.Thing)) 92 | if confirmed: 93 | graph.add((orcid_id_uriref, VIVO.confirmedOrcidId, person_uri)) 94 | 95 | 96 | def fetch_orcid_profile(orcid_id): 97 | orcid = clean_orcid(orcid_id) 98 | r = requests.get('https://pub.orcid.org/v2.0/%s' % orcid, 99 | headers={"Accept": "application/json"}) 100 | if r: 101 | return r.json() 102 | else: 103 | raise Exception("Request to fetch ORCID profile for %s returned %s" % (orcid, r.status_code)) 104 | 105 | 106 | def set_namespace(namespace=None): 107 | # Set default VIVO namespace 108 | if namespace: 109 | ns.D = Namespace(namespace) 110 | ns.ns_manager.bind('d', ns.D, replace=True) 111 | 112 | 113 | def default_execute(orcid_id, namespace=None, person_uri=None, person_id=None, skip_person=False, person_class=None, 114 | confirmed_orcid_id=False): 115 | # Set namespace 116 | set_namespace(namespace) 117 | 118 | this_identifier_strategy = HashIdentifierStrategy() 119 | this_person_uri = URIRef(person_uri) if person_uri \ 120 | else this_identifier_strategy.to_uri(FOAF.Person, {"id": person_id or orcid_id}) 121 | 122 | # this_create_strategy will implement both create strategy and identifier strategy 123 | this_create_strategy = SimpleCreateEntitiesStrategy(this_identifier_strategy, skip_person=skip_person, 124 | person_uri=this_person_uri) 125 | 126 | crosswalker = 
PersonCrosswalk(create_strategy=this_create_strategy, identifier_strategy=this_create_strategy) 127 | return crosswalker.crosswalk(orcid_id, this_person_uri, person_class=person_class, 128 | confirmed_orcid_id=confirmed_orcid_id) 129 | 130 | 131 | if __name__ == '__main__': 132 | parser = argparse.ArgumentParser() 133 | parser.add_argument("orcid_id") 134 | parser.add_argument("--format", default="turtle", choices=["xml", "n3", "turtle", "nt", "pretty-xml", "trix"], 135 | help="The RDF format for serializing. Default is turtle.") 136 | parser.add_argument("--file", help="Filepath to which to serialize.") 137 | parser.add_argument("--endpoint", dest="endpoint", 138 | help="Endpoint for SPARQL Update of VIVO instance,e.g., http://localhost/vivo/api/sparqlUpdate." 139 | " Also provide --username and --password.") 140 | parser.add_argument("--username", dest="username", help="Username for VIVO root.") 141 | parser.add_argument("--password", dest="password", 142 | help="Password for VIVO root.") 143 | parser.add_argument("--person-id", dest="person_id", help="Id for the person to use when constructing the person's " 144 | "URI. If not provided, the orcid id will be used.") 145 | parser.add_argument("--person-uri", dest="person_uri", help="A URI for the person. If not provided, one will be " 146 | "created from the orcid id or person id.") 147 | parser.add_argument("--namespace", default="http://vivo.mydomain.edu/individual/", 148 | help="VIVO namespace. Default is http://vivo.mydomain.edu/individual/.") 149 | parser.add_argument("--person-class", dest="person_class", 150 | choices=["FacultyMember", "FacultyMemberEmeritus", "Librarian", "LibrarianEmeritus", 151 | "NonAcademic", "NonFacultyAcademic", "ProfessorEmeritus", "Student"], 152 | help="Class (in VIVO Core ontology) for a person. 
Default is a FOAF Person.") 153 | parser.add_argument("--skip-person", dest="skip_person", action="store_true", 154 | help="Skip adding triples declaring the person and the person's name.") 155 | parser.add_argument("--confirmed", action="store_true", help="Mark the orcid id as confirmed.") 156 | 157 | # Parse 158 | args = parser.parse_args() 159 | 160 | # Excute with default strategies 161 | (g, p, per_uri) = default_execute(args.orcid_id, namespace=args.namespace, person_uri=args.person_uri, 162 | person_id=args.person_id, skip_person=args.skip_person, 163 | person_class=args.person_class, confirmed_orcid_id=args.confirmed) 164 | 165 | # Write to file 166 | if args.file: 167 | with codecs.open(args.file, "w") as out: 168 | g.serialize(format=args.format, destination=out) 169 | 170 | # Post to SPARQL Update 171 | if args.endpoint: 172 | if not args.username or not args.password: 173 | raise Exception("If an endpoint is specified, --username and --password must be provided.") 174 | sparql_insert(g, args.endpoint, args.username, args.password) 175 | 176 | # If not writing to file to posting to SPARQL Update then serialize to stdout 177 | if not args.file and not args.endpoint: 178 | print g.serialize(format=args.format) 179 | -------------------------------------------------------------------------------- /orcid2vivo_app/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'justinlittman' 2 | -------------------------------------------------------------------------------- /orcid2vivo_app/affiliations.py: -------------------------------------------------------------------------------- 1 | from vivo_namespace import VIVO, OBO 2 | from rdflib import RDFS, RDF, Literal 3 | from vivo_namespace import FOAF 4 | from vivo_uri import to_hash_identifier 5 | from utility import add_date, add_date_interval 6 | import orcid2vivo_app.vivo_namespace as ns 7 | 8 | 9 | class AffiliationsCrosswalk: 10 | def __init__(self, identifier_strategy, create_strategy): 11 | self.identifier_strategy = identifier_strategy 12 | self.create_strategy = create_strategy 13 | 14 | def crosswalk(self, orcid_profile, person_uri, graph): 15 | # Education 16 | if "educations" in orcid_profile["activities-summary"]: 17 | for education in orcid_profile["activities-summary"]["educations"]["education-summary"]: 18 | # Gather some values 19 | degree_name = education.get("role-title") 20 | organization_name = education["organization"]["name"] 21 | start_date_year = (education["start-date"] or {}).get("year", {}).get("value") 22 | end_date_year = (education["end-date"] or {}).get("year", {}).get("value") 23 | 24 | # Organization 25 | organization_uri = self.identifier_strategy.to_uri(FOAF.Organization, {"name": organization_name}) 26 | if self.create_strategy.should_create(FOAF.Organization, organization_uri): 27 | graph.add((organization_uri, RDF.type, FOAF.Organization)) 28 | graph.add((organization_uri, RDFS.label, Literal(organization_name))) 29 | if "address" in education["organization"]: 30 | city = education["organization"]["address"]["city"] 31 | state = education["organization"]["address"]["region"] 32 | address_uri = ns.D[to_hash_identifier("geo", (city, state))] 33 | graph.add((address_uri, RDF.type, VIVO.GeographicLocation)) 34 | graph.add((organization_uri, OBO.RO_0001025, address_uri)) 35 | graph.add((address_uri, RDFS.label, Literal("%s, %s" % (city, state)))) 36 | 37 | # Output of educational process 38 | educational_process_uri = 
self.identifier_strategy.to_uri(VIVO.EducationalProcess, 39 | {"organization_name": organization_name, 40 | "degree_name": degree_name, 41 | "start_year": start_date_year, 42 | "end_year": end_date_year}) 43 | graph.add((educational_process_uri, RDF.type, VIVO.EducationalProcess)) 44 | # Has participants 45 | graph.add((educational_process_uri, OBO.RO_0000057, organization_uri)) 46 | graph.add((educational_process_uri, OBO.RO_0000057, person_uri)) 47 | # Department 48 | if education.get("department-name"): 49 | graph.add((educational_process_uri, VIVO.departmentOrSchool, 50 | Literal(education["department-name"]))) 51 | 52 | # Interval 53 | add_date_interval(educational_process_uri, graph, self.identifier_strategy, 54 | add_date(start_date_year, graph, self.identifier_strategy), 55 | add_date(end_date_year, graph, self.identifier_strategy)) 56 | 57 | if "role-title" in education: 58 | degree_name = education["role-title"] 59 | 60 | # Awarded degree 61 | awarded_degree_uri = self.identifier_strategy.to_uri(VIVO.AwardedDegree, 62 | {"educational_process_uri": 63 | educational_process_uri}) 64 | graph.add((awarded_degree_uri, RDF.type, VIVO.AwardedDegree)) 65 | graph.add((awarded_degree_uri, RDFS.label, Literal(degree_name))) 66 | 67 | # Assigned by organization 68 | graph.add((awarded_degree_uri, VIVO.assignedBy, organization_uri)) 69 | 70 | # Related to educational process 71 | graph.add((awarded_degree_uri, OBO.RO_0002353, educational_process_uri)) 72 | 73 | # Relates to degree 74 | degree_uri = self.identifier_strategy.to_uri(VIVO.AcademicDegree, {"name": degree_name}) 75 | graph.add((awarded_degree_uri, VIVO.relates, degree_uri)) 76 | if self.create_strategy.should_create(VIVO.AcademicDegree, degree_uri): 77 | graph.add((degree_uri, RDF.type, VIVO.AcademicDegree)) 78 | graph.add((degree_uri, RDFS.label, Literal(degree_name))) 79 | 80 | # Relates to person 81 | graph.add((awarded_degree_uri, VIVO.relates, person_uri)) 82 | -------------------------------------------------------------------------------- /orcid2vivo_app/bio.py: -------------------------------------------------------------------------------- 1 | from vivo_namespace import VIVO 2 | from rdflib import RDFS, RDF, Literal, XSD 3 | from utility import join_if_not_empty 4 | from vivo_namespace import VCARD, OBO, FOAF 5 | 6 | 7 | class BioCrosswalk: 8 | def __init__(self, identifier_strategy, create_strategy): 9 | self.identifier_strategy = identifier_strategy 10 | self.create_strategy = create_strategy 11 | 12 | def crosswalk(self, orcid_profile, person_uri, graph, person_class=FOAF.Person): 13 | 14 | # Get names (for person and name vcard) 15 | given_names = None 16 | family_name = None 17 | if "name" in orcid_profile["person"]: 18 | person_details = orcid_profile["person"]["name"] 19 | given_names = person_details.get("given-names", {}).get("value") 20 | family_name = person_details.get("family-name", {}).get("value") 21 | full_name = join_if_not_empty((given_names, family_name)) 22 | 23 | # Following is non-vcard bio information 24 | 25 | # If skip_person, then don't create person and add names 26 | if full_name and self.create_strategy.should_create(person_class, person_uri): 27 | # Add person 28 | graph.add((person_uri, RDF.type, person_class)) 29 | graph.add((person_uri, RDFS.label, Literal(full_name))) 30 | 31 | # Biography 32 | if "biography" in orcid_profile["person"]: 33 | biography = orcid_profile["person"]["biography"]["content"] 34 | if biography: 35 | graph.add((person_uri, VIVO.overview, Literal(biography))) 36 | 37 
| # Other identifiers 38 | # Default VIVO-ISF only supports a limited number of identifier types. 39 | if "external-identifiers" in orcid_profile["person"]: 40 | external_identifiers = orcid_profile["person"]["external-identifiers"]["external-identifier"] 41 | for external_identifier in external_identifiers: 42 | # Scopus ID 43 | if external_identifier["external-id-type"] == "Scopus Author ID": 44 | graph.add((person_uri, VIVO.scopusId, Literal(external_identifier["external-id-value"]))) 45 | 46 | # ISI Research ID 47 | if external_identifier["external-id-type"] == "ResearcherID": 48 | graph.add((person_uri, VIVO.researcherId, Literal(external_identifier["external-id-value"]))) 49 | 50 | # Keywords 51 | if "keywords" in orcid_profile["person"]: 52 | keywords = orcid_profile["person"]["keywords"]["keyword"] 53 | for keyword in keywords: 54 | keywords_content = keyword["content"] 55 | if keywords_content: 56 | for keyword_content in keywords_content.split(", "): 57 | graph.add((person_uri, VIVO.freetextKeyword, Literal(keyword_content))) 58 | 59 | # Following is vcard bio information 60 | 61 | # Add main vcard 62 | vcard_uri = self.identifier_strategy.to_uri(VCARD.Individual, {"person_uri": person_uri}) 63 | # Will only add vcard if there is a child vcard 64 | add_main_vcard = False 65 | 66 | # Name vcard 67 | vcard_name_uri = self.identifier_strategy.to_uri(VCARD.Name, {"person_uri": person_uri}) 68 | if (given_names or family_name) and self.create_strategy.should_create(VCARD.Name, vcard_name_uri): 69 | graph.add((vcard_name_uri, RDF.type, VCARD.Name)) 70 | graph.add((vcard_uri, VCARD.hasName, vcard_name_uri)) 71 | if given_names: 72 | graph.add((vcard_name_uri, VCARD.givenName, Literal(given_names))) 73 | if family_name: 74 | graph.add((vcard_name_uri, VCARD.familyName, Literal(family_name))) 75 | add_main_vcard = True 76 | 77 | # Websites 78 | if "researcher-urls" in orcid_profile["person"]: 79 | researcher_urls = orcid_profile["person"]["researcher-urls"]["researcher-url"] 80 | for researcher_url in researcher_urls: 81 | url = researcher_url["url"]["value"] 82 | url_name = researcher_url["url-name"] 83 | vcard_website_uri = self.identifier_strategy.to_uri(VCARD.URL, {"url": url}) 84 | graph.add((vcard_website_uri, RDF.type, VCARD.URL)) 85 | graph.add((vcard_uri, VCARD.hasURL, vcard_website_uri)) 86 | graph.add((vcard_website_uri, VCARD.url, Literal(url, datatype=XSD.anyURI))) 87 | if url_name: 88 | graph.add((vcard_website_uri, RDFS.label, Literal(url_name))) 89 | add_main_vcard = True 90 | 91 | if add_main_vcard and self.create_strategy.should_create(VCARD.Individual, vcard_uri): 92 | graph.add((vcard_uri, RDF.type, VCARD.Individual)) 93 | # Contact info for 94 | graph.add((vcard_uri, OBO.ARG_2000029, person_uri)) 95 | -------------------------------------------------------------------------------- /orcid2vivo_app/fundings.py: -------------------------------------------------------------------------------- 1 | from vivo_namespace import VIVO, OBO, FOAF, VCARD 2 | from rdflib import RDF, RDFS, XSD, Literal 3 | from utility import add_date, add_date_interval 4 | 5 | 6 | class FundingCrosswalk: 7 | def __init__(self, identifier_strategy, create_strategy): 8 | self.identifier_strategy = identifier_strategy 9 | self.create_strategy = create_strategy 10 | 11 | def crosswalk(self, orcid_profile, person_uri, graph): 12 | if "fundings" in orcid_profile["activities-summary"]: 13 | # Funding 14 | for funding_group in orcid_profile["activities-summary"]["fundings"]["group"]: 15 | for funding in 
funding_group["funding-summary"]: 16 | if funding["type"] == "GRANT": 17 | 18 | title = funding["title"]["title"]["value"] 19 | grant_uri = self.identifier_strategy.to_uri(VIVO.Grant, {"title": title}) 20 | # Type 21 | graph.add((grant_uri, RDF.type, VIVO.Grant)) 22 | 23 | # Person 24 | graph.add((grant_uri, VIVO.relates, person_uri)) 25 | 26 | # Title 27 | graph.add((grant_uri, RDFS.label, Literal(title))) 28 | 29 | # Role 30 | role_uri = self.identifier_strategy.to_uri(VIVO.PrincipalInvestigatorRole, 31 | {"grant_uri": grant_uri}) 32 | graph.add((role_uri, RDF.type, VIVO.PrincipalInvestigatorRole)) 33 | # Inheres in 34 | graph.add((role_uri, OBO.RO_0000052, person_uri)) 35 | graph.add((role_uri, VIVO.relatedBy, grant_uri)) 36 | 37 | # Date interval 38 | (start_year, start_month, start_day) = FundingCrosswalk._get_date_parts("start-date", funding) 39 | (end_year, end_month, end_day) = FundingCrosswalk._get_date_parts("end-date", funding) 40 | 41 | add_date_interval(grant_uri, graph, self.identifier_strategy, 42 | add_date(start_year, graph, self.identifier_strategy, start_month, start_day), 43 | add_date(end_year, graph, self.identifier_strategy, end_month, end_day)) 44 | 45 | # Award amount 46 | funding_amount = funding.get("amount") 47 | if funding_amount is not None: 48 | value = funding_amount.get("value") 49 | if value is not None: 50 | award_amount = "${:,}".format(int(value)) 51 | graph.add((grant_uri, VIVO.totalAwardAmount, Literal(award_amount))) 52 | 53 | # Awarded by 54 | if "organization" in funding: 55 | organization_name = funding["organization"]["name"] 56 | organization_uri = self.identifier_strategy.to_uri(FOAF.Organization, 57 | {"name": organization_name}) 58 | graph.add((grant_uri, VIVO.assignedBy, organization_uri)) 59 | if self.create_strategy.should_create(FOAF.Organization, organization_uri): 60 | graph.add((organization_uri, RDF.type, FOAF.Organization)) 61 | graph.add((organization_uri, RDFS.label, Literal(organization_name))) 62 | 63 | # Identifiers 64 | if "external-ids" in funding and funding.get("external-ids"): 65 | for external_identifier in funding["external-ids"]["external-id"]: 66 | if "funding-external-identifier-value" in external_identifier: 67 | graph.add((grant_uri, VIVO.sponsorAwardId, 68 | Literal(external_identifier["external-id-value"]))) 69 | identifier_url = (external_identifier.get("external-id-url", {}) or {}).get("value") 70 | if identifier_url: 71 | vcard_uri = self.identifier_strategy.to_uri(VCARD.Kind, {"url": identifier_url}) 72 | graph.add((vcard_uri, RDF.type, VCARD.Kind)) 73 | # Has contact info 74 | graph.add((grant_uri, OBO.ARG_2000028, vcard_uri)) 75 | # Url vcard 76 | vcard_url_uri = self.identifier_strategy.to_uri(VCARD.URL, {"vcard_uri": vcard_uri}) 77 | graph.add((vcard_url_uri, RDF.type, VCARD.URL)) 78 | graph.add((vcard_uri, VCARD.hasURL, vcard_url_uri)) 79 | graph.add((vcard_url_uri, VCARD.url, Literal(identifier_url, datatype=XSD.anyURI))) 80 | 81 | @staticmethod 82 | def _get_date_parts(field_name, funding): 83 | date = funding.get(field_name, {}) or {} 84 | return (date.get("year", {}) or {}).get("value"), \ 85 | (date.get("month", {}) or {}).get("value"), \ 86 | (date.get("day", {}) or {}).get("value") 87 | -------------------------------------------------------------------------------- /orcid2vivo_app/utility.py: -------------------------------------------------------------------------------- 1 | from rdflib import RDF, RDFS, XSD, Literal 2 | from vivo_namespace import VIVO 3 | from numbers import Number 4 | from 
SPARQLWrapper import SPARQLWrapper 5 | import re 6 | 7 | 8 | def num_to_str(num): 9 | """ 10 | Converts a number to a string and removes leading 0s. 11 | 12 | If the number is already a string, then just returns. 13 | """ 14 | if isinstance(num, Number): 15 | return str(int(num)) 16 | return num.lstrip("0") 17 | 18 | 19 | def join_if_not_empty(items, sep=" "): 20 | """ 21 | Joins a list of items with a provided separator. 22 | 23 | Skips an empty item. 24 | """ 25 | joined = "" 26 | for item in items: 27 | if item and len(item) > 0: 28 | if joined != "": 29 | joined += sep 30 | joined += item 31 | return joined 32 | 33 | 34 | months = ("January", 35 | "February", 36 | "March", 37 | "April", 38 | "May", 39 | "June", 40 | "July", 41 | "August", 42 | "September", 43 | "October", 44 | "November", 45 | "December") 46 | 47 | 48 | def month_str_to_month_int(month_str): 49 | """ 50 | Converts a month name to the corresponding month number. 51 | 52 | If already a number, returns the number. 53 | 54 | Also, tries to convert the string to a number. 55 | """ 56 | if isinstance(month_str, Number): 57 | return month_str 58 | 59 | try: 60 | return int(month_str) 61 | except ValueError: 62 | pass 63 | 64 | return months.index(month_str)+1 65 | 66 | 67 | def month_int_to_month_str(month_int): 68 | if isinstance(month_int, basestring): 69 | try: 70 | month_int = int(month_int) 71 | except ValueError: 72 | return month_int 73 | 74 | return months[month_int-1] 75 | 76 | 77 | def add_date(year, g, identifier_strategy, month=None, day=None, label=None): 78 | """ 79 | Adds triples for a date. 80 | 81 | Return True if date was added. 82 | """ 83 | #Date 84 | date_uri = identifier_strategy.to_uri(VIVO.DateTimeValue, {"year": year, "month": month, "day": day}) 85 | if year: 86 | g.add((date_uri, RDF.type, VIVO.DateTimeValue)) 87 | #Day, month, and year 88 | if day and month: 89 | g.add((date_uri, VIVO.dateTimePrecision, VIVO.yearMonthDayPrecision)) 90 | g.add((date_uri, VIVO.dateTime, 91 | Literal("%s-%02d-%02dT00:00:00" % ( 92 | int(year), month_str_to_month_int(month), int(day)), 93 | datatype=XSD.dateTime))) 94 | g.add((date_uri, 95 | RDFS.label, 96 | Literal(label or "%s %s, %s" % (month_int_to_month_str(month), num_to_str(day), num_to_str(year))))) 97 | #Month and year 98 | elif month: 99 | g.add((date_uri, VIVO.dateTimePrecision, VIVO.yearMonthPrecision)) 100 | g.add((date_uri, VIVO.dateTime, 101 | Literal("%s-%02d-01T00:00:00" % ( 102 | year, month_str_to_month_int(month)), 103 | datatype=XSD.dateTime))) 104 | g.add((date_uri, 105 | RDFS.label, 106 | Literal(label or "%s %s" % (month, num_to_str(year))))) 107 | else: 108 | #Just year 109 | g.add((date_uri, VIVO.dateTimePrecision, VIVO.yearPrecision)) 110 | g.add((date_uri, VIVO.dateTime, 111 | Literal("%s-01-01T00:00:00" % ( 112 | year), 113 | datatype=XSD.dateTime))) 114 | g.add((date_uri, RDFS.label, Literal(label or num_to_str(year)))) 115 | return date_uri 116 | return None 117 | 118 | 119 | def add_date_interval(subject_uri, g, identifier_strategy, start_uri=None, end_uri=None): 120 | """ 121 | Adds triples for a date interval. 
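    Returns the interval URI, or None if neither a start nor an end date URI is provided.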
122 | """ 123 | if start_uri or end_uri: 124 | interval_uri = identifier_strategy.to_uri(VIVO.DateTimeInterval, {"subject_uri": subject_uri, 125 | "start_uri": start_uri, "end_uri": end_uri}) 126 | g.add((interval_uri, RDF.type, VIVO.DateTimeInterval)) 127 | g.add((subject_uri, VIVO.dateTimeInterval, interval_uri)) 128 | if start_uri: 129 | g.add((interval_uri, VIVO.start, start_uri)) 130 | if end_uri: 131 | g.add((interval_uri, VIVO.end, end_uri)) 132 | return interval_uri 133 | return None 134 | 135 | 136 | def sparql_insert(graph, endpoint, username, password): 137 | #Need to construct query 138 | ns_lines = [] 139 | triple_lines = [] 140 | for line in graph.serialize(format="turtle").splitlines(): 141 | if line.startswith("@prefix"): 142 | #Change from @prefix to PREFIX 143 | ns_lines.append("PREFIX" + line[7:-2]) 144 | else: 145 | triple_lines.append(line) 146 | query = "\n".join(ns_lines) 147 | query += "\nINSERT DATA { GRAPH {\n" 148 | query += "\n".join(triple_lines) 149 | query += "\n}}" 150 | sparql_update(query, endpoint, username, password) 151 | 152 | 153 | def sparql_delete(graph, endpoint, username, password): 154 | #Need to construct query 155 | ns_lines = [] 156 | triple_lines = [] 157 | for line in graph.serialize(format="turtle").splitlines(): 158 | if line.startswith("@prefix"): 159 | #Change from @prefix to PREFIX 160 | ns_lines.append("PREFIX" + line[7:-2]) 161 | else: 162 | triple_lines.append(line) 163 | query = "\n".join(ns_lines) 164 | query += "\nDELETE DATA { GRAPH {\n" 165 | query += "\n".join(triple_lines) 166 | query += "\n}}" 167 | sparql_update(query, endpoint, username, password) 168 | 169 | 170 | def sparql_update(query, endpoint, username, password): 171 | """ 172 | Perform a SPARQL Update query. 173 | 174 | :param query: the query to perform 175 | :param endpoint: the URL for SPARQL Update on the SPARQL server 176 | :param username: username for SPARQL Update 177 | :param password: password for SPARQL Update 178 | """ 179 | sparql = SPARQLWrapper(endpoint) 180 | sparql.addParameter("email", username) 181 | sparql.addParameter("password", password) 182 | sparql.setQuery(query) 183 | sparql.setMethod("POST") 184 | sparql.query() 185 | 186 | 187 | def clean_orcid(value): 188 | """ 189 | Minimal ORCID validation. Allowing for orcid.org/ 190 | """ 191 | if value.find('orcid.org/') > -1: 192 | return value.split('/')[-1] 193 | else: 194 | return value 195 | 196 | 197 | def is_valid_orcid(orcid): 198 | """ 199 | Returns true if has correct syntax for an orcid. 
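    For example, "0000-0003-1527-0030" is valid, while "orcid.org/0000-0003-1527-0030" is not (run it through clean_orcid() first).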
200 | """ 201 | # 0000-0003-1527-0030 202 | if re.match("\d\d\d\d-\d\d\d\d-\d\d\d\d-\d\d\d[0-9X]$", orcid): 203 | return True 204 | return False -------------------------------------------------------------------------------- /orcid2vivo_app/vivo_namespace.py: -------------------------------------------------------------------------------- 1 | from rdflib.namespace import Namespace, NamespaceManager 2 | from rdflib import Graph 3 | 4 | #Our data namespace 5 | D = Namespace('http://vivo.mydomain.edu/individual/') 6 | #The VIVO namespace 7 | VIVO = Namespace('http://vivoweb.org/ontology/core#') 8 | #The VCARD namespace 9 | VCARD = Namespace('http://www.w3.org/2006/vcard/ns#') 10 | #The OBO namespace 11 | OBO = Namespace('http://purl.obolibrary.org/obo/') 12 | #The BIBO namespace 13 | BIBO = Namespace('http://purl.org/ontology/bibo/') 14 | #The FOAF namespace 15 | FOAF = Namespace('http://xmlns.com/foaf/0.1/') 16 | #The SKOS namespace 17 | SKOS = Namespace('http://www.w3.org/2004/02/skos/core#') 18 | 19 | ns_manager = NamespaceManager(Graph()) 20 | ns_manager.bind('d', D) 21 | ns_manager.bind('vivo', VIVO) 22 | ns_manager.bind('vcard', VCARD) 23 | ns_manager.bind('obo', OBO) 24 | ns_manager.bind('bibo', BIBO) 25 | ns_manager.bind("foaf", FOAF) 26 | ns_manager.bind("skos", SKOS) 27 | -------------------------------------------------------------------------------- /orcid2vivo_app/vivo_uri.py: -------------------------------------------------------------------------------- 1 | import vivo_namespace as ns 2 | import hashlib 3 | import re 4 | import collections 5 | 6 | 7 | def to_hash_identifier(prefix, parts): 8 | """ 9 | Return an identifier composed of the prefix and hash of the parts. 10 | """ 11 | hash_parts = hashlib.md5("".join([unicode(part) for part in parts if part]).encode("utf-8")) 12 | return "%s-%s" % (prefix, hash_parts.hexdigest()) 13 | 14 | 15 | class HashIdentifierStrategy(): 16 | """ 17 | A strategy for constructing an identifier by creating a prefix from the 18 | class or general class and a body from a hash of the attributes. 19 | 20 | Other identifier strategies must implement to_uri(). 21 | """ 22 | pattern = re.compile("^.+/(.+?)(#(.+))?$") 23 | 24 | def __init__(self): 25 | pass 26 | 27 | def to_uri(self, clazz, attrs, general_clazz=None): 28 | """ 29 | Given an RDF class and a set of attributes for an entity, produce a URI. 30 | :param clazz: the class of the entity. 31 | :param attrs: a map of identifying attributes for an entity. 32 | :param general_clazz: a superclass of the entity that can be used to group like entities. 33 | :return: URI for the entity. 
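        For example, clazz=BIBO.Journal with attrs={"name": "D-Lib Magazine"} produces a URI of the form d:journal-<md5 hexdigest of the attribute values>.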
34 | """ 35 | return ns.D["%s-%s" % (self._class_to_prefix(general_clazz) or self._class_to_prefix(clazz), 36 | self._attrs_to_hash(attrs))] 37 | 38 | @staticmethod 39 | def _class_to_prefix(clazz): 40 | if clazz: 41 | match = HashIdentifierStrategy.pattern.search(clazz) 42 | assert match 43 | return (match.group(3) or match.group(1)).lower() 44 | return None 45 | 46 | @staticmethod 47 | def _attrs_to_hash(attrs): 48 | sorted_attrs = collections.OrderedDict(sorted(attrs.items())) 49 | hash_parts = hashlib.md5("".join([unicode(part) for part in sorted_attrs.values() if part]).encode("utf-8")) 50 | return hash_parts.hexdigest() -------------------------------------------------------------------------------- /orcid2vivo_app/works.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from rdflib import RDFS, RDF, XSD, Literal 3 | from vivo_namespace import VIVO, VCARD, OBO, BIBO, FOAF, SKOS 4 | from utility import join_if_not_empty 5 | import re 6 | import bibtexparser 7 | from bibtexparser.bparser import BibTexParser 8 | from bibtexparser.latexenc import unicode_to_latex, unicode_to_crappy_latex1, unicode_to_crappy_latex2 9 | import itertools 10 | from utility import add_date 11 | 12 | work_type_map = { 13 | "BOOK": BIBO["Book"], 14 | "BOOK_CHAPTER": BIBO["Chapter"], 15 | "BOOK_REVIEW": BIBO["Review"], 16 | "DICTIONARY_ENTRY": BIBO["DocumentPart"], 17 | "DISSERTATION": BIBO["Thesis"], 18 | "ENCYCLOPEDIA_ENTRY": BIBO["DocumentPart"], 19 | "EDITED_BOOK": BIBO["EditedBook"], 20 | "JOURNAL_ARTICLE": BIBO["AcademicArticle"], 21 | "JOURNAL_ISSUE": BIBO["Issue"], 22 | "MAGAZINE_ARTICLE": BIBO["Article"], 23 | "MANUAL": BIBO["Manual"], 24 | "ONLINE_RESOURCE": BIBO["Website"], 25 | "NEWSLETTER_ARTICLE": BIBO["Article"], 26 | "NEWSPAPER_ARTICLE": BIBO["Article"], 27 | "REPORT": BIBO["Report"], 28 | "RESEARCH_TOOL": BIBO["Document"], 29 | "SUPERVISED_STUDENT_PUBLICATION": BIBO["Article"], 30 | # test not mapped 31 | "TRANSLATION": BIBO["Document"], 32 | "WEBSITE": BIBO["Website"], 33 | "WORKING_PAPER": VIVO["WorkingPaper"], 34 | "CONFERENCE_ABSTRACT": VIVO["Abstract"], 35 | "CONFERENCE_PAPER": VIVO["ConferencePaper"], 36 | "CONFERENCE_POSTER": VIVO["ConferencePoster"], 37 | # disclosure not mapped 38 | # license not mapped 39 | "PATENT": BIBO["Patent"], 40 | # registered-copyright not mapped 41 | "ARTISTIC_PERFORMANCE": BIBO["Performance"], 42 | "DATA_SET": VIVO["Dataset"], 43 | # invention not mapped 44 | "LECTURE_SPEECH": VIVO["Speech"], 45 | "RESEARCH_TECHNIQUE": OBO["OBI_0000272"], 46 | # spin-off-company not mapped 47 | "STANDARDS_AND_POLICY": BIBO["Standard"], 48 | "OTHER": BIBO["Document"] 49 | } 50 | 51 | identifier_map = { 52 | "DOI": (BIBO.doi, "http://dx.doi.org/%s"), 53 | "ASIN": (BIBO.asin, "http://www.amazon.com/dp/%s"), 54 | "OCLC": (BIBO.oclcnum, "http://www.worldcat.org/oclc/%s"), 55 | "LCCN": (BIBO.lccn, None), 56 | "PMC": (VIVO.pmcid, "http://www.ncbi.nlm.nih.gov/pmc/articles/%s/"), 57 | "PMID": (BIBO.pmid, "http://www.ncbi.nlm.nih.gov/pubmed/%s"), 58 | "ISSN": (BIBO.issn, None) 59 | } 60 | 61 | journal_map = { 62 | "JOURNAL_ARTICLE": BIBO.Journal, 63 | "MAGAZINE_ARTICLE": BIBO.Magazine, 64 | "NEWSLETTER_ARTICLE": VIVO.Newsletter, 65 | "NEWSPAPER_ARTICLE": BIBO.Newspaper, 66 | "SUPERVISED_STUDENT_PUBLICATION": BIBO.Journal 67 | } 68 | 69 | contributor_map = { 70 | "EDITOR": VIVO.Editorship, 71 | "CHAIR_OR_TRANSLATOR": "TRANSLATOR" 72 | } 73 | 74 | bibtex_type_map = { 75 | "article": BIBO["Article"], 76 | "book": BIBO["Book"], 77 | 
"conference": VIVO["ConferencePaper"], 78 | "manual": BIBO["Manual"], 79 | "mastersthesis": BIBO["Thesis"], 80 | "phdthesis": BIBO["Thesis"], 81 | "proceedings": VIVO["ConferencePaper"], 82 | "techreport": BIBO["Report"] 83 | } 84 | 85 | 86 | class WorksCrosswalk: 87 | def __init__(self, identifier_strategy, create_strategy): 88 | self.identifier_strategy = identifier_strategy 89 | self.create_strategy = create_strategy 90 | 91 | def crosswalk(self, orcid_profile, person_uri, graph): 92 | # Work metadata may be available from the orcid profile, bibtex contained in the orcid profile, and/or crossref 93 | # record. The preferred order (in general) for getting metadata is crossref, bibtex, orcid. 94 | 95 | # Note that datacite records were considered, but not found to have additional/better metadata. 96 | 97 | person_surname = orcid_profile.get("person", {}).get("name", {}).get("family-name", {}).get("value", "") 98 | 99 | # Publications 100 | if "works" in orcid_profile["activities-summary"]: 101 | for work_group in orcid_profile["activities-summary"]["works"]["group"]: 102 | for work in work_group["work-summary"]: 103 | self.crosswalk_work(self._fetch_work(work["path"]), person_uri, person_surname, graph) 104 | 105 | @staticmethod 106 | def _fetch_work(path): 107 | r = requests.get('https://pub.orcid.org/v2.0%s' % path, 108 | headers={"Accept": "application/json"}) 109 | if r: 110 | return r.json() 111 | else: 112 | raise Exception("Request to fetch %s returned %s" % (path, r.status_code)) 113 | 114 | def crosswalk_work(self, work, person_uri, person_surname, graph): 115 | # Work metadata may be available from the orcid profile, bibtex contained in the orcid profile, and/or crossref 116 | # record. The preferred order (in general) for getting metadata is crossref, bibtex, orcid. 117 | 118 | # Note that datacite records were considered, but not found to have additional/better metadata. 
119 | 120 | # Work Type 121 | work_type = work["type"] 122 | if work_type in work_type_map: 123 | # Extract 124 | # Get external identifiers so that can get DOI 125 | external_identifiers = WorksCrosswalk._get_work_identifiers(work) 126 | doi = external_identifiers.get("DOI") 127 | crossref_record = WorksCrosswalk._fetch_crossref_doi(doi) if doi else {} 128 | 129 | # Bibtex 130 | bibtex = WorksCrosswalk._parse_bibtex(work) 131 | # Get title so that can construct work uri 132 | title = WorksCrosswalk._get_crossref_title(crossref_record) or bibtex.get( 133 | "title") or WorksCrosswalk._get_orcid_title(work) 134 | 135 | # Work-type 136 | work_class = work_type_map[work_type] 137 | if work_type == "TRANSLATION" and bibtex and bibtex["ENTRYTYPE"] in bibtex_type_map: 138 | work_class = bibtex_type_map[bibtex["ENTRYTYPE"]] 139 | 140 | # Construct work uri 141 | work_uri = self.identifier_strategy.to_uri(work_class, {"name": title}) 142 | 143 | graph.add((work_uri, RDF.type, work_class)) 144 | 145 | # Title 146 | graph.add((work_uri, RDFS.label, Literal(title))) 147 | 148 | # Publication date 149 | (publication_year, publication_month, publication_day) = \ 150 | WorksCrosswalk._get_crossref_publication_date(crossref_record) \ 151 | or WorksCrosswalk._get_orcid_publication_date(work) \ 152 | or WorksCrosswalk._get_bibtext_publication_date(bibtex) or (None, None, None) 153 | date_uri = add_date(publication_year, graph, self.identifier_strategy, 154 | publication_month, publication_day) 155 | if date_uri: 156 | graph.add((work_uri, VIVO.dateTimeValue, date_uri)) 157 | 158 | # Subjects 159 | subjects = crossref_record["subject"] if crossref_record and "subject" in crossref_record else None 160 | if subjects: 161 | for subject in subjects: 162 | subject_uri = self.identifier_strategy.to_uri(SKOS.Concept, {"name": subject}) 163 | graph.add((work_uri, VIVO.hasSubjectArea, subject_uri)) 164 | if self.create_strategy.should_create(SKOS.Concept, subject_uri): 165 | graph.add((subject_uri, RDF.type, SKOS.Concept)) 166 | graph.add((subject_uri, RDFS.label, Literal(subject))) 167 | 168 | # Contributors (an array of (first_name, surname, VIVO type, e.g., VIVO.Authorship)) 169 | bibtex_contributors = [] 170 | bibtex_contributors.extend(WorksCrosswalk._get_bibtex_authors(bibtex)) 171 | bibtex_contributors.extend(WorksCrosswalk._get_bibtex_editors(bibtex)) 172 | # Orcid is better for translations because has translator role 173 | if work_type == "TRANSLATION": 174 | contributors = WorksCrosswalk._get_orcid_contributors(work) 175 | else: 176 | contributors = WorksCrosswalk._get_crossref_authors(crossref_record) or bibtex_contributors \ 177 | or WorksCrosswalk._get_orcid_contributors(work) 178 | if not contributors: 179 | # Add person as author or editor. 180 | # None, None means this person. 181 | if work_type in ("EDITED_BOOK",): 182 | contributors.append((None, None, VIVO.Editorship)) 183 | elif work_type == "TRANSLATION": 184 | # Translator is a predicate, not a -ship class. 
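                    # (Handled below by adding BIBO.translator from the contributor to the work.)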
185 | contributors.append((None, None, "TRANSLATOR")) 186 | else: 187 | contributors.append((None, None, VIVO.Authorship)) 188 | 189 | for (first_name, surname, vivo_type) in contributors: 190 | if not surname or person_surname.lower() == surname.lower(): 191 | contributor_uri = person_uri 192 | else: 193 | contributor_uri = self.identifier_strategy.to_uri(FOAF.Person, {"first_name": first_name, 194 | "surname": surname}) 195 | if self.create_strategy.should_create(FOAF.Person, contributor_uri): 196 | graph.add((contributor_uri, RDF.type, FOAF.Person)) 197 | full_name = join_if_not_empty((first_name, surname)) 198 | graph.add((contributor_uri, RDFS.label, Literal(full_name))) 199 | 200 | # Translation is a special case 201 | if vivo_type == "TRANSLATOR": 202 | graph.add((contributor_uri, BIBO.translator, work_uri)) 203 | # So is patent assignee 204 | elif work_type == "PATENT": 205 | graph.add((contributor_uri, VIVO.assigneeFor, work_uri)) 206 | else: 207 | contributorship_uri = self.identifier_strategy.to_uri(vivo_type, 208 | {"contributor_uri": contributor_uri, 209 | "work_uri": work_uri}) 210 | graph.add((contributorship_uri, RDF.type, vivo_type)) 211 | graph.add((contributorship_uri, VIVO.relates, work_uri)) 212 | graph.add((contributorship_uri, VIVO.relates, contributor_uri)) 213 | 214 | # Publisher 215 | publisher = crossref_record.get("publisher") or bibtex.get("publisher") 216 | if publisher: 217 | publisher_uri = self.identifier_strategy.to_uri(FOAF.Organization, {"name": publisher}) 218 | graph.add((work_uri, VIVO.publisher, publisher_uri)) 219 | if self.create_strategy.should_create(FOAF.Organization, publisher_uri): 220 | graph.add((publisher_uri, RDF.type, FOAF.Organization)) 221 | graph.add((publisher_uri, RDFS.label, Literal(publisher))) 222 | 223 | # Volume 224 | volume = crossref_record.get("volume") or bibtex.get("volume") 225 | if volume: 226 | graph.add((work_uri, BIBO.volume, Literal(volume))) 227 | 228 | # Issue 229 | issue = crossref_record.get("issue") or bibtex.get("number") 230 | if issue: 231 | graph.add((work_uri, BIBO.issue, Literal(issue))) 232 | 233 | # Pages 234 | pages = crossref_record.get("page") or bibtex.get("pages") 235 | start_page = None 236 | end_page = None 237 | if pages and "-" in pages: 238 | (start_page, end_page) = re.split(" *-+ *", pages, maxsplit=2) 239 | if start_page: 240 | graph.add((work_uri, BIBO.pageStart, Literal(start_page))) 241 | if end_page: 242 | graph.add((work_uri, BIBO.pageEnd, Literal(end_page))) 243 | 244 | # Identifiers 245 | # Add doi in bibtex, but not orcid profile 246 | if bibtex and "doi" in bibtex and "DOI" not in external_identifiers: 247 | external_identifiers["DOI"] = bibtex["doi"] 248 | # Add isbn in bibtex, but not orcid profile 249 | if bibtex and "isbn" in bibtex and "ISBN" not in external_identifiers: 250 | external_identifiers["ISBN"] = bibtex["isbn"] 251 | 252 | for identifier_type, identifier in external_identifiers.iteritems(): 253 | identifier_url = None 254 | if identifier_type in ("PAT", "OTHER-ID") and work_type == "PATENT": 255 | identifier_predicate = VIVO.patentNumber 256 | elif identifier_type == "ISBN": 257 | clean_isbn = identifier.replace("-", "") 258 | if len(clean_isbn) <= 10: 259 | identifier_predicate = BIBO.isbn10 260 | else: 261 | identifier_predicate = BIBO.isbn13 262 | else: 263 | (identifier_predicate, url_template) = identifier_map.get(identifier_type, (None, None)) 264 | if url_template: 265 | identifier_url = url_template % identifier 266 | 267 | if identifier_predicate: 268 | 
graph.add((work_uri, identifier_predicate, Literal(identifier))) 269 | if identifier_url: 270 | self._add_work_url(identifier_url, work_uri, graph) 271 | 272 | orcid_url = (work.get("url", {}) or {}).get("value") 273 | if orcid_url and WorksCrosswalk._use_url(orcid_url): 274 | self._add_work_url(orcid_url, work_uri, graph) 275 | bibtex_url = bibtex.get("link") 276 | if bibtex_url and WorksCrosswalk._use_url(bibtex_url) and orcid_url != bibtex_url: 277 | self._add_work_url(bibtex_url, work_uri, graph) 278 | 279 | # Series 280 | series = bibtex.get("series") 281 | # TODO: Figure out how to model series in VIVO-ISF. 282 | 283 | # Journal 284 | # If Crossref has a journal use it 285 | journal = WorksCrosswalk._get_crossref_journal(crossref_record) 286 | issns = [] 287 | if journal: 288 | issns = crossref_record.get("ISSN", []) 289 | # Otherwise, only use for some work types. 290 | elif work_type in journal_map: 291 | journal = bibtex.get("journal") 292 | if journal: 293 | if "issn" in bibtex: 294 | issns = [bibtex["issn"]] 295 | else: 296 | journal = (work.get("journal-title", {}) or {}).get("value") 297 | 298 | if journal: 299 | journal_class = journal_map.get(work_type, BIBO.Journal) 300 | journal_uri = self.identifier_strategy.to_uri(journal_class, {"name": journal}) 301 | graph.add((work_uri, VIVO.hasPublicationVenue, journal_uri)) 302 | if self.create_strategy.should_create(journal_class, journal_uri): 303 | graph.add((journal_uri, RDF.type, journal_class)) 304 | graph.add((journal_uri, RDFS.label, Literal(journal))) 305 | for issn in issns: 306 | graph.add((journal_uri, BIBO.issn, Literal(issn))) 307 | 308 | if work_type in ("BOOK_CHAPTER",): 309 | book_title = bibtex.get("booktitle") 310 | if book_title: 311 | book_uri = self.identifier_strategy.to_uri(BIBO.Book, {"name": book_title}) 312 | graph.add((work_uri, VIVO.hasPublicationVenue, book_uri)) 313 | if self.create_strategy.should_create(BIBO.Book, book_uri): 314 | graph.add((book_uri, RDF.type, BIBO.Book)) 315 | graph.add((book_uri, RDFS.label, Literal(book_title))) 316 | 317 | if work_type in ("CONFERENCE_PAPER",): 318 | proceeding = bibtex.get("journal") or (work.get("journal-title", {}) or {}).get("value") 319 | if proceeding: 320 | proceeding_uri = self.identifier_strategy.to_uri(BIBO.Proceedings, {"name": proceeding}) 321 | graph.add((work_uri, VIVO.hasPublicationVenue, proceeding_uri)) 322 | if self.create_strategy.should_create(BIBO.Proceedings, proceeding_uri): 323 | graph.add((proceeding_uri, RDF.type, BIBO.Proceedings)) 324 | graph.add((proceeding_uri, RDFS.label, Literal(proceeding))) 325 | 326 | @staticmethod 327 | def _fetch_crossref_doi(doi): 328 | # curl 'http://api.crossref.org/works/10.1177/1049732304268657' -L -i 329 | r = requests.get('http://api.crossref.org/works/%s' % doi) 330 | if r.status_code == 404: 331 | # Not a crossref DOI. 
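            # Fall back to bibtex and ORCID metadata for this work.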
332 | return {} 333 | if r: 334 | return r.json()["message"] 335 | else: 336 | raise Exception("Request to fetch DOI %s returned %s" % (doi, r.status_code)) 337 | 338 | @staticmethod 339 | def _parse_bibtex(work): 340 | bibtex = {} 341 | if work and (work.get("citation", {}) or {}).get("citation-type") == "BIBTEX": 342 | citation = work["citation"]["citation-value"] 343 | # Need to add \n for bibtexparser to work 344 | curly_level = 0 345 | new_citation = "" 346 | for c in citation: 347 | if c == "{": 348 | curly_level += 1 349 | elif c == "}": 350 | curly_level -= 1 351 | new_citation += c 352 | if (curly_level == 1 and c == ",") or (curly_level == 0 and c == "}"): 353 | new_citation += "\n" 354 | parser = BibTexParser() 355 | parser.customization = WorksCrosswalk._bibtex_customizations 356 | bibtex = bibtexparser.loads(new_citation, parser=parser).entries[0] 357 | return bibtex 358 | 359 | @staticmethod 360 | def _get_crossref_title(crossref_record): 361 | if "title" in crossref_record and crossref_record["title"]: 362 | return crossref_record["title"][0] 363 | return None 364 | 365 | @staticmethod 366 | def _get_orcid_title(work): 367 | return join_if_not_empty((work["title"]["title"]["value"], 368 | (work["title"].get("subtitle") or {}).get("value")), ": ") 369 | 370 | @staticmethod 371 | def _get_orcid_publication_date(work): 372 | year = None 373 | month = None 374 | day = None 375 | publication_date = work.get("publication-date") 376 | if publication_date: 377 | year = publication_date["year"]["value"] if publication_date.get("year") else None 378 | month = publication_date["month"]["value"] if publication_date.get("month") else None 379 | day = publication_date["day"]["value"] if publication_date.get("day") else None 380 | if not year and not month and not day: 381 | return None 382 | return year, month, day 383 | 384 | @staticmethod 385 | def _get_bibtext_publication_date(bibtex): 386 | year = bibtex.get("year") 387 | if year and not re.match("\d{4}", year): 388 | year = None 389 | # Not going to try to parse month and day 390 | if not year: 391 | return None 392 | return year, None, None 393 | 394 | @staticmethod 395 | def _get_crossref_publication_date(doi_record): 396 | if "issued" in doi_record and "date-parts" in doi_record["issued"]: 397 | date_parts = doi_record["issued"]["date-parts"][0] 398 | return date_parts[0], date_parts[1] if len(date_parts) > 1 else None, date_parts[2] if len( 399 | date_parts) > 2 else None 400 | return None 401 | 402 | @staticmethod 403 | def _get_work_identifiers(work): 404 | ids = {} 405 | external_identifiers = work.get("external-ids") 406 | if external_identifiers: 407 | for external_identifier in (external_identifiers.get("external-id") or []): 408 | if external_identifier["external-id-value"] is not None: 409 | ids[external_identifier["external-id-type"].upper()] = \ 410 | external_identifier["external-id-value"] 411 | return ids 412 | 413 | @staticmethod 414 | def _get_crossref_authors(doi_record): 415 | authors = [] 416 | for author in doi_record.get("author", []): 417 | authors.append((author["given"], author["family"], VIVO.Authorship)) 418 | return authors 419 | 420 | @staticmethod 421 | def _get_orcid_contributors(work): 422 | contributors = [] 423 | for contributor in (work.get("contributors") or {}).get("contributor", []): 424 | # Last name, first name 425 | credit_name = (contributor.get("credit-name") or {}).get("value") 426 | # Some entries will not have a credit name, meaning the entry is for the person. 
427 | # Using None, None to indicate the person. 428 | first_name = None 429 | surname = None 430 | if credit_name: 431 | # Normalize with BibtexParser's getnames() 432 | clean_name = bibtexparser.customization.getnames([credit_name])[0] 433 | (first_name, surname) = WorksCrosswalk._parse_reversed_name(clean_name) 434 | role = (contributor.get("contributor-attributes", {}) or {}).get("contributor-role") 435 | contributors.append((first_name, surname, contributor_map.get(role, VIVO.Authorship))) 436 | return contributors 437 | 438 | @staticmethod 439 | def _get_bibtex_authors(bibtex): 440 | authors = [] 441 | for name in bibtex.get("author", []): 442 | (first_name, surname) = WorksCrosswalk._parse_reversed_name(name) 443 | authors.append((first_name, surname, VIVO.Authorship)) 444 | return authors 445 | 446 | @staticmethod 447 | def _get_bibtex_editors(bibtex): 448 | editors = [] 449 | for editor in bibtex.get("editor", {}): 450 | (first_name, surname) = WorksCrosswalk._parse_reversed_name(editor["name"]) 451 | editors.append((first_name, surname, VIVO.Editorship)) 452 | return editors 453 | 454 | @staticmethod 455 | def _parse_reversed_name(name): 456 | if name: 457 | split_name = name.split(", ", 2) 458 | if len(split_name) == 2: 459 | return split_name[1], split_name[0] 460 | else: 461 | return None, name 462 | 463 | @staticmethod 464 | def _bibtex_customizations(record): 465 | record = WorksCrosswalk._bibtex_convert_to_unicode(record) 466 | record = bibtexparser.customization.author(record) 467 | record = bibtexparser.customization.editor(record) 468 | return record 469 | 470 | @staticmethod 471 | def _bibtex_convert_to_unicode(record): 472 | for val in record: 473 | if '\\' in record[val] or '{' in record[val]: 474 | for k, v in itertools.chain(unicode_to_crappy_latex1, unicode_to_latex): 475 | if v in record[val]: 476 | record[val] = record[val].replace(v, k) 477 | # Try without space 478 | elif v.rstrip() in record[val]: 479 | record[val] = record[val].replace(v.rstrip(), k) 480 | 481 | # If there is still very crappy items 482 | if '\\' in record[val]: 483 | for k, v in unicode_to_crappy_latex2: 484 | if v in record[val]: 485 | parts = record[val].split(str(v)) 486 | for key, record[val] in enumerate(parts): 487 | if key + 1 < len(parts) and len(parts[key + 1]) > 0: 488 | # Change order to display accents 489 | parts[key] = parts[key] + parts[key + 1][0] 490 | parts[key + 1] = parts[key + 1][1:] 491 | record[val] = k.join(parts) 492 | 493 | # Also replace {\\&} 494 | if '{\\&}' in record[val]: 495 | record[val] = record[val].replace('{\\&}', '&') 496 | return record 497 | 498 | def _add_work_url(self, url, work_uri, graph): 499 | vcard_uri = self.identifier_strategy.to_uri(VCARD.Kind, {"url": url}) 500 | graph.add((vcard_uri, RDF.type, VCARD.Kind)) 501 | # Has contact info 502 | graph.add((work_uri, OBO.ARG_2000028, vcard_uri)) 503 | # Url vcard 504 | vcard_url_uri = self.identifier_strategy.to_uri(VCARD.URL, {"vcard_uri": vcard_uri}) 505 | graph.add((vcard_url_uri, RDF.type, VCARD.URL)) 506 | graph.add((vcard_uri, VCARD.hasURL, vcard_url_uri)) 507 | graph.add((vcard_url_uri, VCARD.url, Literal(url, datatype=XSD.anyURI))) 508 | 509 | @staticmethod 510 | def _use_url(url): 511 | # Use url if it does not match one of the patterns in identifier_map 512 | for (identifier_predicate, url_template) in identifier_map.itervalues(): 513 | if url_template: 514 | base_url = url_template[:url_template.index("%s")] 515 | if url.startswith(base_url): 516 | return False 517 | return True 518 | 519 
| @staticmethod 520 | def _get_crossref_journal(crossref_record): 521 | journal = None 522 | # May be multiple container titles. Take the longest. 523 | for j in crossref_record.get("container-title", []): 524 | if not journal or len(j) > len(journal): 525 | journal = j 526 | return journal 527 | -------------------------------------------------------------------------------- /orcid2vivo_loader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import sqlite3 5 | import os 6 | import logging 7 | import codecs 8 | from datetime import datetime 9 | from rdflib import Graph 10 | from rdflib.compare import graph_diff 11 | from orcid2vivo import default_execute 12 | from orcid2vivo_app.vivo_namespace import ns_manager 13 | from orcid2vivo_app.utility import sparql_insert, sparql_delete 14 | 15 | log = logging.getLogger(__name__) 16 | 17 | DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S" 18 | 19 | 20 | class Store: 21 | def __init__(self, data_path): 22 | self.db_filepath = os.path.join(data_path, "orcid2vivo.db") 23 | log.debug("Db filepath is %s", self.db_filepath) 24 | create_db = not os.path.exists(self.db_filepath) 25 | self._conn = sqlite3.connect(self.db_filepath) 26 | if create_db: 27 | self._create_db() 28 | 29 | def _create_db(self): 30 | logging.info("Creating db") 31 | c = self._conn.cursor() 32 | 33 | # Creating a new table 34 | c.execute(""" 35 | create table orcid_ids (orcid_id primary key, active, last_update, person_uri, person_id, person_class, 36 | confirmed); 37 | """) 38 | 39 | self._conn.commit() 40 | 41 | def __contains__(self, orcid_id): 42 | """ 43 | Returns True if there is a record for the orcid id and it is active. 44 | """ 45 | 46 | return self.contains(orcid_id, True) 47 | 48 | def contains(self, orcid_id, active=None): 49 | """ 50 | Returns True if there is a record for the orcid id. 51 | """ 52 | 53 | c = self._conn.cursor() 54 | if active is None: 55 | c.execute(""" 56 | select orcid_id from orcid_ids where orcid_id=? 57 | """, (orcid_id,)) 58 | else: 59 | c.execute(""" 60 | select orcid_id from orcid_ids where orcid_id=? and active=? 61 | """, (orcid_id, 1 if active else 0)) 62 | if c.fetchone(): 63 | return True 64 | return False 65 | 66 | def __getitem__(self, orcid_id): 67 | """ 68 | Returns orcid_id, active, last_update, person_uri, person_id, person_class, confirmed for orcid id. 69 | """ 70 | c = self._conn.cursor() 71 | c.execute(""" 72 | select orcid_id, active, last_update, person_uri, person_id, person_class, confirmed from orcid_ids where 73 | orcid_id=? 74 | """, (orcid_id,)) 75 | row = c.fetchone() 76 | if not row: 77 | raise IndexError 78 | return row 79 | 80 | def __delitem__(self, orcid_id): 81 | """ 82 | Marks an orcid id as inactive. 83 | """ 84 | c = self._conn.cursor() 85 | 86 | c.execute(""" 87 | update orcid_ids set active=0 where orcid_id=? 88 | """, (orcid_id,)) 89 | 90 | self._conn.commit() 91 | 92 | def add(self, orcid_id, person_uri=None, person_id=None, person_class=None, confirmed=False): 93 | """ 94 | Adds orcid id or updates existing orcid id and marks as active. 95 | """ 96 | c = self._conn.cursor() 97 | 98 | if self.contains(orcid_id): 99 | #Make update 100 | log.info("Updating %s", orcid_id) 101 | c.execute(""" 102 | update orcid_ids set active=1, person_uri=?, person_id=?, person_class=?, confirmed=? where orcid_id=? 
103 | """, (person_uri, person_id, person_class, confirmed, orcid_id)) 104 | else: 105 | #Add 106 | log.info("Adding %s", orcid_id) 107 | c.execute(""" 108 | insert into orcid_ids (orcid_id, active, person_uri, person_id, person_class, confirmed) 109 | values (?, 1, ?, ?, ?, ?) 110 | """, (orcid_id, person_uri, person_id, person_class, confirmed)) 111 | 112 | self._conn.commit() 113 | 114 | def get_least_recent(self, limit=None, before_datetime=None): 115 | """ 116 | Returns least recently updated active orcid ids as list of 117 | orcid_id, person_uri, person_id, person_class, confirmed. 118 | """ 119 | c = self._conn.cursor() 120 | sql = """ 121 | select orcid_id, person_uri, person_id, person_class, confirmed from orcid_ids where active=1 122 | """ 123 | if before_datetime: 124 | sql += " and (last_update < '%s' or last_update is null)" % before_datetime.strftime(DATETIME_FORMAT) 125 | 126 | sql += " order by last_update asc" 127 | 128 | if limit: 129 | sql += " limit %s" % limit 130 | 131 | c.execute(sql) 132 | return c.fetchall() 133 | 134 | def touch(self, orcid_id): 135 | """ 136 | Set last update for orcid id. 137 | """ 138 | c = self._conn.cursor() 139 | 140 | c.execute(""" 141 | update orcid_ids set last_update=CURRENT_TIMESTAMP where orcid_id=? and active=1 142 | """, (orcid_id,)) 143 | 144 | self._conn.commit() 145 | 146 | def __iter__(self): 147 | c = self._conn.cursor() 148 | c.execute(""" 149 | select orcid_id, active, last_update, person_uri, person_id, person_class, confirmed from orcid_ids 150 | """) 151 | 152 | return iter(c.fetchall()) 153 | 154 | def delete_all(self): 155 | c = self._conn.cursor() 156 | c.execute(""" 157 | update orcid_ids set active=0 158 | """) 159 | self._conn.commit() 160 | 161 | # Methods to make this a Context Manager. This is necessary to make sure the connection is closed properly. 
162 | def __enter__(self): 163 | return self 164 | 165 | def __exit__(self, exc_type, exc_val, exc_tb): 166 | self._conn.close() 167 | 168 | 169 | def load_single(orcid_id, person_uri, person_id, person_class, data_path, endpoint, username, password, 170 | namespace=None, skip_person=False, confirmed_orcid_id=False): 171 | with Store(data_path) as store: 172 | # Crosswalk 173 | (graph, profile, person_uri) = default_execute(orcid_id, namespace=namespace, person_uri=person_uri, 174 | person_id=person_id, skip_person=skip_person, 175 | person_class=person_class, confirmed_orcid_id=confirmed_orcid_id) 176 | 177 | graph_filepath = os.path.join(data_path, "%s.ttl" % orcid_id.lower()) 178 | previous_graph = Graph(namespace_manager=ns_manager) 179 | # Load last graph 180 | if os.path.exists(graph_filepath): 181 | log.debug("Loading previous graph %s", graph_filepath) 182 | previous_graph.parse(graph_filepath, format="turtle") 183 | 184 | # Diff against last graph 185 | (both_graph, delete_graph, add_graph) = graph_diff(previous_graph, graph) 186 | 187 | # SPARQL Update 188 | log.info("Adding %s, deleting %s triples for %s", len(add_graph), len(delete_graph), orcid_id) 189 | sparql_delete(delete_graph, endpoint, username, password) 190 | sparql_insert(add_graph, endpoint, username, password) 191 | 192 | # Save new last graph 193 | log.debug("Saving new graph %s", graph_filepath) 194 | with codecs.open(graph_filepath, "w") as out: 195 | graph.serialize(format="turtle", destination=out) 196 | 197 | # Touch 198 | store.touch(orcid_id) 199 | 200 | return graph, add_graph, delete_graph 201 | 202 | 203 | def load(data_path, endpoint, username, password, limit=None, before_datetime=None, namespace=None, skip_person=False): 204 | orcid_ids = [] 205 | failed_orcid_ids = [] 206 | with Store(data_path) as store: 207 | # Get the orcid ids to update 208 | results = store.get_least_recent(limit=limit, before_datetime=before_datetime) 209 | for (orcid_id, person_uri, person_id, person_class, confirmed) in results: 210 | try: 211 | load_single(orcid_id, person_uri, person_id, person_class, data_path, endpoint, username, password, 212 | namespace, skip_person, confirmed) 213 | orcid_ids.append(orcid_id) 214 | except Exception: 215 | failed_orcid_ids.append(orcid_id) 216 | return orcid_ids, failed_orcid_ids 217 | 218 | if __name__ == "__main__": 219 | parser = argparse.ArgumentParser() 220 | 221 | parser.add_argument("--debug", action="store_true") 222 | 223 | orcid_id_parent_parser = argparse.ArgumentParser(add_help=False) 224 | orcid_id_parent_parser.add_argument("orcid_id") 225 | data_path_parent_parser = argparse.ArgumentParser(add_help=False) 226 | data_path_parent_parser.add_argument("--data-path", dest="data_path", help="Path where db and ttl files will be " 227 | "stored. Default is ./data.", 228 | default="./data") 229 | 230 | subparsers = parser.add_subparsers(dest="command") 231 | 232 | add_parser = subparsers.add_parser("add", help="Adds or updates orcid id record. If inactive, marks active.", 233 | parents=[orcid_id_parent_parser, data_path_parent_parser]) 234 | add_parser.add_argument("--person-id", dest="person_id", help="Id for the person to use when constructing the " 235 | "person's URI. If not provided, the orcid id will be " 236 | "used.") 237 | add_parser.add_argument("--person-uri", dest="person_uri", help="A URI for the person. 
If not provided, one will " 238 | "be created from the orcid id or person id.") 239 | add_parser.add_argument("--person-class", dest="person_class", 240 | choices=["FacultyMember", "FacultyMemberEmeritus", "Librarian", "LibrarianEmeritus", 241 | "NonAcademic", "NonFacultyAcademic", "ProfessorEmeritus", "Student"], 242 | help="Class (in VIVO Core ontology) for a person. Default is a FOAF Person.") 243 | add_parser.add_argument("--confirmed", action="store_true", help="Mark the orcid id as confirmed.") 244 | 245 | delete_parser = subparsers.add_parser("delete", help="Marks an orcid id record as inactive so that it will not be " 246 | "loaded.", 247 | parents=[orcid_id_parent_parser, data_path_parent_parser]) 248 | 249 | delete_all_parser = subparsers.add_parser("delete-all", help="Marks all orcid id records as inactive.", 250 | parents=[data_path_parent_parser]) 251 | 252 | load_parser = subparsers.add_parser("load", help="Fetches orcid profiles, crosswalks to VIVO-ISF, loads to VIVO " 253 | "instance, and updates orcid id record. If loading multiple " 254 | "orcid ids, loads in least recent order.", 255 | parents=[data_path_parent_parser]) 256 | load_parser.add_argument("endpoint", help="Endpoint for SPARQL Update of VIVO instance, e.g., " 257 | "http://localhost/vivo/api/sparqlUpdate.") 258 | load_parser.add_argument("username", help="Username for VIVO root.") 259 | load_parser.add_argument("--namespace", default="http://vivo.mydomain.edu/individual/", help="VIVO namespace. Default is http://vivo.mydomain.edu/individual/.") 260 | load_parser.add_argument("--password", help="Password for VIVO root. Alternatively, provide in " 261 | "environment variable VIVO_ROOT_PASSWORD.") 262 | load_parser.add_argument("--orcid_id", help="Orcid id of person to load.") 263 | load_parser.add_argument("--limit", type=int, help="Maximum number of orcid ids to load.") 264 | load_parser.add_argument("--before", help="Orcid ids that were loaded before this date or never loaded.
Format is " 265 | "YYYY-MM-DD HH:MM:SS in UTC.") 266 | load_parser.add_argument("--skip-person", dest="skip_person", action="store_true", 267 | help="Skip adding triples declaring the person and the person's name.") 268 | 269 | list_parser = subparsers.add_parser("list", help="Lists orcid_id records in the db.", 270 | parents=[data_path_parent_parser]) 271 | 272 | # Parse 273 | args = parser.parse_args() 274 | 275 | if args.debug: 276 | logging.basicConfig(level=logging.DEBUG) 277 | 278 | if not os.path.exists(args.data_path): 279 | raise IOError("%s does not exists" % args.data_path) 280 | 281 | with Store(args.data_path) as main_store: 282 | if args.command == "add": 283 | print "Adding %s" % args.orcid_id 284 | main_store.add(args.orcid_id, person_uri=args.person_uri, person_id=args.person_id, 285 | person_class=args.person_class, confirmed=args.confirmed) 286 | elif args.command == "delete": 287 | print "Deleting %s" % args.orcid_id 288 | del main_store[args.orcid_id] 289 | elif args.command == "delete-all": 290 | print "Deleting all" 291 | main_store.delete_all() 292 | elif args.command == "list": 293 | for main_orcid_id, main_active, main_last_update, main_person_uri, \ 294 | main_person_id, main_person_class, main_confirmed in main_store: 295 | print "%s [active=%s; last_update=%s; person_uri=%s; person_id=%s, person_class=%s, confirmed=%s]" % ( 296 | main_orcid_id, 297 | "true" if main_active else "false", 298 | main_last_update, 299 | main_person_uri, 300 | main_person_id, 301 | main_person_class, 302 | main_confirmed 303 | ) 304 | 305 | if args.command == "load": 306 | main_password = args.password or os.environ["VIVO_ROOT_PASSWORD"] 307 | if args.orcid_id: 308 | with Store(args.data_path) as main_store: 309 | if args.orcid_id not in main_store: 310 | raise ValueError("%s not in db. Add person to db first." 
% args.orcid_id) 311 | main_orcid_id, main_active, main_last_update, main_person_uri, main_person_id, \ 312 | main_person_class, main_confirmed = main_store[args.orcid_id] 313 | print "Loading %s to %s" % (args.orcid_id, args.endpoint) 314 | load_single(main_orcid_id, main_person_uri, main_person_id, main_person_class, args.data_path, 315 | args.endpoint, args.username, main_password, 316 | namespace=args.namespace, skip_person=args.skip_person) 317 | else: 318 | main_before_datetime = datetime.strptime(args.before, DATETIME_FORMAT) if args.before else None 319 | print "Loading to %s" % args.endpoint 320 | main_orcid_ids, main_failed_orcid_ids = load(args.data_path, args.endpoint, args.username, 321 | main_password, limit=args.limit, 322 | before_datetime=main_before_datetime, 323 | namespace=args.namespace, 324 | skip_person=args.skip_person) 325 | print "Loaded: %s" % ", ".join(main_orcid_ids) 326 | print "Failed: %s" % ", ".join(main_failed_orcid_ids) 327 | 328 | print "Done" 329 | -------------------------------------------------------------------------------- /orcid2vivo_service.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from flask import Flask, render_template, request, session, Response, flash, Markup 4 | import argparse 5 | import json 6 | import urllib 7 | from orcid2vivo import default_execute 8 | import orcid2vivo_app.utility as utility 9 | 10 | app = Flask(__name__) 11 | def_format = None 12 | def_endpoint = None 13 | def_username = None 14 | def_password = None 15 | def_namespace = None 16 | def_person_class = "Person" 17 | def_skip_person = False 18 | def_output = "serialize" 19 | def_output_html = True 20 | def_output_profile = False 21 | def_confirmed = False 22 | 23 | content_types = { 24 | "xml": "application/rdf+xml", 25 | "n3": "text/rdf+n3", 26 | "turtle": "application/x-turtle", 27 | "nt": "text/plain", 28 | "pretty-xml": "application/rdf+xml", 29 | "trix": "application/rdf+xml" 30 | } 31 | 32 | @app.route('/', methods=["GET"]) 33 | def crosswalk_form(rdf=None, orcid_profile=None): 34 | return render_template("crosswalk_form.html", 35 | format=session.get("format") or def_format, 36 | endpoint=session.get("endpoint") or def_endpoint, 37 | username=session.get("username") or def_username, 38 | password=session.get("password") or def_password, 39 | namespace=session.get("namespace") or def_namespace, 40 | person_class=session.get("person_class") or def_person_class, 41 | skip_person=session.get("skip_person") or def_skip_person, 42 | confirmed=session.get("confirmed") or def_confirmed, 43 | output=session.get("output") or def_output, 44 | output_html=session.get("output_html") or def_output_html, 45 | output_profile=session.get("output_profile") or def_output_profile, 46 | rdf=rdf.decode("utf-8") if rdf else None, 47 | orcid_profile=json.dumps(orcid_profile, indent=3) if orcid_profile else None) 48 | 49 | @app.route('/', methods=["POST"]) 50 | def crosswalk(): 51 | session["format"] = request.form.get("format") 52 | endpoint = request.form.get("endpoint") 53 | session["endpoint"] = endpoint 54 | session["username"] = request.form.get("username") 55 | session["password"] = request.form.get("password") 56 | person_class = request.form.get("person_class") 57 | session["person_class"] = person_class 58 | session["skip_person"] = True if "skip_person" in request.form else False 59 | session["confirmed"] = True if "confirmed" in request.form else False 60 | session["output"] = 
request.form.get("output") 61 | session["output_html"] = True if "output_html" in request.form else False 62 | session["output_profile"] = True if "output_profile" in request.form else False 63 | 64 | #Excute with default strategies 65 | (g, p, per_uri) = default_execute(request.form["orcid_id"], 66 | namespace=request.form["namespace"], 67 | person_uri=request.form["person_uri"], 68 | person_id=request.form["person_id"], 69 | skip_person=True if "skip_person" in request.form else False, 70 | person_class=person_class if person_class != "Person" else None, 71 | confirmed_orcid_id=True if "confirmed" in request.form else False) 72 | 73 | if "output" in request.form and request.form["output"] == "vivo": 74 | utility.sparql_insert(g, endpoint, request.form["username"], request.form["password"]) 75 | msg = "Loaded to VIVO" 76 | if endpoint.endswith("api/sparqlUpdate"): 77 | vivo_profile_url = "%s/individual?%s" % (endpoint[:-17], urllib.urlencode({"uri": per_uri})) 78 | msg += ". Try %s." % (vivo_profile_url, vivo_profile_url) 79 | flash(Markup(msg)) 80 | return crosswalk_form() 81 | else: 82 | #Serialize 83 | rdf = g.serialize(format=request.form['format'], encoding="utf-8") 84 | if "output_html" in request.form or "output_profile" in request.form: 85 | return crosswalk_form(rdf=rdf if "output_html" in request.form else None, 86 | orcid_profile=p if "output_profile" in request.form else None) 87 | else: 88 | return Response(rdf, content_type=content_types[request.form['format']]) 89 | 90 | 91 | if __name__ == "__main__": 92 | parser = argparse.ArgumentParser() 93 | parser.add_argument("--format", default="turtle", choices=["xml", "n3", "turtle", "nt", "pretty-xml", "trix"], 94 | help="The RDF format for serializing. Default is turtle.") 95 | parser.add_argument("--endpoint", dest="endpoint", 96 | help="Endpoint for SPARQL Update of VIVO instance,e.g., http://localhost/vivo/api/sparqlUpdate.") 97 | parser.add_argument("--username", dest="username", help="Username for VIVO root.") 98 | parser.add_argument("--password", dest="password", 99 | help="Password for VIVO root.") 100 | parser.add_argument("--namespace", default="http://vivo.mydomain.edu/individual/", 101 | help="VIVO namespace. Default is http://vivo.mydomain.edu/individual/.") 102 | parser.add_argument("--person-class", dest="person_class", 103 | choices=["FacultyMember", "FacultyMemberEmeritus", "Librarian", "LibrarianEmeritus", 104 | "NonAcademic", "NonFacultyAcademic", "ProfessorEmeritus", "Student"], 105 | help="Class (in VIVO Core ontology) for a person. Default is a FOAF Person.") 106 | parser.add_argument("--skip-person", dest="skip_person", action="store_true", 107 | help="Skip adding triples declaring the person and the person's name.") 108 | parser.add_argument("--confirmed", action="store_true", help="Mark the orcid id as confirmed.") 109 | parser.add_argument("--debug", action="store_true") 110 | parser.add_argument("--port", type=int, default="5000", help="The port the service should run on. 
Default is 5000.") 111 | 112 | #Parse 113 | args = parser.parse_args() 114 | 115 | def_format = args.format 116 | def_endpoint = args.endpoint 117 | def_username = args.username 118 | def_password = args.password 119 | def_namespace = args.namespace 120 | def_person_class = args.person_class 121 | def_skip_person = args.skip_person 122 | def_confirmed = args.confirmed 123 | 124 | app.debug = args.debug 125 | app.secret_key = "orcid2vivo" 126 | app.run(host="0.0.0.0", port=args.port) -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | rdflib==4.2.0 2 | requests==2.7.0 3 | bibtexparser==0.6.1 4 | flask==0.10.1 5 | vcrpy==1.7.0 6 | mock==1.3.0 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name='orcid2vivo', 5 | version='0.10.0', 6 | url='https://github.com/gwu-libraries/orcid2vivo', 7 | author='Justin Littman', 8 | author_email='justinlittman@gmail.com', 9 | py_modules=['orcid2vivo', 'orcid2vivo_loader', 'orcid2vivo_service'], 10 | packages=['orcid2vivo_app', ], 11 | scripts=['orcid2vivo.py', 'orcid2vivo_loader.py', 'orcid2vivo_service.py'], 12 | description="For retrieving data from the ORCID API and crosswalking to VIVO-ISF.", 13 | platforms=['POSIX'], 14 | test_suite='tests', 15 | install_requires=['rdflib>=4.2.0', 16 | 'requests>=2.7.0', 17 | 'bibtexparser>=0.6.1', 18 | 'flask>=0.10.1'], 19 | tests_require=['vcrpy>=1.7.0', 20 | 'mock>=1.3.0'], 21 | classifiers=[ 22 | 'Intended Audience :: Developers', 23 | 'Topic :: Software Development :: Libraries :: Python Modules', 24 | 'Programming Language :: Python :: 2.7', 25 | 'Development Status :: 4 - Beta', 26 | 'Framework :: Flask', 27 | ], 28 | ) 29 | -------------------------------------------------------------------------------- /templates/crosswalk_form.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Orcid to VIVO 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 |
18 | ...
23 | {% with messages = get_flashed_messages() %}
24 | {% if messages %}
25 | {% for message in messages %}
26 | ...
27 | {% endfor %}
28 | {% endif %}
29 | {% endwith %}
30 |
31 | {% if rdf %}
32 | RDF
33 | {{ rdf }}
34 | {% endif %}
35 |
36 | {% if orcid_profile %}
37 | Orcid profile
38 | {{ orcid_profile }}
39 | {% endif %}
40 |
41 | ...
119 | If an id is not provided, the Orcid Id will be used.
120 | ...
127 | A URI for the person. If not provided, the Person Id or Orcid Id will be used.
128 | ...
161 | Be patient.
162 | ...
165 |
166 | 167 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import unittest 3 | import os 4 | 5 | FIXTURE_PATH = os.path.join( 6 | os.path.dirname(os.path.realpath(__file__)), 'fixtures' 7 | ) 8 | 9 | 10 | class TestCase(unittest.TestCase): 11 | logging.basicConfig(level=logging.DEBUG) -------------------------------------------------------------------------------- /tests/app/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gwu-libraries/orcid2vivo/14c4c8ebb828d862261324a13616aad1f2f0c721/tests/app/__init__.py -------------------------------------------------------------------------------- /tests/app/test_affiliations.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | import json 3 | from orcid2vivo_app.affiliations import AffiliationsCrosswalk 4 | import orcid2vivo_app.vivo_namespace as ns 5 | from rdflib import Graph 6 | from orcid2vivo_app.vivo_uri import HashIdentifierStrategy 7 | from orcid2vivo import SimpleCreateEntitiesStrategy 8 | 9 | 10 | class TestAffiliations(TestCase): 11 | 12 | def setUp(self): 13 | self.graph = Graph(namespace_manager=ns.ns_manager) 14 | self.person_uri = ns.D["test"] 15 | self.create_strategy = SimpleCreateEntitiesStrategy(HashIdentifierStrategy(), person_uri=self.person_uri) 16 | self.crosswalker = AffiliationsCrosswalk(identifier_strategy=self.create_strategy, 17 | create_strategy=self.create_strategy) 18 | 19 | def test_no_affiliations(self): 20 | orcid_profile = json.loads(""" 21 | { 22 | "activities-summary": { 23 | } 24 | } 25 | """) 26 | self.crosswalker.crosswalk(orcid_profile, self.person_uri, self.graph) 27 | self.assertEqual(0, len(self.graph)) 28 | 29 | def test_no_education(self): 30 | orcid_profile = json.loads(""" 31 | { 32 | "activities-summary": { 33 | "educations": { 34 | "education-summary": [] 35 | } 36 | } 37 | } 38 | """) 39 | self.crosswalker.crosswalk(orcid_profile, self.person_uri, self.graph) 40 | self.assertEqual(0, len(self.graph)) 41 | 42 | def test_education(self): 43 | orcid_profile = json.loads(""" 44 | { 45 | "activities-summary": { 46 | "educations": { 47 | "last-modified-date": { 48 | "value": 1486085029078 49 | }, 50 | "education-summary": [ 51 | { 52 | "created-date": { 53 | "value": 1385568459467 54 | }, 55 | "last-modified-date": { 56 | "value": 1486085026897 57 | }, 58 | "source": { 59 | "source-orcid": { 60 | "uri": "http://orcid.org/0000-0001-5109-3700", 61 | "path": "0000-0001-5109-3700", 62 | "host": "orcid.org" 63 | }, 64 | "source-client-id": null, 65 | "source-name": { 66 | "value": "Laurel L Haak" 67 | } 68 | }, 69 | "department-name": "Neurosciences", 70 | "role-title": "PhD", 71 | "start-date": { 72 | "year": { 73 | "value": "1995" 74 | }, 75 | "month": null, 76 | "day": null 77 | }, 78 | "end-date": { 79 | "year": { 80 | "value": "1997" 81 | }, 82 | "month": null, 83 | "day": null 84 | }, 85 | "organization": { 86 | "name": "Stanford University School of Medicine", 87 | "address": { 88 | "city": "Stanford", 89 | "region": "California", 90 | "country": "US" 91 | }, 92 | "disambiguated-organization": null 93 | }, 94 | "visibility": "PUBLIC", 95 | "put-code": 1006, 96 | "path": "/0000-0001-5109-3700/education/1006" 97 | } 98 | ], 99 | "path": "/0000-0001-5109-3700/educations" 100 | } 101 | } 102 | } 103 | 
""") 104 | # Changed start date to 1995 105 | self.crosswalker.crosswalk(orcid_profile, self.person_uri, self.graph) 106 | self.assertTrue(bool(self.graph.query(""" 107 | ask where { 108 | ?awdgre a vivo:AwardedDegree . 109 | ?awdgre rdfs:label "PhD" . 110 | ?awdgre obo:RO_0002353 ?awdgreproc . 111 | ?awdgre vivo:assignedBy ?org . 112 | ?awdgre vivo:relates d:test, ?dgre . 113 | ?org a foaf:Organization . 114 | ?org rdfs:label "Stanford University School of Medicine" . 115 | ?org obo:RO_0001025 ?geo . 116 | ?geo rdfs:label "Stanford, California" . 117 | ?awdgreproc a vivo:EducationalProcess . 118 | ?awdgreproc obo:RO_0000057 ?org, d:test . 119 | ?awdgreproc vivo:dateTimeInterval ?awdgreprocint . 120 | ?awdgreproc vivo:departmentOrSchool "Neurosciences" . 121 | ?awdgreprocint a vivo:DateTimeInterval . 122 | ?awdgreprocint vivo:end ?awdgreprocintend . 123 | ?awdgreprocintend a vivo:DateTimeValue . 124 | ?awdgreprocintend rdfs:label "1997" . 125 | ?awdgreprocintend vivo:dateTime "1997-01-01T00:00:00"^^xsd:dateTime . 126 | ?awdgreprocintend vivo:dateTimePrecision vivo:yearPrecision . 127 | ?awdgreprocint vivo:start ?awdgreprocintstart . 128 | ?awdgreprocintstart a vivo:DateTimeValue . 129 | ?awdgreprocintstart rdfs:label "1995" . 130 | ?awdgreprocintstart vivo:dateTime "1995-01-01T00:00:00"^^xsd:dateTime . 131 | ?awdgreprocintstart vivo:dateTimePrecision vivo:yearPrecision . 132 | } 133 | """))) 134 | 135 | def test_education_minimal(self): 136 | orcid_profile = json.loads(""" 137 | { 138 | "activities-summary": { 139 | "educations": { 140 | "last-modified-date": { 141 | "value": 1486085029078 142 | }, 143 | "education-summary": [ 144 | { 145 | "created-date": { 146 | "value": 1385568459467 147 | }, 148 | "last-modified-date": { 149 | "value": 1486085026897 150 | }, 151 | "source": { 152 | "source-orcid": { 153 | "uri": "http://orcid.org/0000-0001-5109-3700", 154 | "path": "0000-0001-5109-3700", 155 | "host": "orcid.org" 156 | }, 157 | "source-client-id": null, 158 | "source-name": { 159 | "value": "Laurel L Haak" 160 | } 161 | }, 162 | "department-name": null, 163 | "role-title": "PhD", 164 | "start-date": null, 165 | "end-date": null, 166 | "organization": { 167 | "name": "Stanford University School of Medicine", 168 | "address": { 169 | "city": "Stanford", 170 | "region": "California", 171 | "country": "US" 172 | }, 173 | "disambiguated-organization": null 174 | }, 175 | "visibility": "PUBLIC", 176 | "put-code": 1006, 177 | "path": "/0000-0001-5109-3700/education/1006" 178 | } 179 | ], 180 | "path": "/0000-0001-5109-3700/educations" 181 | } 182 | } 183 | } 184 | """) 185 | self.crosswalker.crosswalk(orcid_profile, self.person_uri, self.graph) 186 | self.assertTrue(bool(self.graph.query(""" 187 | ask where { 188 | ?org a foaf:Organization . 189 | ?org rdfs:label "Stanford University School of Medicine" . 190 | ?org obo:RO_0001025 ?geo . 191 | ?geo rdfs:label "Stanford, California" . 192 | ?awdgreproc a vivo:EducationalProcess . 193 | ?awdgreproc obo:RO_0000057 ?org, d:test . 194 | filter not exists { 195 | ?awdgreproc vivo:departmentOrSchool ?awdgredept . 
196 | } 197 | } 198 | """))) 199 | -------------------------------------------------------------------------------- /tests/app/test_bio.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from unittest import TestCase 5 | import json 6 | from orcid2vivo_app.bio import BioCrosswalk 7 | import orcid2vivo_app.vivo_namespace as ns 8 | from rdflib import Literal, Graph, RDF, RDFS 9 | from orcid2vivo_app.vivo_namespace import D, VIVO, FOAF 10 | from orcid2vivo_app.vivo_uri import HashIdentifierStrategy 11 | from orcid2vivo import SimpleCreateEntitiesStrategy 12 | 13 | 14 | class TestBio(TestCase): 15 | def setUp(self): 16 | self.graph = Graph(namespace_manager=ns.ns_manager) 17 | self.person_uri = ns.D["test"] 18 | self.create_strategy = SimpleCreateEntitiesStrategy(HashIdentifierStrategy(), person_uri=self.person_uri) 19 | self.crosswalker = BioCrosswalk(identifier_strategy=self.create_strategy, 20 | create_strategy=self.create_strategy) 21 | 22 | def test_no_external_identifiers(self): 23 | orcid_profile = json.loads(""" 24 | { 25 | "person": { 26 | "external-identifiers": { 27 | "last-modified-date": null, 28 | "external-identifier": [], 29 | "path": "/0000-0003-4507-4735/external-identifiers" 30 | }, 31 | "path": "/0000-0003-4507-4735/person" 32 | } 33 | } 34 | """) 35 | self.crosswalker.crosswalk(orcid_profile, self.person_uri, self.graph) 36 | self.assertEqual(0, len(self.graph)) 37 | 38 | def test_external_identifiers(self): 39 | orcid_profile = json.loads(""" 40 | { 41 | "person": { 42 | "external-identifiers": { 43 | "last-modified-date": { 44 | "value": 1390435480189 45 | }, 46 | "external-identifier": [ 47 | { 48 | "created-date": { 49 | "value": 1379686803951 50 | }, 51 | "last-modified-date": { 52 | "value": 1379686803951 53 | }, 54 | "source": { 55 | "source-orcid": null, 56 | "source-client-id": { 57 | "uri": "http://orcid.org/client/0000-0002-5982-8983", 58 | "path": "0000-0002-5982-8983", 59 | "host": "orcid.org" 60 | }, 61 | "source-name": { 62 | "value": "Scopus to ORCID" 63 | } 64 | }, 65 | "external-id-type": "Scopus Author ID", 66 | "external-id-value": "6602258586", 67 | "external-id-url": { 68 | "value": "http://www.scopus.com/inward/authorDetails.url?authorID=6602258586&partnerID=MN8TOARS" 69 | }, 70 | "external-id-relationship": "SELF", 71 | "visibility": "PUBLIC", 72 | "path": "/0000-0001-5109-3700/external-identifiers/142173", 73 | "put-code": 142173, 74 | "display-index": 0 75 | }, 76 | { 77 | "created-date": { 78 | "value": 1379686803951 79 | }, 80 | "last-modified-date": { 81 | "value": 1379686803951 82 | }, 83 | "source": { 84 | "source-orcid": { 85 | "uri": "http://orcid.org/0000-0001-7707-4137", 86 | "path": "0000-0001-7707-4137", 87 | "host": "orcid.org" 88 | }, 89 | "source-client-id": null, 90 | "source-name": { 91 | "value": "Clarivate Analytics" 92 | } 93 | }, 94 | "external-id-type": "ResearcherID", 95 | "external-id-value": "C-4986-2008", 96 | "external-id-url": { 97 | "value": "http://www.researcherid.com/rid/C-4986-2008" 98 | }, 99 | "external-id-relationship": "SELF", 100 | "visibility": "PUBLIC", 101 | "path": "/0000-0001-5109-3700/external-identifiers/38181", 102 | "put-code": 38181, 103 | "display-index": 0 104 | }, 105 | { 106 | "created-date": { 107 | "value": 1390435480189 108 | }, 109 | "last-modified-date": { 110 | "value": 1390435480189 111 | }, 112 | "source": { 113 | "source-orcid": null, 114 | "source-client-id": { 115 | "uri": 
"http://orcid.org/client/0000-0003-0412-1857", 116 | "path": "0000-0003-0412-1857", 117 | "host": "orcid.org" 118 | }, 119 | "source-name": { 120 | "value": "ISNI2ORCID search and link" 121 | } 122 | }, 123 | "external-id-type": "ISNI", 124 | "external-id-value": "0000000138352317", 125 | "external-id-url": { 126 | "value": "http://isni.org/isni/0000000138352317" 127 | }, 128 | "external-id-relationship": "SELF", 129 | "visibility": "PUBLIC", 130 | "path": "/0000-0001-5109-3700/external-identifiers/187639", 131 | "put-code": 187639, 132 | "display-index": 0 133 | } 134 | ], 135 | "path": "/0000-0001-5109-3700/external-identifiers" 136 | }, 137 | "path": "/0000-0001-5109-3700/person" 138 | } 139 | } 140 | """) 141 | self.create_strategy.skip_person = True 142 | self.crosswalker.crosswalk(orcid_profile, self.person_uri, self.graph) 143 | self.assertEqual(2, len(self.graph)) 144 | # ScopusID is added. 145 | self.assertTrue(self.graph[D["test"]: VIVO["scopusId"]: Literal("6602258586")]) 146 | # ResearcherId is added. 147 | self.assertTrue(self.graph[D["test"]: VIVO["researcherId"]: Literal("C-4986-2008")]) 148 | 149 | def test_name(self): 150 | orcid_profile = json.loads(u""" 151 | { 152 | "person": { 153 | "name": { 154 | "created-date": { 155 | "value": 1460753221409 156 | }, 157 | "last-modified-date": { 158 | "value": 1460753221409 159 | }, 160 | "given-names": { 161 | "value": "Laurel" 162 | }, 163 | "family-name": { 164 | "value": "Haak" 165 | }, 166 | "credit-name": { 167 | "value": "Laurel L Haak" 168 | }, 169 | "source": null, 170 | "visibility": "PUBLIC", 171 | "path": "0000-0001-5109-3700" 172 | }, 173 | "other-names": { 174 | "last-modified-date": { 175 | "value": 1461191605426 176 | }, 177 | "other-name": [ 178 | { 179 | "created-date": { 180 | "value": 1461191605416 181 | }, 182 | "last-modified-date": { 183 | "value": 1461191605416 184 | }, 185 | "source": { 186 | "source-orcid": { 187 | "uri": "http://orcid.org/0000-0001-5109-3700", 188 | "path": "0000-0001-5109-3700", 189 | "host": "orcid.org" 190 | }, 191 | "source-client-id": null, 192 | "source-name": { 193 | "value": "Laurel L Haak" 194 | } 195 | }, 196 | "content": " L. L. 
Haak", 197 | "visibility": "PUBLIC", 198 | "path": "/0000-0001-5109-3700/other-names/721941", 199 | "put-code": 721941, 200 | "display-index": 0 201 | }, 202 | { 203 | "created-date": { 204 | "value": 1461191605425 205 | }, 206 | "last-modified-date": { 207 | "value": 1461191605425 208 | }, 209 | "source": { 210 | "source-orcid": { 211 | "uri": "http://orcid.org/0000-0001-5109-3700", 212 | "path": "0000-0001-5109-3700", 213 | "host": "orcid.org" 214 | }, 215 | "source-client-id": null, 216 | "source-name": { 217 | "value": "Laurel L Haak" 218 | } 219 | }, 220 | "content": "L Haak", 221 | "visibility": "PUBLIC", 222 | "path": "/0000-0001-5109-3700/other-names/721942", 223 | "put-code": 721942, 224 | "display-index": 0 225 | }, 226 | { 227 | "created-date": { 228 | "value": 1461191605426 229 | }, 230 | "last-modified-date": { 231 | "value": 1461191605426 232 | }, 233 | "source": { 234 | "source-orcid": { 235 | "uri": "http://orcid.org/0000-0001-5109-3700", 236 | "path": "0000-0001-5109-3700", 237 | "host": "orcid.org" 238 | }, 239 | "source-client-id": null, 240 | "source-name": { 241 | "value": "Laurel L Haak" 242 | } 243 | }, 244 | "content": "Laure Haak", 245 | "visibility": "PUBLIC", 246 | "path": "/0000-0001-5109-3700/other-names/721943", 247 | "put-code": 721943, 248 | "display-index": 0 249 | }, 250 | { 251 | "created-date": { 252 | "value": 1461191605426 253 | }, 254 | "last-modified-date": { 255 | "value": 1461191605426 256 | }, 257 | "source": { 258 | "source-orcid": { 259 | "uri": "http://orcid.org/0000-0001-5109-3700", 260 | "path": "0000-0001-5109-3700", 261 | "host": "orcid.org" 262 | }, 263 | "source-client-id": null, 264 | "source-name": { 265 | "value": "Laurel L Haak" 266 | } 267 | }, 268 | "content": "Laurela L Hāka", 269 | "visibility": "PUBLIC", 270 | "path": "/0000-0001-5109-3700/other-names/721944", 271 | "put-code": 721944, 272 | "display-index": 0 273 | } 274 | ], 275 | "path": "/0000-0001-5109-3700/other-names" 276 | } 277 | } 278 | } 279 | """) 280 | self.crosswalker.crosswalk(orcid_profile, self.person_uri, self.graph) 281 | # Laurel is a person 282 | self.assertTrue(self.graph[D["test"]: RDF.type: FOAF.Person]) 283 | # with a label 284 | self.assertTrue(self.graph[D["test"]: RDFS.label: Literal("Laurel Haak")]) 285 | 286 | # vcard test 287 | self.assertTrue(bool(self.graph.query(""" 288 | ask where { 289 | ?vcn a vcard:Name . 290 | ?vcn vcard:familyName "Haak" . 291 | ?vcn vcard:givenName "Laurel" . 292 | ?vc obo:ARG_2000029 d:test . 293 | ?vc vcard:hasName ?vcn . 294 | } 295 | """))) 296 | 297 | def test_biography(self): 298 | orcid_profile = json.loads(""" 299 | { 300 | "person": { 301 | "biography": { 302 | "created-date": { 303 | "value": 1460753221411 304 | }, 305 | "last-modified-date": { 306 | "value": 1487932762756 307 | }, 308 | "content": "Laurel L. Haak, PhD, is the Executive Director of ORCID, an international and interdisciplinary non-profit organization dedicated to providing the technical infrastructure to generate and maintain unique and persistent identifiers for researchers and scholars.", 309 | "visibility": "PUBLIC", 310 | "path": "/0000-0001-5109-3700/biography" 311 | } 312 | } 313 | } 314 | """) 315 | 316 | self.crosswalker.crosswalk(orcid_profile, self.person_uri, self.graph) 317 | 318 | # Has a biography 319 | self.assertTrue(self.graph[D["test"]: VIVO["overview"]: Literal("Laurel L. 
Haak, PhD, is the Executive " 320 | "Director of ORCID, an international and " 321 | "interdisciplinary non-profit organization " 322 | "dedicated to providing the technical " 323 | "infrastructure to generate and maintain " 324 | "unique and persistent identifiers for " 325 | "researchers and scholars.")]) 326 | 327 | def test_no_biography(self): 328 | orcid_profile = json.loads(""" 329 | { 330 | "person": { 331 | "biography": { 332 | "created-date": { 333 | "value": 1460766291133 334 | }, 335 | "last-modified-date": { 336 | "value": 1460766291133 337 | }, 338 | "content": null, 339 | "visibility": "PUBLIC", 340 | "path": "/0000-0003-4507-4735/biography" 341 | } 342 | } 343 | } 344 | """) 345 | 346 | self.crosswalker.crosswalk(orcid_profile, self.person_uri, self.graph) 347 | 348 | # Has a biography 349 | self.assertEqual(0, len(self.graph)) 350 | 351 | def test_websites(self): 352 | orcid_profile = json.loads(""" 353 | { 354 | "person": { 355 | "researcher-urls": { 356 | "last-modified-date": { 357 | "value": 1463003428816 358 | }, 359 | "researcher-url": [ 360 | { 361 | "created-date": { 362 | "value": 1461191605427 363 | }, 364 | "last-modified-date": { 365 | "value": 1463003428816 366 | }, 367 | "source": { 368 | "source-orcid": { 369 | "uri": "http://orcid.org/0000-0001-5109-3700", 370 | "path": "0000-0001-5109-3700", 371 | "host": "orcid.org" 372 | }, 373 | "source-client-id": null, 374 | "source-name": { 375 | "value": "Laurel L Haak" 376 | } 377 | }, 378 | "url-name": "LinkedIn", 379 | "url": { 380 | "value": "http://www.linkedin.com/pub/laurel-haak/3/1b/4a3/" 381 | }, 382 | "visibility": "PUBLIC", 383 | "path": "/0000-0001-5109-3700/researcher-urls/714700", 384 | "put-code": 714700, 385 | "display-index": 0 386 | }, 387 | { 388 | "created-date": { 389 | "value": 1461191605427 390 | }, 391 | "last-modified-date": { 392 | "value": 1463003428816 393 | }, 394 | "source": { 395 | "source-orcid": { 396 | "uri": "http://orcid.org/0000-0001-5109-3700", 397 | "path": "0000-0001-5109-3700", 398 | "host": "orcid.org" 399 | }, 400 | "source-client-id": null, 401 | "source-name": { 402 | "value": "Laurel L Haak" 403 | } 404 | }, 405 | "url-name": null, 406 | "url": { 407 | "value": "https://www.researchgate.net/profile/Laurel_Haak" 408 | }, 409 | "visibility": "PUBLIC", 410 | "path": "/0000-0001-5109-3700/researcher-urls/714701", 411 | "put-code": 714701, 412 | "display-index": 0 413 | } 414 | ], 415 | "path": "/0000-0001-5109-3700/researcher-urls" 416 | } 417 | } 418 | } 419 | """) 420 | # Set ResearchGate url-name to null. 421 | 422 | self.crosswalker.crosswalk(orcid_profile, self.person_uri, self.graph) 423 | # LinkedIn 424 | self.assertTrue(bool(self.graph.query(""" 425 | ask where { 426 | ?vcw a vcard:URL . 427 | ?vcw vcard:url "http://www.linkedin.com/pub/laurel-haak/3/1b/4a3/"^^xsd:anyURI . 428 | ?vcw rdfs:label "LinkedIn" . 429 | ?vc a vcard:Individual . 430 | ?vc vcard:hasURL ?vcw . 431 | } 432 | """))) 433 | 434 | # ResearchGate 435 | self.assertTrue(bool(self.graph.query(""" 436 | ask where { 437 | ?vcw a vcard:URL . 438 | ?vcw vcard:url "https://www.researchgate.net/profile/Laurel_Haak"^^xsd:anyURI . 439 | ?vc a vcard:Individual . 440 | ?vc vcard:hasURL ?vcw . 441 | filter not exists { 442 | ?vcw rdfs:label ?label . 
443 | } 444 | } 445 | """))) 446 | 447 | def test_no_websites(self): 448 | orcid_profile = json.loads(""" 449 | { 450 | "person": { 451 | "researcher-urls": { 452 | "last-modified-date": null, 453 | "researcher-url": [], 454 | "path": "/0000-0003-4507-4735/researcher-urls" 455 | } 456 | } 457 | } 458 | """) 459 | 460 | self.crosswalker.crosswalk(orcid_profile, self.person_uri, self.graph) 461 | 462 | # Has a biography 463 | self.assertEqual(0, len(self.graph)) 464 | 465 | def test_no_keywords(self): 466 | orcid_profile = json.loads(""" 467 | { 468 | "person": { 469 | "keywords": { 470 | "last-modified-date": null, 471 | "keyword": [], 472 | "path": "/0000-0003-4507-4735/keywords" 473 | } 474 | } 475 | } 476 | """) 477 | 478 | self.crosswalker.crosswalk(orcid_profile, self.person_uri, self.graph) 479 | 480 | # Has a biography 481 | self.assertEqual(0, len(self.graph)) 482 | 483 | def test_keywords(self): 484 | orcid_profile = json.loads(""" 485 | { 486 | "person": { 487 | "keywords": { 488 | "last-modified-date": { 489 | "value": 1464800983143 490 | }, 491 | "keyword": [ 492 | { 493 | "created-date": { 494 | "value": 1461191605415 495 | }, 496 | "last-modified-date": { 497 | "value": 1464800983143 498 | }, 499 | "source": { 500 | "source-orcid": { 501 | "uri": "http://orcid.org/0000-0001-5109-3700", 502 | "path": "0000-0001-5109-3700", 503 | "host": "orcid.org" 504 | }, 505 | "source-client-id": null, 506 | "source-name": { 507 | "value": "Laurel L Haak" 508 | } 509 | }, 510 | "content": "persistent identifiers, research policy, science workforce, program evaluation, neuroscience, calcium imaging, oligodendrocytes, circadian rhythms", 511 | "visibility": "PUBLIC", 512 | "path": "/0000-0001-5109-3700/keywords/419740", 513 | "put-code": 419740, 514 | "display-index": 0 515 | } 516 | ], 517 | "path": "/0000-0001-5109-3700/keywords" 518 | } 519 | } 520 | } 521 | """) 522 | 523 | self.crosswalker.crosswalk(orcid_profile, self.person_uri, self.graph) 524 | self.assertEqual(8, len(self.graph)) 525 | 526 | self.assertTrue(bool(self.graph.query(""" 527 | ask where { 528 | d:test vivo:freetextKeyword "persistent identifiers" . 529 | d:test vivo:freetextKeyword "research policy" . 530 | d:test vivo:freetextKeyword "science workforce" . 
531 | } 532 | """))) 533 | -------------------------------------------------------------------------------- /tests/app/test_fundings.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from unittest import TestCase 5 | from orcid2vivo_app.fundings import FundingCrosswalk 6 | import orcid2vivo_app.vivo_namespace as ns 7 | from orcid2vivo_app.vivo_uri import HashIdentifierStrategy 8 | from orcid2vivo import SimpleCreateEntitiesStrategy 9 | 10 | from rdflib import Graph, RDFS 11 | import json 12 | 13 | 14 | class TestFundings(TestCase): 15 | 16 | def setUp(self): 17 | self.graph = Graph(namespace_manager=ns.ns_manager) 18 | self.person_uri = ns.D["test"] 19 | self.create_strategy = SimpleCreateEntitiesStrategy(HashIdentifierStrategy(), person_uri=self.person_uri) 20 | self.crosswalker = FundingCrosswalk(identifier_strategy=self.create_strategy, 21 | create_strategy=self.create_strategy) 22 | 23 | def test_no_funding(self): 24 | orcid_profile = json.loads(""" 25 | { 26 | "activities-summary": { 27 | "fundings": { 28 | "group": [] 29 | } 30 | } 31 | } 32 | """) 33 | self.crosswalker.crosswalk(orcid_profile, self.person_uri, self.graph) 34 | # Assert no triples in graph 35 | self.assertTrue(len(self.graph) == 0) 36 | 37 | def test_with_funding(self): 38 | orcid_profile = json.loads(""" 39 | { 40 | "activities-summary": { 41 | "fundings": { 42 | "last-modified-date": { 43 | "value": 1444208097475 44 | }, 45 | "group": [ 46 | { 47 | "last-modified-date": { 48 | "value": 1437078970386 49 | }, 50 | "external-ids": { 51 | "external-id": [ 52 | { 53 | "external-id-type": "grant_number", 54 | "external-id-value": "0536999", 55 | "external-id-url": { 56 | "value": "http://www.nsf.gov/awardsearch/showAward?AWD_ID=0536999&HistoricalAwards=false" 57 | }, 58 | "external-id-relationship": "SELF" 59 | } 60 | ] 61 | }, 62 | "funding-summary": [ 63 | { 64 | "created-date": { 65 | "value": 1427825460988 66 | }, 67 | "last-modified-date": { 68 | "value": 1437078970386 69 | }, 70 | "source": { 71 | "source-orcid": null, 72 | "source-client-id": { 73 | "uri": "http://orcid.org/client/0000-0003-2174-0924", 74 | "path": "0000-0003-2174-0924", 75 | "host": "orcid.org" 76 | }, 77 | "source-name": { 78 | "value": "ÜberWizard for ORCID" 79 | } 80 | }, 81 | "title": { 82 | "title": { 83 | "value": "ADVANCE Leadership Award: Women in Science and Engineering: A Guide to Maximizing their Potential" 84 | }, 85 | "translated-title": null 86 | }, 87 | "external-ids": { 88 | "external-id": [ 89 | { 90 | "external-id-type": "grant_number", 91 | "external-id-value": "0536999", 92 | "external-id-url": { 93 | "value": "http://www.nsf.gov/awardsearch/showAward?AWD_ID=0536999&HistoricalAwards=false" 94 | }, 95 | "external-id-relationship": "SELF" 96 | }, 97 | { 98 | "external-id-type": "grant_number", 99 | "external-id-value": "0536999", 100 | "external-id-url": { 101 | "value": "http://grants.uberresearch.com/100000081/0536999/ADVANCE-Leadership-Award-Women-in-Science-and-Engineering-A-Guide-to-Maximizing-their-Potential" 102 | }, 103 | "external-id-relationship": "SELF" 104 | } 105 | ] 106 | }, 107 | "type": "GRANT", 108 | "start-date": { 109 | "year": { 110 | "value": "2006" 111 | }, 112 | "month": { 113 | "value": "04" 114 | }, 115 | "day": { 116 | "value": "01" 117 | } 118 | }, 119 | "end-date": { 120 | "year": { 121 | "value": "2007" 122 | }, 123 | "month": { 124 | "value": "03" 125 | }, 126 | "day": { 127 | "value": "31" 128 | } 129 | 
}, 130 | "organization": { 131 | "name": "National Science Foundation - Directorate for Education and Human Resources", 132 | "address": { 133 | "city": "n/a", 134 | "region": null, 135 | "country": "US" 136 | }, 137 | "disambiguated-organization": null 138 | }, 139 | "visibility": "PUBLIC", 140 | "put-code": 74458, 141 | "path": "/0000-0001-5109-3700/funding/74458", 142 | "display-index": "0" 143 | } 144 | ] 145 | }, 146 | { 147 | "last-modified-date": { 148 | "value": 1440583684368 149 | }, 150 | "external-ids": { 151 | "external-id": [ 152 | { 153 | "external-id-type": "grant_number", 154 | "external-id-value": "0305602", 155 | "external-id-url": { 156 | "value": "http://www.nsf.gov/awardsearch/showAward?AWD_ID=0305602&HistoricalAwards=false" 157 | }, 158 | "external-id-relationship": "SELF" 159 | } 160 | ] 161 | }, 162 | "funding-summary": [ 163 | { 164 | "created-date": { 165 | "value": 1440583684368 166 | }, 167 | "last-modified-date": { 168 | "value": 1440583684368 169 | }, 170 | "source": { 171 | "source-orcid": null, 172 | "source-client-id": { 173 | "uri": "http://orcid.org/client/0000-0003-2174-0924", 174 | "path": "0000-0003-2174-0924", 175 | "host": "orcid.org" 176 | }, 177 | "source-name": { 178 | "value": "ÜberWizard for ORCID" 179 | } 180 | }, 181 | "title": { 182 | "title": { 183 | "value": "Postdoc Network Annual Policy Meeting; Berkeley, CA, March 15-17, 2003" 184 | }, 185 | "translated-title": null 186 | }, 187 | "external-ids": { 188 | "external-id": [ 189 | { 190 | "external-id-type": "grant_number", 191 | "external-id-value": "0305602", 192 | "external-id-url": { 193 | "value": "http://www.nsf.gov/awardsearch/showAward?AWD_ID=0305602&HistoricalAwards=false" 194 | }, 195 | "external-id-relationship": "SELF" 196 | }, 197 | { 198 | "external-id-type": "grant_number", 199 | "external-id-value": "0305602", 200 | "external-id-url": { 201 | "value": "http://grants.uberresearch.com/100000076/0305602/Postdoc-Network-Annual-Policy-Meeting-Berkeley-CA-March-15-17-2003" 202 | }, 203 | "external-id-relationship": "SELF" 204 | } 205 | ] 206 | }, 207 | "type": "GRANT", 208 | "start-date": { 209 | "year": { 210 | "value": "2003" 211 | }, 212 | "month": { 213 | "value": "03" 214 | }, 215 | "day": { 216 | "value": "01" 217 | } 218 | }, 219 | "end-date": { 220 | "year": { 221 | "value": "2004" 222 | }, 223 | "month": { 224 | "value": "02" 225 | }, 226 | "day": { 227 | "value": "29" 228 | } 229 | }, 230 | "organization": { 231 | "name": "National Science Foundation - Directorate for Biological Sciences", 232 | "address": { 233 | "city": "n/a", 234 | "region": null, 235 | "country": "US" 236 | }, 237 | "disambiguated-organization": null 238 | }, 239 | "visibility": "PUBLIC", 240 | "put-code": 105986, 241 | "path": "/0000-0001-5109-3700/funding/105986", 242 | "display-index": "0" 243 | } 244 | ] 245 | }, 246 | { 247 | "last-modified-date": { 248 | "value": 1440583684380 249 | }, 250 | "external-ids": { 251 | "external-id": [ 252 | { 253 | "external-id-type": "grant_number", 254 | "external-id-value": "0342159", 255 | "external-id-url": { 256 | "value": "http://www.nsf.gov/awardsearch/showAward?AWD_ID=0342159&HistoricalAwards=false" 257 | }, 258 | "external-id-relationship": "SELF" 259 | } 260 | ] 261 | }, 262 | "funding-summary": [ 263 | { 264 | "created-date": { 265 | "value": 1440583684380 266 | }, 267 | "last-modified-date": { 268 | "value": 1440583684380 269 | }, 270 | "source": { 271 | "source-orcid": null, 272 | "source-client-id": { 273 | "uri": 
"http://orcid.org/client/0000-0003-2174-0924", 274 | "path": "0000-0003-2174-0924", 275 | "host": "orcid.org" 276 | }, 277 | "source-name": { 278 | "value": "ÜberWizard for ORCID" 279 | } 280 | }, 281 | "title": { 282 | "title": { 283 | "value": "Policy Implications of International Graduate Students and Postdocs in the United States" 284 | }, 285 | "translated-title": null 286 | }, 287 | "external-ids": { 288 | "external-id": [ 289 | { 290 | "external-id-type": "grant_number", 291 | "external-id-value": "0342159", 292 | "external-id-url": { 293 | "value": "http://www.nsf.gov/awardsearch/showAward?AWD_ID=0342159&HistoricalAwards=false" 294 | }, 295 | "external-id-relationship": "SELF" 296 | }, 297 | { 298 | "external-id-type": "grant_number", 299 | "external-id-value": "0342159", 300 | "external-id-url": { 301 | "value": "http://grants.uberresearch.com/100000179/0342159/Policy-Implications-of-International-Graduate-Students-and-Postdocs-in-the-United-States" 302 | }, 303 | "external-id-relationship": "SELF" 304 | } 305 | ] 306 | }, 307 | "type": "GRANT", 308 | "start-date": { 309 | "year": { 310 | "value": "2004" 311 | }, 312 | "month": { 313 | "value": "03" 314 | }, 315 | "day": { 316 | "value": "01" 317 | } 318 | }, 319 | "end-date": { 320 | "year": { 321 | "value": "2006" 322 | }, 323 | "month": { 324 | "value": "02" 325 | }, 326 | "day": { 327 | "value": "28" 328 | } 329 | }, 330 | "organization": { 331 | "name": "National Science Foundation - Office of the Director", 332 | "address": { 333 | "city": "n/a", 334 | "region": null, 335 | "country": "US" 336 | }, 337 | "disambiguated-organization": null 338 | }, 339 | "visibility": "PUBLIC", 340 | "put-code": 105988, 341 | "path": "/0000-0001-5109-3700/funding/105988", 342 | "display-index": "0" 343 | } 344 | ] 345 | }, 346 | { 347 | "last-modified-date": { 348 | "value": 1444208097475 349 | }, 350 | "external-ids": { 351 | "external-id": [ 352 | { 353 | "external-id-type": "grant_number", 354 | "external-id-value": "5F31MH010500-03", 355 | "external-id-url": { 356 | "value": "http://projectreporter.nih.gov/project_info_description.cfm?aid=2241697" 357 | }, 358 | "external-id-relationship": "SELF" 359 | } 360 | ] 361 | }, 362 | "funding-summary": [ 363 | { 364 | "created-date": { 365 | "value": 1444208097475 366 | }, 367 | "last-modified-date": { 368 | "value": 1444208097475 369 | }, 370 | "source": { 371 | "source-orcid": null, 372 | "source-client-id": { 373 | "uri": "http://orcid.org/client/0000-0003-2174-0924", 374 | "path": "0000-0003-2174-0924", 375 | "host": "orcid.org" 376 | }, 377 | "source-name": { 378 | "value": "ÜberWizard for ORCID" 379 | } 380 | }, 381 | "title": { 382 | "title": { 383 | "value": "CELLULAR BASIS OF CIRCADIAN CLOCK IN SCN" 384 | }, 385 | "translated-title": null 386 | }, 387 | "external-ids": { 388 | "external-id": [ 389 | { 390 | "external-id-type": "grant_number", 391 | "external-id-value": "5F31MH010500-03", 392 | "external-id-url": { 393 | "value": "http://projectreporter.nih.gov/project_info_description.cfm?aid=2241697" 394 | }, 395 | "external-id-relationship": "SELF" 396 | }, 397 | { 398 | "external-id-type": "grant_number", 399 | "external-id-value": "5F31MH010500-03", 400 | "external-id-url": { 401 | "value": "http://grants.uberresearch.com/100000025/F31MH010500/CELLULAR-BASIS-OF-CIRCADIAN-CLOCK-IN-SCN" 402 | }, 403 | "external-id-relationship": "SELF" 404 | } 405 | ] 406 | }, 407 | "type": "GRANT", 408 | "start-date": { 409 | "year": { 410 | "value": "1994" 411 | }, 412 | "month": { 413 | "value": 
"10" 414 | }, 415 | "day": { 416 | "value": "01" 417 | } 418 | }, 419 | "end-date": null, 420 | "organization": { 421 | "name": "National Institute of Mental Health", 422 | "address": { 423 | "city": "Bethesda", 424 | "region": null, 425 | "country": "US" 426 | }, 427 | "disambiguated-organization": null 428 | }, 429 | "visibility": "PUBLIC", 430 | "put-code": 116401, 431 | "path": "/0000-0001-5109-3700/funding/116401", 432 | "display-index": "0" 433 | } 434 | ] 435 | } 436 | ], 437 | "path": "/0000-0001-5109-3700/fundings" 438 | } 439 | } 440 | } 441 | """) 442 | self.crosswalker.crosswalk(orcid_profile, self.person_uri, self.graph) 443 | # Verify a grant exists. 444 | grant_uri = ns.D['grant-9ea22d7c992375778b4a3066f5142624'] 445 | self.assertEqual( 446 | self.graph.value(grant_uri, RDFS.label).toPython(), 447 | u"Policy Implications of International Graduate Students and Postdocs in the United States" 448 | ) 449 | # Verify three PI roles related to grants for this person uri. 450 | pi_roles = [guri for guri in self.graph.subjects(predicate=ns.OBO['RO_0000052'], object=self.person_uri)] 451 | self.assertEqual(len(pi_roles), 4) 452 | 453 | def test_with_funding(self): 454 | orcid_profile = json.loads(""" 455 | { 456 | "activities-summary": { 457 | "fundings": { 458 | "last-modified-date": { 459 | "value": 1449261003455 460 | }, 461 | "group": [ 462 | { 463 | "last-modified-date": { 464 | "value": 1449261003455 465 | }, 466 | "external-ids": { 467 | "external-id": [] 468 | }, 469 | "funding-summary": [ 470 | { 471 | "created-date": { 472 | "value": 1449261003455 473 | }, 474 | "last-modified-date": { 475 | "value": 1449261003455 476 | }, 477 | "source": { 478 | "source-orcid": { 479 | "uri": "http://orcid.org/0000-0003-3844-5120", 480 | "path": "0000-0003-3844-5120", 481 | "host": "orcid.org" 482 | }, 483 | "source-client-id": null, 484 | "source-name": { 485 | "value": "Ira Lurie" 486 | } 487 | }, 488 | "title": { 489 | "title": { 490 | "value": "The Utility of Ultra High Performance Supercritical Fluid Chromatography for the Analysis of Seized Drugs: Application to Synthetic Cannabinoids and Bath Salts " 491 | }, 492 | "translated-title": null 493 | }, 494 | "external-ids": null, 495 | "type": "GRANT", 496 | "start-date": { 497 | "year": { 498 | "value": "2015" 499 | }, 500 | "month": { 501 | "value": "01" 502 | }, 503 | "day": null 504 | }, 505 | "end-date": { 506 | "year": { 507 | "value": "2016" 508 | }, 509 | "month": { 510 | "value": "12" 511 | }, 512 | "day": null 513 | }, 514 | "organization": { 515 | "name": "National Institute of Justice", 516 | "address": { 517 | "city": "DC", 518 | "region": "DC", 519 | "country": "US" 520 | }, 521 | "disambiguated-organization": { 522 | "disambiguated-organization-identifier": "http://dx.doi.org/10.13039/100005289", 523 | "disambiguation-source": "FUNDREF" 524 | } 525 | }, 526 | "visibility": "PUBLIC", 527 | "put-code": 132761, 528 | "path": "/0000-0003-3844-5120/funding/132761", 529 | "display-index": "0" 530 | } 531 | ] 532 | } 533 | ], 534 | "path": "/0000-0003-3844-5120/fundings" 535 | } 536 | } 537 | } 538 | """) 539 | self.crosswalker.crosswalk(orcid_profile, self.person_uri, self.graph) 540 | # Verify a grant exists. 
541 | grant_uri = ns.D['grant-742228eecfbdacf092bf482f84151082'] 542 | self.assertEqual( 543 | self.graph.value(grant_uri, RDFS.label).toPython(), 544 | u"The Utility of Ultra High Performance Supercritical Fluid Chromatography for the Analysis of Seized " 545 | u"Drugs: Application to Synthetic Cannabinoids and Bath Salts " 546 | ) 547 | self.assertEqual(0, len(list(self.graph[grant_uri : ns.VIVO.sponsorAwardId : ]))) 548 | -------------------------------------------------------------------------------- /tests/app/test_utility.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | from orcid2vivo_app.utility import clean_orcid, is_valid_orcid 3 | 4 | 5 | class TestUtility(TestCase): 6 | def test_clean_orcid(self): 7 | orcid = '0000-0003-1527-0030' 8 | 9 | # Test with orcid.org prefix. 10 | self.assertEqual(clean_orcid('orcid.org/' + orcid), orcid) 11 | 12 | # Test with http://orcid.org prefix. 13 | self.assertEqual(clean_orcid('http://orcid.org/' + orcid), orcid) 14 | 15 | # Test without prefix. 16 | self.assertEqual(clean_orcid(orcid), orcid) 17 | 18 | def test_is_valid_orcid(self): 19 | self.assertTrue(is_valid_orcid("0000-0003-1527-0030")) 20 | self.assertTrue(is_valid_orcid("0000-0003-1527-003X")) 21 | self.assertFalse(is_valid_orcid("0000-0003-1527-00301")) 22 | self.assertFalse(is_valid_orcid("0000-0003-1527-003")) 23 | -------------------------------------------------------------------------------- /tests/app/test_vivo_uri.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | from orcid2vivo_app.vivo_uri import HashIdentifierStrategy 3 | from orcid2vivo_app.vivo_namespace import VIVO, OBO 4 | 5 | 6 | class TestHashIdentifierStrategy(TestCase): 7 | def setUp(self): 8 | self.strategy = HashIdentifierStrategy() 9 | 10 | def test_to_identifier(self): 11 | uri = "http://vivo.mydomain.edu/individual/grant-3c73b079585811b9cbb23c3253a0796a" 12 | self.assertEqual(uri, str(self.strategy.to_uri(VIVO.Grant, {"foo": "My Foo", "bar": "My Bar"}))) 13 | #Switch order 14 | self.assertEqual(uri, str(self.strategy.to_uri(VIVO.Grant, {"bar": "My Bar", "foo": "My Foo"}))) 15 | #Add a none 16 | self.assertEqual(uri, str(self.strategy.to_uri(VIVO.Grant, 17 | {"foo": "My Foo", "bar": "My Bar", "foobar": None}))) 18 | #General class trumps class 19 | self.assertEqual(uri, str(self.strategy.to_uri(VIVO.AnotherClazz, 20 | {"foo": "My Foo", "bar": "My Bar", "foobar": None}, 21 | general_clazz=VIVO.Grant))) 22 | 23 | #Different class 24 | self.assertNotEqual(uri, str(self.strategy.to_uri(VIVO.NotAGrant, {"foo": "My Foo", "bar": "My Bar"}))) 25 | #General class 26 | self.assertNotEqual(uri, str(self.strategy.to_uri(VIVO.Grant, {"foo": "My Foo", "bar": "My Bar"}, 27 | general_clazz=VIVO.AnotherClass))) 28 | #Changed attr 29 | self.assertNotEqual(uri, str(self.strategy.to_uri(VIVO.Grant, {"foo": "Not My Foo", "bar": "My Bar"}))) 30 | #Additional attr 31 | self.assertNotEqual(uri, str(self.strategy.to_uri(VIVO.Grant, 32 | {"foo": "My Foo", "bar": "My Bar", 33 | "foobar": "My FooBar"}))) 34 | 35 | def test_class_to_prefix(self): 36 | self.assertEqual("grant", HashIdentifierStrategy._class_to_prefix(VIVO.Grant)) 37 | self.assertEqual("ro_0000052", HashIdentifierStrategy._class_to_prefix(OBO.RO_0000052)) 38 | self.assertIsNone(HashIdentifierStrategy._class_to_prefix(None)) 39 | -------------------------------------------------------------------------------- /tests/test_loader.py: 
-------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import tempfile 3 | import shutil 4 | from orcid2vivo_loader import Store, load_single 5 | import os 6 | import tests 7 | import time 8 | import datetime 9 | import vcr 10 | from mock import patch, call 11 | from rdflib.compare import to_isomorphic 12 | 13 | my_vcr = vcr.VCR( 14 | cassette_library_dir=tests.FIXTURE_PATH, 15 | ) 16 | 17 | 18 | class TestStore(tests.TestCase): 19 | def setUp(self): 20 | self.data_path = tempfile.mkdtemp() 21 | self.db_filepath = os.path.join(self.data_path, "orcid2vivo.db") 22 | 23 | def tearDown(self): 24 | shutil.rmtree(self.data_path, ignore_errors=True) 25 | 26 | def test_persist(self): 27 | self.assertFalse(os.path.exists(self.db_filepath)) 28 | with Store(self.data_path) as store: 29 | self.assertEqual(self.db_filepath, store.db_filepath) 30 | # Created 31 | self.assertTrue(os.path.exists(self.db_filepath)) 32 | # Add 33 | store.add("0000-0003-1527-0030") 34 | 35 | # Still exists after close 36 | self.assertTrue(os.path.exists(self.db_filepath)) 37 | with Store(self.data_path) as store: 38 | self.assertTrue("0000-0003-1527-0030" in store) 39 | 40 | def test_contains(self): 41 | with Store(self.data_path) as store: 42 | # Add 43 | store.add("0000-0003-1527-0030") 44 | self.assertTrue("0000-0003-1527-0030" in store) 45 | self.assertFalse("X000-0003-1527-0030" in store) 46 | self.assertTrue(store.contains("0000-0003-1527-0030")) 47 | self.assertTrue(store.contains("0000-0003-1527-0030", True)) 48 | self.assertFalse(store.contains("0000-0003-1527-0030", False)) 49 | 50 | def test_add_item(self): 51 | with Store(self.data_path) as store: 52 | # Insert 53 | store.add("0000-0003-1527-0030") 54 | (orcid_id, active, last_update, person_uri, person_id, person_class, confirmed) = \ 55 | store["0000-0003-1527-0030"] 56 | self.assertTrue(active) 57 | self.assertIsNone(person_uri) 58 | self.assertIsNone(person_id) 59 | self.assertIsNone(person_class) 60 | self.assertIsNone(last_update) 61 | self.assertFalse(confirmed) 62 | # Update 63 | store.add("0000-0003-1527-0030", person_uri="http://me", person_id="me", person_class="Librarian", 64 | confirmed=True) 65 | (orcid_id, active, last_update, person_uri, person_id, person_class, confirmed) = \ 66 | store["0000-0003-1527-0030"] 67 | self.assertTrue(active) 68 | self.assertEqual("http://me", person_uri) 69 | self.assertEqual("me", person_id) 70 | self.assertEqual("Librarian", person_class) 71 | self.assertIsNone(last_update) 72 | self.assertTrue(confirmed) 73 | 74 | def test_add_deleted_item(self): 75 | with Store(self.data_path) as store: 76 | # Insert 77 | store.add("0000-0003-1527-0030") 78 | (orcid_id, active, last_update, person_uri, person_id, person_class, confirmed) = \ 79 | store["0000-0003-1527-0030"] 80 | self.assertTrue(active) 81 | self.assertIsNone(person_uri) 82 | self.assertIsNone(person_id) 83 | self.assertIsNone(person_class) 84 | self.assertIsNone(last_update) 85 | self.assertFalse(confirmed) 86 | # Delete 87 | del store["0000-0003-1527-0030"] 88 | # Add again 89 | store.add("0000-0003-1527-0030") 90 | (orcid_id, active, last_update, person_uri, person_id, person_class, confirmed) = \ 91 | store["0000-0003-1527-0030"] 92 | self.assertTrue(active) 93 | self.assertIsNone(person_uri) 94 | self.assertIsNone(person_id) 95 | self.assertIsNone(person_class) 96 | self.assertIsNone(last_update) 97 | 98 | def test_del(self): 99 | with Store(self.data_path) as store: 100 | 
store.add("0000-0003-1527-0030") 101 | self.assertTrue("0000-0003-1527-0030" in store) 102 | del store["0000-0003-1527-0030"] 103 | self.assertFalse("0000-0003-1527-0030" in store) 104 | 105 | def test_touch(self): 106 | with Store(self.data_path) as store: 107 | store.add("0000-0003-1527-0030") 108 | (orcid_id, active, last_update, person_uri, person_id, person_class, confirmed) = \ 109 | store["0000-0003-1527-0030"] 110 | time.sleep(1) 111 | store.touch("0000-0003-1527-0030") 112 | (orcid_id, active, new_last_update, person_uri, person_id, person_class, confirmed) = \ 113 | store["0000-0003-1527-0030"] 114 | self.assertNotEqual(last_update, new_last_update) 115 | 116 | def test_get_least_recent(self): 117 | with Store(self.data_path) as store: 118 | store.add("0000-0003-1527-0030") 119 | store.add("0000-0003-1527-0031") 120 | store.add("0000-0003-1527-0032") 121 | # Deactivate one to make sure not returned. 122 | del store["0000-0003-1527-0032"] 123 | # Touch first 124 | time.sleep(1) 125 | t = datetime.datetime.utcnow() 126 | time.sleep(1) 127 | store.touch("0000-0003-1527-0030") 128 | results = list(store.get_least_recent()) 129 | self.assertEqual(2, len(results)) 130 | self.assertEqual("0000-0003-1527-0031", results[0][0]) 131 | self.assertEqual("0000-0003-1527-0030", results[1][0]) 132 | 133 | # With limit 134 | results = list(store.get_least_recent(limit=1)) 135 | self.assertEqual(1, len(results)) 136 | self.assertEqual("0000-0003-1527-0031", results[0][0]) 137 | 138 | # Before 139 | results = list(store.get_least_recent(before_datetime=t)) 140 | self.assertEqual(1, len(results)) 141 | self.assertEqual("0000-0003-1527-0031", results[0][0]) 142 | 143 | def test_iter(self): 144 | with Store(self.data_path) as store: 145 | store.add("0000-0003-1527-0030") 146 | store.add("0000-0003-1527-0031") 147 | for orcid_id, active, new_last_update, person_uri, person_id, person_class, confirmed in store: 148 | self.assertTrue(orcid_id in ("0000-0003-1527-0030", "0000-0003-1527-0031")) 149 | self.assertEqual(2, len(list(store))) 150 | 151 | def test_delete_all(self): 152 | with Store(self.data_path) as store: 153 | store.add("0000-0003-1527-0030") 154 | store.add("0000-0003-1527-0031") 155 | self.assertTrue("0000-0003-1527-0030" in store) 156 | self.assertTrue("0000-0003-1527-0031" in store) 157 | store.delete_all() 158 | self.assertFalse("0000-0003-1527-0030" in store) 159 | self.assertFalse("0000-0003-1527-0031" in store) 160 | 161 | 162 | class TestLoad(tests.TestCase): 163 | def setUp(self): 164 | self.data_path = tempfile.mkdtemp() 165 | 166 | @my_vcr.use_cassette('loader/load_single.yaml') 167 | @patch("orcid2vivo_loader.sparql_insert") 168 | @patch("orcid2vivo_loader.sparql_delete") 169 | def test_load_single(self, mock_sparql_delete, mock_sparql_insert): 170 | with Store(self.data_path) as store: 171 | store.add("0000-0003-1527-0030") 172 | (orcid_id, active, last_update, person_uri, person_id, person_class, confirmed) = \ 173 | store["0000-0003-1527-0030"] 174 | self.assertIsNone(last_update) 175 | 176 | graph1, add_graph1, delete_graph1 = load_single("0000-0003-1527-0030", None, None, None, self.data_path, 177 | "http://vivo.mydomain.edu/sparql", "vivo@mydomain.edu", 178 | "password") 179 | 180 | self.assertEqual(319, len(add_graph1)) 181 | self.assertEqual(0, len(delete_graph1)) 182 | 183 | self.assertEqual(to_isomorphic(graph1), to_isomorphic(add_graph1)) 184 | 185 | with Store(self.data_path) as store: 186 | # Last update now set 187 | (orcid_id, active, last_update, person_uri, 
person_id, person_class, confirmed) = \ 188 | store["0000-0003-1527-0030"] 189 | self.assertIsNotNone(last_update) 190 | 191 | # Make sure turtle file created 192 | self.assertTrue(os.path.exists(os.path.join(self.data_path, "0000-0003-1527-0030.ttl"))) 193 | 194 | # Now change a fact and run again. Changed fact is provided by vcr recording. 195 | # Changed year of Amherst degree. 196 | # Had to rig the Accept-Encoding to create the vcr recording with: 197 | # r = requests.get('https://pub.orcid.org/v2.0/%s' % orcid, 198 | # headers={"Accept": "application/json", "Accept-Encoding": "identity"}) 199 | 200 | graph2, add_graph2, delete_graph2 = load_single("0000-0003-1527-0030", None, None, None, 201 | self.data_path, "http://vivo.mydomain.edu/sparql", 202 | "vivo@mydomain.edu", "password") 203 | 204 | self.assertEqual(319, len(graph2)) 205 | self.assertEqual(17, len(add_graph2)) 206 | self.assertEqual(17, len(delete_graph2)) 207 | 208 | mock_sparql_insert.assert_has_calls([ 209 | call(add_graph1, "http://vivo.mydomain.edu/sparql", "vivo@mydomain.edu", "password"), 210 | call(add_graph2, "http://vivo.mydomain.edu/sparql", "vivo@mydomain.edu", "password")]) 211 | mock_sparql_delete.assert_has_calls([ 212 | call(delete_graph1, "http://vivo.mydomain.edu/sparql", "vivo@mydomain.edu", "password"), 213 | call(delete_graph2, "http://vivo.mydomain.edu/sparql", "vivo@mydomain.edu", "password")]) 214 | -------------------------------------------------------------------------------- /tests/test_orcid2vivo.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | from rdflib import Graph, URIRef, RDF, OWL 3 | import orcid2vivo_app.vivo_namespace as ns 4 | from orcid2vivo import PersonCrosswalk 5 | from orcid2vivo_app.vivo_namespace import VIVO 6 | 7 | 8 | class TestPersonCrosswalk(TestCase): 9 | def setUp(self): 10 | self.graph = Graph(namespace_manager=ns.ns_manager) 11 | self.person_uri = ns.D["test"] 12 | self.orcid_id = "0000-0003-1527-0030" 13 | self.orcid_id_uriref = URIRef("http://orcid.org/{}".format(self.orcid_id)) 14 | 15 | def test_add_orcid_id(self): 16 | PersonCrosswalk._add_orcid_id(self.person_uri, self.orcid_id, self.graph, False) 17 | self.assertEqual(2, len(self.graph)) 18 | 19 | self.assertTrue((self.person_uri, VIVO.orcidId, self.orcid_id_uriref) in self.graph) 20 | self.assertTrue((self.orcid_id_uriref, RDF.type, OWL.Thing) in self.graph) 21 | 22 | def test_add_orcid_id_confirmed(self): 23 | PersonCrosswalk._add_orcid_id(self.person_uri, self.orcid_id, self.graph, True) 24 | self.assertEqual(3, len(self.graph)) 25 | 26 | self.assertTrue((self.orcid_id_uriref, VIVO.confirmedOrcidId, self.person_uri) in self.graph) 27 | --------------------------------------------------------------------------------