├── Binder.png ├── CITATION.cff ├── LICENSE ├── README.md ├── Search for Experts ├── Readme.md └── openalex_get_experts_by_list_of_persons.ipynb ├── Search for funder informations ├── Crossref_get_doi_funder_informations_from_an_organization.ipynb ├── Crossref_get_funder_informations_by_list_of_persons.ipynb ├── Crossref_get_funder_informations_of_a_list_of_dois.ipynb └── Readme.md ├── organization-organization ├── README.md └── ror-organigram.ipynb ├── organization-people ├── README.md ├── freya_get_people_by_organization.ipynb ├── openalex_get_people_by_organization.ipynb └── orcid_get_people_by_organization.ipynb ├── organization-works ├── Readme.md ├── crossref_get_works_by_organization.ipynb └── openalex_get_works_by_organization.ipynb ├── person-works ├── README.md ├── crossref_get_works_by_list_of_persons.ipynb ├── crossref_get_works_by_person.ipynb ├── freya_get_works_by_person.ipynb ├── openaire_get_publications_by_person.ipynb ├── openalex_get_works_by_list_of_persons.ipynb ├── openalex_get_works_by_person.ipynb └── orcid_get_works_by_person.ipynb ├── requirements.txt └── work-projects ├── README.md └── openaire_get_projects_by_work.ipynb /Binder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Project-TAPIR/pidgraph-notebooks/23b3bb0e14f8ee949cf68438dfeed6712d0519cb/Binder.png -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software in your work, please cite it using the following metadata." 3 | title: "Project-TAPIR: pidgraph-notebooks" 4 | abstract: "A collection of Jupyter notebooks with examples of querying different PID providers like ORCID, ROR, Crossref and PID graphs like the FREYA PID Graph, OpenAlex, OpenAIRE for connected objects." 
5 | type: software 6 | authors: 7 | - given-names: "Sandra" 8 | family-names: "Mierz" 9 | email: "sandra.mierz@tib.eu" 10 | affiliation: "TIB" 11 | orcid: "https://orcid.org/0000-0002-8913-9011" 12 | repository-code: "https://github.com/Project-TAPIR/pidgraph-notebooks" 13 | license: "BSD-3-Clause" 14 | doi: "10.5281/zenodo.6287038" 15 | identifiers: 16 | - description: "This is the collection of archived snapshots of all versions of Project-TAPIR: pidgraph-notebooks" 17 | type: doi 18 | value: "10.5281/zenodo.6287037" 19 | - description: "This is the archived snapshot of version 1.0.0 of Project-TAPIR: pidgraph-notebooks" 20 | type: doi 21 | value: "10.5281/zenodo.6287038" 22 | - description: "This is the archived snapshot of version 1.1.0 of Project-TAPIR: pidgraph-notebooks" 23 | type: doi 24 | value: "10.5281/zenodo.6373245" 25 | keywords: 26 | - "FREYA PID Graph" 27 | - "ORCID" 28 | - "ROR" 29 | - "DOI" 30 | - "Datacite" 31 | - "Crossref" 32 | - "OpenAlex" 33 | - "OpenAIRE" 34 | references: 35 | - authors: 36 | - family-names: "Fenner" 37 | given-names: "Martin" 38 | - family-names: "Garza" 39 | given-names: "Kristian" 40 | doi: "10.5438/8gb0-v673" 41 | repository-code: "https://github.com/datacite/lupo" 42 | title: "DataCite Application API" 43 | type: software 44 | version: "4.5.4" 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2022, Technische Informationsbibliothek (TIB) & Universität Osnabrück 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. 
Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pidgraph-notebooks 2 | 3 | [![DOI](https://zenodo.org/badge/447263093.svg)](https://zenodo.org/badge/latestdoi/447263093) 4 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/Project-TAPIR/pidgraph-notebooks/main) 5 | 6 | A collection of Jupyter notebooks with examples of querying different PID providers like [ORCID](https://orcid.org/), [ROR](https://ror.readme.io/), [Crossref](https://www.crossref.org/) and PID graphs like the [FREYA PID Graph](https://blog.datacite.org/powering-the-pid-graph/), [OpenAlex](https://openalex.org/about) and [OpenAIRE](https://www.openaire.eu/) for connected objects. 7 | 8 | Currently included connections: 9 | * organization-organization 10 | * input: ROR 11 | * output: hierarchy of sub-organizations starting at given organization, each identified by their ROR 12 | * data sources: ROR 13 | * organization-people 14 | * input: ROR 15 | * output: list of people affiliated with the organization, each identified by their ORCID iD 16 | * data sources: FREYA PID Graph, OpenAlex, ORCID 17 | * organization-works 18 | * input: ROR 19 | * output: list of works affiliated with an organization, each identified by their DOI 20 | * data sources: Crossref, OpenAlex 21 | * person-works 22 | * input: ORCID 23 | * output: list of works authored/created by the person, each identified by their DOI 24 | * data sources: Crossref, FREYA PID Graph, OpenAlex, ORCID, OpenAIRE 25 | * work-projects 26 | * input: DOI 27 | * output: list of projects the work was produced in, each identified by their OpenAIRE project ID 28 | * data sources: OpenAIRE 29 | * Search for experts 30 | * input: ORCID iD, OpenAlex Concept 31 | * output: ORCID iD with the respective concepts and their concept scores 32 | * data source: OpenAlex 33 | * Search for funder informations 
34 | * input: ROR, ORCID, DOI 35 | * output: list of DOIs and their funder information 36 | * data source: Crossref 37 | 38 | Please navigate into the respective folder to see the list of available notebooks. 39 | 40 | ### Run notebooks 41 | While GitHub renders Jupyter notebooks as static HTML files (not executable), 42 | you can use this link to launch the notebooks on Binder where you can execute and modify them: 43 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/Project-TAPIR/pidgraph-notebooks/main) 44 | 45 | [![Screenshot Binder](Binder.png)](https://mybinder.org/v2/gh/Project-TAPIR/pidgraph-notebooks/main) 46 | 47 | ---------------------------- 48 | 49 | ### Background 50 | In the joint project [TAPIR](https://projects.tib.eu/tapir/en/) (Partially Automated Persistent Identifier-based Reporting), partially automated procedures for research reporting are being tested in the context of university and non-university research. To this end, the following question is being investigated: 51 | 52 | To what extent can the necessary data aggregation be carried out on the basis of openly available research information using persistent identifiers? 53 | 54 | *More information in our blog post "[Project TAPIR: Harvesting the power of PIDs](https://blogs.tib.eu/wp/tib/2022/03/01/project-tapir-harvesting-the-power-of-pids/)"* 55 | -------------------------------------------------------------------------------- /Search for Experts/Readme.md: -------------------------------------------------------------------------------- 1 | ## Search for Experts 2 | 3 | A Jupyter Notebook showing the use case of an expert search. It uses ORCID iDs and a concept ID to identify the research field of an author. 
4 | 5 | * [OpenAlex](https://openalex.org/about) 6 | -------------------------------------------------------------------------------- /Search for Experts/openalex_get_experts_by_list_of_persons.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "0fdf9631", 6 | "metadata": {}, 7 | "source": [ 8 | "## Search for Experts from a list of ORCID IDs\n", 9 | "\n", 10 | "This notebook queries the [OpenAlex API](https://docs.openalex.org/api) via its `/works` endpoint for works authored by a person. It then uses the metadata from the concept field (research field) to calculate a score for each given concept. This is used to print a list of experts for a given research field and their overall score." 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "id": "W8sB1ZF6aKG2", 17 | "metadata": { 18 | "executionInfo": { 19 | "elapsed": 5, 20 | "status": "ok", 21 | "timestamp": 1643058187428, 22 | "user": { 23 | "displayName": "", 24 | "photoUrl": "", 25 | "userId": "" 26 | }, 27 | "user_tz": -60 28 | }, 29 | "id": "W8sB1ZF6aKG2" 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "# Prerequisites:\n", 34 | "import requests # dependency to make HTTP calls" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "id": "be2fe02c", 40 | "metadata": {}, 41 | "source": [ 42 | "#### Subject List\n", 43 | "\n", 44 | "This list contains all 19 root concepts (Level 0) from OpenAlex. These concepts are the broadest concepts; OpenAlex contains over 60000 different concepts, which can be found here:\n", 45 | "This search can work with all concepts and can search multiple concepts at the same time. \n", 46 | "\n", 47 | "#### orthography\n", 48 | "\n", 49 | "If you want to use different non level 0 concepts you have to use the displayed name. The concept has to be in the form 'concept'; if you want to use multiple concepts, they have to be separated by a comma." 
50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 2, 55 | "id": "155dd5c5", 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "# list of root concepts\n", 60 | "root_concepts=['Political science',\n", 61 | "'Philosophy',\n", 62 | "'Economics',\n", 63 | "'Business',\n", 64 | "'Psychology',\n", 65 | "'Mathematics',\n", 66 | "'Medicine',\n", 67 | "'Biology',\n", 68 | "'Computer science',\n", 69 | "'Geology',\n", 70 | "'Chemistry',\n", 71 | "'Art',\n", 72 | "'Sociology',\n", 73 | "'Engineering',\n", 74 | "'Geography',\n", 75 | "'History',\n", 76 | "'Materials science',\n", 77 | "'Physics',\n", 78 | "'Environmental science']" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "id": "b77a8eb7", 84 | "metadata": {}, 85 | "source": [ 86 | "#### Input parameters\n", 87 | "\n", 88 | "There are 3 Input Parameters: A List of ORCID IDs, a list of subjects and a confidence score." 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 3, 94 | "id": "33dd8492", 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "# Search Subjects\n", 99 | "Concept_search=['Computer science','Physics']" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "id": "3df20d0e", 105 | "metadata": {}, 106 | "source": [ 107 | "Note: ORCID IDs have to be in the Form \"0000-0001-5380-4449\" and have to be seperated by comma." 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 4, 113 | "id": "25f8a288", 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "# Orcid list\n", 118 | "list_of_ids=[\"0000-0002-3416-2652\",\n", 119 | "\"0000-0001-6604-6253\",\n", 120 | "\"0000-0003-4331-8695\",\n", 121 | "\"0000-0003-4939-1666\",\n", 122 | "\"0000-0002-5861-8896\",]" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "id": "4e4479e7", 128 | "metadata": {}, 129 | "source": [ 130 | "Note: The score has to be between 0 and 1. 
a higher score meets a higher threshold for counting as an expert." 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 5, 136 | "id": "3a8d23b4", 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "# Confidence scores\n", 141 | "confidence_score=0.4" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "id": "nWOX9dkvaZ97", 147 | "metadata": { 148 | "id": "nWOX9dkvaZ97" 149 | }, 150 | "source": [ 151 | "We use it to query the OpenAlex API for works that specified the ORCID URL within their metadata in the field '`authorships.author.orcid`'.\n", 152 | " Since the API uses [pagination](https://docs.openalex.org/api/get-lists-of-entities#pagination), we need to loop through all pages to get the complete result set." 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 6, 158 | "id": "8b608640-96a8-47d1-9de7-b7d3f6fd5a47", 159 | "metadata": { 160 | "executionInfo": { 161 | "elapsed": 5, 162 | "status": "ok", 163 | "timestamp": 1643058187685, 164 | "user": { 165 | "displayName": "", 166 | "photoUrl": "", 167 | "userId": "" 168 | }, 169 | "user_tz": -60 170 | }, 171 | "id": "8b608640-96a8-47d1-9de7-b7d3f6fd5a47" 172 | }, 173 | "outputs": [], 174 | "source": [ 175 | "# OpenAlex endpoint to query for works\n", 176 | "OPENALEX_API_WORKS = \"https://api.openalex.org/works\"\n", 177 | "\n", 178 | "# query all works that are connected to orcid\n", 179 | "def query_openalex_for_person2works(orcid):\n", 180 | " page = 1\n", 181 | " max_page = 1\n", 182 | " \n", 183 | " while page <= max_page:\n", 184 | " params = {'filter': 'authorships.author.orcid:'+orcid, 'page': page}\n", 185 | " response = requests.get(url=OPENALEX_API_WORKS,\n", 186 | " params=params,\n", 187 | " headers= {'Accept': 'application/json'})\n", 188 | " response.raise_for_status()\n", 189 | " result=response.json()\n", 190 | "\n", 191 | " # calculate max page number in first loop\n", 192 | " if max_page == 1:\n", 193 | " max_page = 
determine_max_page(result)\n", 194 | " page = page + 1\n", 195 | " yield result\n", 196 | "\n", 197 | "# calculate max number of result pages\n", 198 | "def determine_max_page(response_data):\n", 199 | " item_count = response_data['meta']['count']\n", 200 | " items_per_page = response_data['meta']['per_page']\n", 201 | " max_page_ceil = item_count // items_per_page + bool(item_count % items_per_page)\n", 202 | " return max_page_ceil" 203 | ] 204 | }, 205 | { 206 | "cell_type": "markdown", 207 | "id": "kHRO_LiJr-u_", 208 | "metadata": { 209 | "id": "kHRO_LiJr-u_" 210 | }, 211 | "source": [ 212 | "From the resulting list of works we extract and print out title and DOI. \n", 213 | "\n", 214 | "*Note: works that do not have a DOI assigned, will not be printed.*" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": 7, 220 | "id": "1c36737c-4dcf-42d5-80e2-802f0a7a8326", 221 | "metadata": { 222 | "colab": { 223 | "base_uri": "https://localhost:8080/" 224 | }, 225 | "executionInfo": { 226 | "elapsed": 516, 227 | "status": "ok", 228 | "timestamp": 1643058188197, 229 | "user": { 230 | "displayName": "", 231 | "photoUrl": "", 232 | "userId": "" 233 | }, 234 | "user_tz": -60 235 | }, 236 | "id": "1c36737c-4dcf-42d5-80e2-802f0a7a8326", 237 | "outputId": "fb849105-45c1-4abd-a6be-b6d4e3c567f4", 238 | "scrolled": true 239 | }, 240 | "outputs": [], 241 | "source": [ 242 | "# from the result pages we get from the OpenAlex API, extract the data about works\n", 243 | "def extract_works_from_page(page):\n", 244 | " return [work for work in page.get('results') or []]\n", 245 | "\n", 246 | "# extract DOI from work\n", 247 | "def extract_doi(work):\n", 248 | " doi=work.get('ids', {}).get('doi') or \"\"\n", 249 | " doi_id=doi.replace(\"https://doi.org/\", \"\") if doi else doi\n", 250 | " title=work.get('display_name', \"\")\n", 251 | " concept=work.get('concepts')\n", 252 | " return doi_id, title, concept\n", 253 | "\n", 254 | "def main_search(orcid):\n", 255 | " 
global Error_count\n", 256 | " # Query for DOI list\n", 257 | " result_doi=[]\n", 258 | " count_doi=0\n", 259 | " list_of_pages=query_openalex_for_person2works(orcid)\n", 260 | " for page in list_of_pages or []:\n", 261 | " works=extract_works_from_page(page)\n", 262 | " for work in works or []:\n", 263 | " doi,title,concept=extract_doi(work)\n", 264 | " if doi:\n", 265 | " add=[]\n", 266 | " add.append(orcid)\n", 267 | " add.append(doi)\n", 268 | " add.append(title)\n", 269 | " add_concept=[]\n", 270 | " for item in concept:\n", 271 | " all_concepts=[item['display_name'],'Level:'+str(item['level']),item['score']]\n", 272 | " add_concept.append(all_concepts)\n", 273 | " add.append(add_concept)\n", 274 | " result_doi.append(add)\n", 275 | " # Start of the expertsearch \n", 276 | " dict_gesamt={}\n", 277 | " dict_gesamt.update({'ID':orcid})\n", 278 | " dict_gesamt.update({'Count DOI:':count_doi})\n", 279 | " add=[]\n", 280 | " dedub_add=[]\n", 281 | " # Building a list of all with the respective ORCID connected concepts\n", 282 | " for item in result_doi:\n", 283 | " if orcid in item:\n", 284 | " count_doi=count_doi+1\n", 285 | " for item2 in item[3]:\n", 286 | " new=item2[0]\n", 287 | " add.append(new) \n", 288 | " dict_gesamt.update({'Count DOI:':count_doi})\n", 289 | " # Dedublication \n", 290 | " for item in add:\n", 291 | " if item not in dedub_add:\n", 292 | " dedub_add.append(item)\n", 293 | " # Score for each concept\n", 294 | " for single_concept in dedub_add:\n", 295 | " score_concept=0\n", 296 | " concept_count=0\n", 297 | " for item in result_doi:\n", 298 | " for item2 in item[3]:\n", 299 | " if single_concept in item2[0]:\n", 300 | " score_concept=score_concept+float(item2[2])\n", 301 | " concept_count=concept_count+1\n", 302 | " if concept_count>0:\n", 303 | " final_score=score_concept/concept_count\n", 304 | " dict_gesamt.update({single_concept:final_score}) \n", 305 | " # error search\n", 306 | " dict_error=dict_gesamt.copy()\n", 307 | " del 
dict_error['ID']\n", 308 | " del dict_error['Count DOI:']\n", 309 | " error_check=dict_error.values()\n", 310 | " for item in error_check:\n", 311 | " if item >1:\n", 312 | " Error_count=Error_count+1\n", 313 | " print('############Error#############')\n", 314 | " # Expert search \n", 315 | " check=0\n", 316 | " expert=['Orcid:', dict_gesamt['ID']]\n", 317 | " for item in Concept_search:\n", 318 | " if item in dict_gesamt.keys() and dict_gesamt[item]>confidence_score:\n", 319 | " check=1\n", 320 | " add=['Subject:'+item,'Score:', dict_gesamt[item]]\n", 321 | " expert.append(add)\n", 322 | " if check ==1:\n", 323 | " list_experts.append(expert)" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": 8, 329 | "id": "425b6e12", 330 | "metadata": { 331 | "scrolled": true 332 | }, 333 | "outputs": [], 334 | "source": [ 335 | "# main programm:\n", 336 | "global Error_count\n", 337 | "Error_count=0\n", 338 | "global list_experts\n", 339 | "list_experts=[]\n", 340 | "for item in list_of_ids:\n", 341 | " main_search(item) " 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": 9, 347 | "id": "987d822b", 348 | "metadata": { 349 | "scrolled": true 350 | }, 351 | "outputs": [ 352 | { 353 | "name": "stdout", 354 | "output_type": "stream", 355 | "text": [ 356 | "Error Count: 0\n", 357 | "Count of Experts: 2\n", 358 | "['Orcid:', '0000-0002-3416-2652', ['Subject:Computer science', 'Score:', 0.5338947615263159]]\n", 359 | "['Orcid:', '0000-0002-5861-8896', ['Subject:Computer science', 'Score:', 0.6944728807438019]]\n" 360 | ] 361 | } 362 | ], 363 | "source": [ 364 | "# Results\n", 365 | "print('Error Count:',Error_count)\n", 366 | "print('Count of Experts:', len(list_experts))\n", 367 | "if len(list_experts) ==0:\n", 368 | " print('no experts found')\n", 369 | "for exp in list_experts:\n", 370 | " print (exp)" 371 | ] 372 | } 373 | ], 374 | "metadata": { 375 | "colab": { 376 | "collapsed_sections": [], 377 | "name": 
"openalex_get_works_by_person.ipynb", 378 | "provenance": [ 379 | { 380 | "file_id": "https://github.com/TAPIR-TIB/pidgraph-notebooks/blob/person-works/person-works/openalex_get_works_by_person.ipynb", 381 | "timestamp": 1643058224827 382 | }, 383 | { 384 | "file_id": "https://github.com/TAPIR-TIB/pidgraph-notebooks/blob/person-works/person-works/openalex_get_works_by_person.ipynb", 385 | "timestamp": 1643040475251 386 | }, 387 | { 388 | "file_id": "1neSGbKlkQwjOYX77kpGK14BqT4KpCei0", 389 | "timestamp": 1643025949695 390 | } 391 | ] 392 | }, 393 | "kernelspec": { 394 | "display_name": "Python 3 (ipykernel)", 395 | "language": "python", 396 | "name": "python3" 397 | }, 398 | "language_info": { 399 | "codemirror_mode": { 400 | "name": "ipython", 401 | "version": 3 402 | }, 403 | "file_extension": ".py", 404 | "mimetype": "text/x-python", 405 | "name": "python", 406 | "nbconvert_exporter": "python", 407 | "pygments_lexer": "ipython3", 408 | "version": "3.9.13" 409 | } 410 | }, 411 | "nbformat": 4, 412 | "nbformat_minor": 5 413 | } 414 | -------------------------------------------------------------------------------- /Search for funder informations/Crossref_get_doi_funder_informations_from_an_organization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Query Crossref for works and funder informations affiliated with an institution\n", 8 | "\n", 9 | "This notebook queries the [Crossref API](http://api.crossref.org) via its `'/works'` endpoint for works authored by a institution and there funders. It takes a ror id as input which is used to filter for all works where one of the authors `'ror'` field matches the given ror id. 
From the resulting list of works we output all DOIs, their titles and their funder information.\n" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "#### Examples of ror_ids\n", 17 | "\n", 18 | "University of Osnabrück = https://ror.org/04qmmjx98\n", 19 | "\n", 20 | "TIB Hannover = https://ror.org/04aj4c181" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 1, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "# Prerequisites:\n", 30 | "import requests # dependency to make HTTP calls\n", 31 | "from habanero import Crossref # lib for querying crossref api" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 2, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "# input ror\n", 41 | "ror_id=\"https://ror.org/04qmmjx98\"" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "We use it to query the Crossref API via its '`/works`' endpoint and set the `ror-id` filter to match the given ROR ID. 
" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 3, 54 | "metadata": { 55 | "executionInfo": { 56 | "elapsed": 485, 57 | "status": "ok", 58 | "timestamp": 1643057866151, 59 | "user": { 60 | "displayName": "Sandra M", 61 | "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjjehryRcYlqHNFf_9Q6slGN_VZPE0y5QkvOxzG=s64", 62 | "userId": "04602594913862593282" 63 | }, 64 | "user_tz": -60 65 | }, 66 | "id": "AIuILzlS6zv9", 67 | "scrolled": false 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "# we are using an existing library for querying the api crossref \n", 72 | "\n", 73 | "def searchror(ror_id):\n", 74 | " list_of_pages=Crossref().works(filter = {'ror-id': ror_id}, cursor = \"*\", select = \"DOI,title,funder\")\n", 75 | " for page in list_of_pages: \n", 76 | " for item in page['message']['items']:\n", 77 | " print('New Entry') # For presentation purposes, marks a new doi entry\n", 78 | " if 'funder' in item.keys():\n", 79 | " print(f\"{item['DOI']},{item['title'][0]}\",'Funder Informations:',f\"{item['funder']}\")\n", 80 | " else:\n", 81 | " print(f\"{item['DOI']},{item['title'][0]}\",',no funder informations')" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 4, 87 | "metadata": { 88 | "colab": { 89 | "base_uri": "https://localhost:8080/" 90 | }, 91 | "executionInfo": { 92 | "elapsed": 6, 93 | "status": "ok", 94 | "timestamp": 1643057866152, 95 | "user": { 96 | "displayName": "Sandra M", 97 | "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjjehryRcYlqHNFf_9Q6slGN_VZPE0y5QkvOxzG=s64", 98 | "userId": "04602594913862593282" 99 | }, 100 | "user_tz": -60 101 | }, 102 | "id": "0bW0T-cv25wN", 103 | "outputId": "247e242b-c97a-47aa-dd5b-a80334ea1c42", 104 | "scrolled": true 105 | }, 106 | "outputs": [ 107 | { 108 | "name": "stdout", 109 | "output_type": "stream", 110 | "text": [ 111 | "New Entry\n", 112 | "10.3762/bjnano.13.53,Quantitative dynamic force microscopy with inclined tip oscillation Funder 
Informations: [{'DOI': '10.13039/501100001659', 'name': 'Deutsche Forschungsgemeinschaft', 'doi-asserted-by': 'publisher', 'award': ['RA2832/1-1', 'RE1186/21-1', 'RE1186/23-1']}]\n", 113 | "New Entry\n", 114 | "10.7554/elife.77227.sa2,Author response: Transition to siblinghood causes a substantial and long-lasting increase in urinary cortisol levels in wild bonobos ,no funder informations\n", 115 | "New Entry\n", 116 | "10.7554/elife.80901.sa2,Author response: Structure of the HOPS tethering complex, a lysosomal membrane fusion machinery ,no funder informations\n", 117 | "New Entry\n", 118 | "10.7554/elife.80901,Structure of the HOPS tethering complex, a lysosomal membrane fusion machinery Funder Informations: [{'DOI': '10.13039/501100001659', 'name': 'Deutsche Forschungsgemeinschaft', 'doi-asserted-by': 'publisher', 'award': ['UN111/5-6']}, {'DOI': '10.13039/501100001659', 'name': 'Deutsche Forschungsgemeinschaft', 'doi-asserted-by': 'publisher', 'award': ['INST190/196-1 FUGG']}, {'DOI': '10.13039/501100002347', 'name': 'Bundesministerium für Bildung und Forschung', 'doi-asserted-by': 'publisher', 'award': ['BMBF/DLR 01ED2010']}, {'DOI': '10.13039/501100001659', 'name': 'Deutsche Forschungsgemeinschaft', 'doi-asserted-by': 'publisher', 'award': ['SFB 944 P11']}, {'DOI': '10.13039/501100001659', 'name': 'Deutsche Forschungsgemeinschaft', 'doi-asserted-by': 'publisher', 'award': ['SFB 944 P27']}, {'DOI': '10.13039/501100001659', 'name': 'Deutsche Forschungsgemeinschaft', 'doi-asserted-by': 'publisher', 'award': ['SFB 944 P20']}, {'DOI': '10.13039/501100001659', 'name': 'Deutsche Forschungsgemeinschaft', 'doi-asserted-by': 'publisher', 'award': ['SFB 944 P16']}, {'DOI': '10.13039/501100001659', 'name': 'Deutsche Forschungsgemeinschaft', 'doi-asserted-by': 'publisher', 'award': ['MO 2752/3-6']}]\n", 119 | "New Entry\n", 120 | "10.7554/elife.79278,Pathogenic variants of sphingomyelin synthase SMS2 disrupt lipid landscapes in the secretory pathway Funder Informations: 
[{'DOI': '10.13039/501100001659', 'name': 'Deutsche Forschungsgemeinschaft', 'doi-asserted-by': 'publisher', 'award': ['SFB944-P14']}, {'DOI': '10.13039/501100001659', 'name': 'Deutsche Forschungsgemeinschaft', 'doi-asserted-by': 'publisher', 'award': ['HO3539/1-1']}, {'DOI': '10.13039/501100001659', 'name': 'Deutsche Forschungsgemeinschaft', 'doi-asserted-by': 'publisher', 'award': ['SFB944-P8']}, {'DOI': '10.13039/100000002', 'name': 'National Institutes of Health', 'doi-asserted-by': 'publisher', 'award': ['R35 GM144096']}, {'DOI': '10.13039/501100009708', 'name': 'Novo Nordisk Foundation', 'doi-asserted-by': 'publisher', 'award': ['NNF17OC0029432']}, {'DOI': '10.13039/501100004836', 'name': 'Independent Research Fund Denmark', 'doi-asserted-by': 'publisher', 'award': ['6108–00542B']}]\n", 121 | "New Entry\n", 122 | "10.7554/elife.79278.sa2,Author response: Pathogenic variants of sphingomyelin synthase SMS2 disrupt lipid landscapes in the secretory pathway ,no funder informations\n", 123 | "New Entry\n", 124 | "10.7554/elife.77227,Transition to siblinghood causes a substantial and long-lasting increase in urinary cortisol levels in wild bonobos Funder Informations: [{'DOI': '10.13039/501100001659', 'name': 'Deutsche Forschungsgemeinschaft', 'doi-asserted-by': 'publisher', 'award': ['BE 5511/4-1']}, {'DOI': '10.13039/501100007768', 'name': 'Max Planck Institute for Evolutionary Anthropology', 'doi-asserted-by': 'publisher', 'award': ['Open access funding']}, {'name': 'Max Planck Institute of Animal Behavior', 'award': ['Open access funding']}]\n" 125 | ] 126 | } 127 | ], 128 | "source": [ 129 | "# main programm \n", 130 | "searchror(ror_id) " 131 | ] 132 | } 133 | ], 134 | "metadata": { 135 | "colab": { 136 | "authorship_tag": "ABX9TyN8GdEZ2emA2pb1j6K+9PwP", 137 | "name": "crossref_get_works_by_person.ipynb", 138 | "provenance": [ 139 | { 140 | "file_id": "1RvDBYtHIK8LG_31cmfKW2PxQ3whxondX", 141 | "timestamp": 1643057922530 142 | } 143 | ] 144 | }, 145 | 
"kernelspec": { 146 | "display_name": "Python 3 (ipykernel)", 147 | "language": "python", 148 | "name": "python3" 149 | }, 150 | "language_info": { 151 | "codemirror_mode": { 152 | "name": "ipython", 153 | "version": 3 154 | }, 155 | "file_extension": ".py", 156 | "mimetype": "text/x-python", 157 | "name": "python", 158 | "nbconvert_exporter": "python", 159 | "pygments_lexer": "ipython3", 160 | "version": "3.9.13" 161 | } 162 | }, 163 | "nbformat": 4, 164 | "nbformat_minor": 4 165 | } 166 | -------------------------------------------------------------------------------- /Search for funder informations/Crossref_get_funder_informations_by_list_of_persons.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "k6HfuEZe48LV" 7 | }, 8 | "source": [ 9 | "## Query Crossref for works and there funder authored by a person\n", 10 | "This notebook queries the [Crossref API](http://api.crossref.org) via its `'/works'` endpoint for works authored by a person. It takes a list of ORCID IDs as input which is used to filter for all works where one of the authors' `orcid` field matches one of the given ORCID iD. 
\n", 11 | "From the resulting list of works we output all DOIs, their title and their funder informations" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "metadata": { 18 | "id": "aV_HkXxJ4pVB" 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "# Prerequisites:\n", 23 | "import requests # dependency to make HTTP calls\n", 24 | "from habanero import Crossref # lib for querying crossref api" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 3, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "# input list of ORCID IDs\n", 34 | "list_of_ids=['0000-0001-5380-4449', \n", 35 | "'0000-0001-5406-9458',\n", 36 | "'0000-0001-5449-4593',\n", 37 | "'0000-0003-4939-1666',\n", 38 | "'0000-0003-4971-9991']" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 4, 44 | "metadata": { 45 | "executionInfo": { 46 | "elapsed": 485, 47 | "status": "ok", 48 | "timestamp": 1643057866151, 49 | "user": { 50 | "displayName": "Sandra M", 51 | "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjjehryRcYlqHNFf_9Q6slGN_VZPE0y5QkvOxzG=s64", 52 | "userId": "04602594913862593282" 53 | }, 54 | "user_tz": -60 55 | }, 56 | "id": "AIuILzlS6zv9" 57 | }, 58 | "outputs": [], 59 | "source": [ 60 | "def search_Funder(orcid_id):\n", 61 | " list_of_pages=Crossref().works(filter = {'orcid': orcid_id}, cursor = \"*\", select = \"DOI,title,funder\") \n", 62 | " for page in list_of_pages:\n", 63 | " for item in page['message']['items']:\n", 64 | " print('New Entry') # For presentation purposes, marks a new doi entry\n", 65 | " if 'funder' in item.keys():\n", 66 | " print(f\"{item['DOI']},{item['title'][0]}\",'Funder Informations:',f\"{item['funder']}\")\n", 67 | " else:\n", 68 | " print(f\"{item['DOI']},{item['title'][0]}\",',no funder informations')" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 5, 74 | "metadata": { 75 | "scrolled": true 76 | }, 77 | "outputs": [ 78 | { 79 | "name": "stdout", 80 
| "output_type": "stream", 81 | "text": [ 82 | "New Entry\n", 83 | "10.1007/s11553-022-00940-6,Prävention von berufsbedingtem Hautkrebs bei Beschäftigten im Freien: Entwicklung eines Curriculums zur Fortbildung von Multiplikator*innen Funder Informations: [{'name': 'Nationale Versorgungskonferenz Hautkrebs e. V.'}, {'DOI': '10.13039/501100016379', 'name': 'Universität Osnabrück', 'doi-asserted-by': 'crossref'}]\n", 84 | "New Entry\n", 85 | "10.1111/cod.14206,Prevention of occupational hand eczema in healthcare workers during the COVID‐19 pandemic: A controlled intervention study ,no funder informations\n", 86 | "New Entry\n", 87 | "10.2196/preprints.31980,Expressiveness of an International Semantic Standard for Wound Care: Mapping a Standardized Item Set for Leg Ulcers to the Systematized Nomenclature of Medicine–Clinical Terms (Preprint) ,no funder informations\n", 88 | "New Entry\n", 89 | "10.1007/s11553-020-00799-5,Prävention berufsbedingter Erkrankungen durch Lehrkräfte Funder Informations: [{'DOI': '10.13039/501100016379', 'name': 'Universität Osnabrück', 'doi-asserted-by': 'crossref'}]\n", 90 | "New Entry\n", 91 | "10.1111/cod.14123,Effects and acceptance of semipermeable gloves compared to cotton gloves in patients with hand dermatoses: Results of a controlled intervention study ,no funder informations\n", 92 | "New Entry\n", 93 | "10.1111/cod.13929,Acceptance of semipermeable glove liners compared to cotton glove liners in health care workers with work‐related skin diseases: Results of a quasi‐randomized trial under real workplace conditions ,no funder informations\n", 94 | "New Entry\n", 95 | "10.1111/jam.15399,Novel insights into pivotal risk factors for rectal carriage of extended-spectrum-β-lactamase-producing enterobacterales within the general population in Lower Saxony, Germany Funder Informations: [{'name': 'Osnabrueck University'}, {'name': 'INTERREG V A'}]\n", 96 | "New Entry\n", 97 | "10.1016/j.orthtr.2022.11.011,Back and neck problems as well as 
disadvantageous ergonomic behavior patterns in university students: Concomitants of the pandemic? ,no funder informations\n", 98 | "New Entry\n", 99 | "10.1093/bjd/ljac053,Occupational exposure to cobalt nanoparticles: potential implications on risk assessment and preventive measures ,no funder informations\n", 100 | "New Entry\n", 101 | "10.1111/cod.13719,Degree of employment, sick leave, and costs following notification of occupational contact dermatitis—A register‐based study ,no funder informations\n", 102 | "New Entry\n", 103 | "10.1016/j.orthtr.2022.03.012,Influence of physical activity on well-being at times of the COVID-19 pandemic: a review ,no funder informations\n", 104 | "New Entry\n", 105 | "10.1136/bmjopen-2022-062194,Prevalence and incidence of hand eczema in healthcare workers: protocol for a systematic review and meta-analysis ,no funder informations\n", 106 | "New Entry\n", 107 | "10.1111/cod.14236,Systematic review on skin adverse effects of important hazardous hair cosmetic ingredients with a focus on hairdressers ,no funder informations\n", 108 | "New Entry\n", 109 | "10.1111/cod.14107,Nickel and cobalt release from beauty tools: A field study in the German cosmetics trade ,no funder informations\n", 110 | "New Entry\n", 111 | "10.1111/cod.14119,Effects of skin washing frequency on the epidermal barrier function and inflammatory processes of the epidermis: An experimental study ,no funder informations\n", 112 | "New Entry\n", 113 | "10.1111/cod.14055,Differences between hairdressers and consumers in skin exposure to hair cosmetic products: A review ,no funder informations\n", 114 | "New Entry\n", 115 | "10.3390/cancers12082111,MicroRNA-21-Enriched Exosomes as Epigenetic Regulators in Melanomagenesis and Melanoma Progression: The Impact of Western Lifestyle Factors ,no funder informations\n", 116 | "New Entry\n", 117 | "10.2196/31980,Expressiveness of an International Semantic Standard for Wound Care: Mapping a Standardized Item Set for Leg 
Ulcers to the Systematized Nomenclature of Medicine–Clinical Terms ,no funder informations\n", 118 | "New Entry\n", 119 | "10.3390/en13226035,A One-Month Monitoring of Exposure to Solar UV Radiation of a Group of Construction Workers in Tuscany ,no funder informations\n", 120 | "New Entry\n", 121 | "10.1111/bjd.19152,Global evidence on occupational sun exposure and keratinocyte cancers: a systematic review ,no funder informations\n", 122 | "New Entry\n", 123 | "10.1111/jdv.17011,Improved protection of outdoor workers from solar ultraviolet radiation: position statement ,no funder informations\n", 124 | "New Entry\n", 125 | "10.1111/cod.14048,Prevalence and incidence of hand eczema in hairdressers—A systematic review and meta‐analysis of the published literature from 2000–2021 ,no funder informations\n", 126 | "New Entry\n", 127 | "10.3390/ijerph19074176,Occupational Exposure of Hairdressers to Airborne Hazardous Chemicals: A Scoping Review Funder Informations: [{'name': 'Uni Europa–The European Global Services Union', 'award': ['Project VS/2019/0440']}]\n", 128 | "New Entry\n", 129 | "10.1111/cod.14099,Evaluation of the secondary and tertiary prevention strategies against occupational contact dermatitis in Germany: A systematic review ,no funder informations\n", 130 | "New Entry\n", 131 | "10.1111/jdv.17058,Experimental evaluation of nickel and cobalt release from tools and self‐reported prevalence of nickel and cobalt allergy in the German hairdressing trade ,no funder informations\n", 132 | "New Entry\n", 133 | "10.1111/cod.14242,Bleaching powders, bleaching creams and other hair lightening preparations as sources for (airborne) allergic contact dermatitis and other health effects in hairdressers: Results of an empirical study ,no funder informations\n", 134 | "New Entry\n", 135 | "10.1155/2021/5558694,UV-Induced Skin Cancer Knowledge, Sun Exposure, and Tanning Behavior among University Students: Investigation of an Opportunity Sample of German University 
Students Funder Informations: [{'DOI': '10.13039/501100001659', 'name': 'Deutsche Forschungsgemeinschaft', 'doi-asserted-by': 'publisher'}]\n", 136 | "New Entry\n", 137 | "10.1093/occmed/kqac046,Acceptance of skin products in healthcare workers: an empirical investigation Funder Informations: [{'DOI': '10.13039/501100010558', 'name': 'Beiersdorf AG, Hamburg, Germany', 'doi-asserted-by': 'crossref'}]\n", 138 | "New Entry\n", 139 | "10.1111/cod.13910,Incidence rates of occupational contact dermatitis in Denmark between 2007 and 2018: A population‐based study ,no funder informations\n", 140 | "New Entry\n", 141 | "10.1111/cod.14056,Allergic contact dermatitis caused by 2‐hydroxyethyl methacrylate and ethyl cyanoacrylate contained in cosmetic glues among hairdressers and beauticians who perform nail treatments and eyelash extension as well as hair extension applications: A systematic review ,no funder informations\n", 142 | "New Entry\n", 143 | "10.3390/ijms19082186,The Small Yeast GTPase Rho5 and Its Dimeric GEF Dck1/Lmo1 Respond to Glucose Starvation Funder Informations: [{'DOI': '10.13039/501100001659', 'name': 'Deutsche Forschungsgemeinschaft', 'doi-asserted-by': 'publisher', 'award': ['HE 1880/6-1']}]\n", 144 | "New Entry\n", 145 | "10.3390/ijms20225550,Analysis of Functional Domains in Rho5, the Yeast Homolog of Human Rac1 GTPase, in Oxidative Stress Response Funder Informations: [{'DOI': '10.13039/501100001659', 'name': 'Deutsche Forschungsgemeinschaft', 'doi-asserted-by': 'publisher', 'award': ['HE 1880/6-1']}]\n", 146 | "New Entry\n", 147 | "10.1371/journal.pone.0223374,Analysis of the protein composition of the spindle pole body during sporulation in Ashbya gossypii Funder Informations: [{'DOI': '10.13039/501100001659', 'name': 'Deutsche Forschungsgemeinschaft', 'doi-asserted-by': 'publisher', 'award': ['SCHM 2388/2-1']}]\n", 148 | "New Entry\n", 149 | "10.3390/land10020149,Soil Protection in Floodplains—A Review ,no funder informations\n", 150 | "New 
Entry\n", 151 | "10.1016/j.flora.2020.151602,Evolutionary history of the Eurasian steppe plant Schivereckia podolica (Brassicaceae) and its close relatives Funder Informations: [{'DOI': '10.13039/501100001659', 'name': 'Deutsche Forschungsgemeinschaft', 'doi-asserted-by': 'publisher', 'award': ['FR 1431/8-1', 'NE 314/15-1']}, {'DOI': '10.13039/501100006769', 'name': 'Russian Science Foundation', 'doi-asserted-by': 'publisher', 'award': ['RSF 19-14-00071']}]\n", 152 | "New Entry\n", 153 | "10.1002/ece3.8015,Pleistocene dynamics of the Eurasian steppe as a driving force of evolution: Phylogenetic history of the genus\n", 154 | " Capsella\n", 155 | " (Brassicaceae) Funder Informations: [{'DOI': '10.13039/501100001659', 'name': 'Deutsche Forschungsgemeinschaft', 'doi-asserted-by': 'publisher', 'award': ['NE 314/15‐1']}]\n", 156 | "New Entry\n", 157 | "10.1016/j.flora.2022.152206,S-alleles and mating system in natural populations of Capsella grandiflora (Brassicaceae) and its congeneric relatives Funder Informations: [{'DOI': '10.13039/501100001659', 'name': 'Deutsche Forschungsgemeinschaft', 'doi-asserted-by': 'publisher', 'award': ['NE 314/6-1', 'NE 314/8-1']}]\n", 158 | "New Entry\n", 159 | "10.1002/ece3.7217,A story from the Miocene: Clock‐dated phylogeny ofSisymbriumL. 
(Sisymbrieae, Brassicaceae) Funder Informations: [{'DOI': '10.13039/501100001659', 'name': 'Deutsche Forschungsgemeinschaft', 'doi-asserted-by': 'publisher', 'award': ['NE 314/15‐1']}]\n", 160 | "New Entry\n", 161 | "10.1105/tpc.19.00551,A Similar Genetic Architecture Underlies the Convergent Evolution of the Selfing Syndrome in Capsella Funder Informations: [{'DOI': '10.13039/501100001659', 'name': 'Deutsche Forschungsgemeinschaft', 'doi-asserted-by': 'publisher', 'award': ['SI1967/2']}, {'DOI': '10.13039/501100004359', 'name': 'Swedish Research Council', 'doi-asserted-by': 'publisher', 'award': ['2018-04214']}]\n" 162 | ] 163 | } 164 | ], 165 | "source": [ 166 | "# Main programm\n", 167 | "for item in list_of_ids:\n", 168 | " search_Funder(item)" 169 | ] 170 | } 171 | ], 172 | "metadata": { 173 | "colab": { 174 | "authorship_tag": "ABX9TyN8GdEZ2emA2pb1j6K+9PwP", 175 | "name": "crossref_get_works_by_person.ipynb", 176 | "provenance": [ 177 | { 178 | "file_id": "1RvDBYtHIK8LG_31cmfKW2PxQ3whxondX", 179 | "timestamp": 1643057922530 180 | } 181 | ] 182 | }, 183 | "kernelspec": { 184 | "display_name": "Python 3 (ipykernel)", 185 | "language": "python", 186 | "name": "python3" 187 | }, 188 | "language_info": { 189 | "codemirror_mode": { 190 | "name": "ipython", 191 | "version": 3 192 | }, 193 | "file_extension": ".py", 194 | "mimetype": "text/x-python", 195 | "name": "python", 196 | "nbconvert_exporter": "python", 197 | "pygments_lexer": "ipython3", 198 | "version": "3.9.13" 199 | } 200 | }, 201 | "nbformat": 4, 202 | "nbformat_minor": 4 203 | } 204 | -------------------------------------------------------------------------------- /Search for funder informations/Crossref_get_funder_informations_of_a_list_of_dois.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "k6HfuEZe48LV" 7 | }, 8 | "source": [ 9 | "## Query Crossref for funder informations\n", 
10 | "This notebook queries the [Crossref API](http://api.crossref.org) via its `'/works'` endpoint for publications and their funders. It takes a list of DOIs as input, which is used to filter for the respective DOIs and display their title, DOI and funder information" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": { 17 | "id": "aV_HkXxJ4pVB", 18 | "scrolled": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "# Prerequisites:\n", 23 | "import requests # dependency to make HTTP calls\n", 24 | "from habanero import Crossref # lib for querying crossref api" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "*Note: Each DOI must be given in its bare form (e.g. '10.1371/journal.pone.0233153'), without a resolver prefix such as 'https://doi.org/'.*" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 2, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "# Input list of dois\n", 41 | "list_doi=['10.1371/journal.pone.0233153',\n", 42 | "'10.1002/1873-3468.13989',\n", 43 | "'10.1371/journal.ppat.1008220',\n", 44 | "'10.1002/adem.202101159',\n", 45 | "'10.1002/admi.202001911',\n", 46 | "'10.1002/anie.202207950',\n", 47 | "'10.1002/aps3.11438'\n", 48 | "]" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 3, 54 | "metadata": { 55 | "executionInfo": { 56 | "elapsed": 485, 57 | "status": "ok", 58 | "timestamp": 1643057866151, 59 | "user": { 60 | "displayName": "Sandra M", 61 | "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjjehryRcYlqHNFf_9Q6slGN_VZPE0y5QkvOxzG=s64", 62 | "userId": "04602594913862593282" 63 | }, 64 | "user_tz": -60 65 | }, 66 | "id": "AIuILzlS6zv9" 67 | }, 68 | "outputs": [], 69 | "source": [ 70 | "def search(item):\n", 71 | " list_of_pages=Crossref().works(filter = {'doi': item}, cursor = \"*\", select = \"DOI,title,funder\") \n", 72 | " for page in list_of_pages:\n", 73 | " for item in page['message']['items']:\n", 74 | " if 'funder' in item.keys():\n", 75 | " 
print(f\"{item['DOI']},{item['title'][0]}\",'Funder Informations',f\"{item['funder']}\")\n", 76 | " else:\n", 77 | " print(f\"{item['DOI']},{item['title'][0]}\",',no funder informations')" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 4, 83 | "metadata": { 84 | "scrolled": true 85 | }, 86 | "outputs": [ 87 | { 88 | "name": "stdout", 89 | "output_type": "stream", 90 | "text": [ 91 | "10.1371/journal.pone.0233153,An empirically derived recommendation for the classification of body dysmorphic disorder: Findings from structural equation modeling Funder Informations [{'DOI': '10.13039/501100001659', 'name': 'Deutsche Forschungsgemeinschaft', 'doi-asserted-by': 'publisher', 'award': ['GR 4761/2-1']}]\n", 92 | "10.1002/1873-3468.13989,Cryo‐EM of ABC transporters: an ice‐cold solution to everything? Funder Informations [{'DOI': '10.13039/501100001659', 'name': 'Deutsche Forschungsgemeinschaft', 'doi-asserted-by': 'publisher', 'award': ['Mo2752/2']}, {'DOI': '10.13039/501100004189', 'name': 'Max-Planck-Gesellschaft', 'doi-asserted-by': 'publisher'}]\n", 93 | "10.1371/journal.ppat.1008220,A trafficome-wide RNAi screen reveals deployment of early and late secretory host proteins and the entire late endo-/lysosomal vesicle fusion machinery by intracellular Salmonella Funder Informations [{'DOI': '10.13039/501100002347', 'name': 'Bundesministerium für Bildung und Forschung', 'doi-asserted-by': 'publisher', 'award': ['0315834D']}, {'DOI': '10.13039/501100001659', 'name': 'Deutsche Forschungsgemeinschaft', 'doi-asserted-by': 'publisher', 'award': ['SPP1580, HE1964/18-2']}, {'DOI': '10.13039/501100001659', 'name': 'Deutsche Forschungsgemeinschaft', 'doi-asserted-by': 'publisher', 'award': ['SFB944, Z project']}]\n", 94 | "10.1002/adem.202101159,Thin Patterned Lithium Niobate Films by Parallel Additive Capillary Stamping of Aqueous Precursor Solutions ,no funder informations\n", 95 | "10.1002/admi.202001911,Capillary Stamping of Functional Materials: 
Parallel Additive Substrate Patterning without Ink Depletion Funder Informations [{'DOI': '10.13039/501100000781', 'name': 'European Research Council', 'doi-asserted-by': 'publisher', 'award': ['ERC‐CoG‐2014', '646742 INCANA']}, {'DOI': '10.13039/501100001659', 'name': 'Deutsche Forschungsgemeinschaft', 'doi-asserted-by': 'publisher', 'award': ['GA2169/7‐1', 'HA5405/6‐2', 'ME 3275/6‐1']}]\n", 96 | "10.1002/anie.202207950,Proton‐Gradient‐Driven Sensitivity Enhancement of Liposome‐Encapsulated Supramolecular Chemosensors Funder Informations [{'DOI': '10.13039/501100001659', 'name': 'Deutsche Forschungsgemeinschaft', 'doi-asserted-by': 'publisher', 'award': ['HE 5967/6-1', 'NA 686/15-1']}]\n", 97 | "10.1002/aps3.11438,The best of both worlds: Combining lineage‐specific and universal bait sets in target‐enrichment hybridization reactions ,no funder informations\n" 98 | ] 99 | } 100 | ], 101 | "source": [ 102 | "# Main programm\n", 103 | "for item in list_doi:\n", 104 | " search(item)" 105 | ] 106 | } 107 | ], 108 | "metadata": { 109 | "colab": { 110 | "authorship_tag": "ABX9TyN8GdEZ2emA2pb1j6K+9PwP", 111 | "name": "crossref_get_works_by_person.ipynb", 112 | "provenance": [ 113 | { 114 | "file_id": "1RvDBYtHIK8LG_31cmfKW2PxQ3whxondX", 115 | "timestamp": 1643057922530 116 | } 117 | ] 118 | }, 119 | "kernelspec": { 120 | "display_name": "Python 3 (ipykernel)", 121 | "language": "python", 122 | "name": "python3" 123 | }, 124 | "language_info": { 125 | "codemirror_mode": { 126 | "name": "ipython", 127 | "version": 3 128 | }, 129 | "file_extension": ".py", 130 | "mimetype": "text/x-python", 131 | "name": "python", 132 | "nbconvert_exporter": "python", 133 | "pygments_lexer": "ipython3", 134 | "version": "3.9.13" 135 | } 136 | }, 137 | "nbformat": 4, 138 | "nbformat_minor": 4 139 | } 140 | -------------------------------------------------------------------------------- /Search for funder informations/Readme.md: 
-------------------------------------------------------------------------------- 1 | ## Search for funder informations 2 | 3 | Jupyter Notebooks showing the use of persistent identifiers for the identification of funder information 4 | 5 | * [Crossref](https://www.crossref.org/) 6 | -------------------------------------------------------------------------------- /organization-organization/README.md: -------------------------------------------------------------------------------- 1 | ## organization-organization 2 | 3 | A Jupyter notebook showing an example of using a persistent identifier for an organization (here ROR ID) as input for retrieving all sub-organizations (also identified by a ROR ID) connected to it. 4 | 5 | * [ROR](https://ror.readme.io/) organigram 6 | -------------------------------------------------------------------------------- /organization-organization/ror-organigram.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "0-62NAVa8DL0" 7 | }, 8 | "source": [ 9 | "### Query ROR for an organization and all its suborganizations\n", 10 | "\n", 11 | "This notebook queries the [ROR API](https://ror.readme.io/) for an organization and all its suborganizations. \n", 12 | "It takes a ROR URL or ROR ID as input which is used as a starting point for recursively querying the ROR API for an organization's suborganizations specified in the metadata field \"`relationships`\" having the `\"type\"=Child`. 
\n", 13 | "The hierarchy is represented as a tree and will be outputted.\n" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 1, 19 | "metadata": { 20 | "id": "VaYUrh5n2iq9", 21 | "scrolled": true 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "# Prerequisites:\n", 26 | "import requests # dependency to make HTTP calls\n", 27 | "from anytree import Node, RenderTree # dependency to construct tree structure" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": { 33 | "id": "YmmrIwjv3QqG" 34 | }, 35 | "source": [ 36 | "The input for this notebook is a ROR URL or ROR ID, e.g. '`https://ror.org/03vek6s52`' or '`03vek6s52`'." 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 2, 42 | "metadata": { 43 | "executionInfo": { 44 | "elapsed": 19, 45 | "status": "ok", 46 | "timestamp": 1643279169086, 47 | "user": { 48 | "displayName": "Sandra M", 49 | "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjjehryRcYlqHNFf_9Q6slGN_VZPE0y5QkvOxzG=s64", 50 | "userId": "04602594913862593282" 51 | }, 52 | "user_tz": -60 53 | }, 54 | "id": "UhY0RQcU3Q1Z" 55 | }, 56 | "outputs": [], 57 | "source": [ 58 | "# input parameter\n", 59 | "example_ror=\"https://ror.org/03vek6s52\"" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": { 65 | "id": "4MApaFfD6knr" 66 | }, 67 | "source": [ 68 | "We may use it to query the ROR API once for the organization's metadata..." 
69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 3, 74 | "metadata": { 75 | "executionInfo": { 76 | "elapsed": 16, 77 | "status": "ok", 78 | "timestamp": 1643279169087, 79 | "user": { 80 | "displayName": "Sandra M", 81 | "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjjehryRcYlqHNFf_9Q6slGN_VZPE0y5QkvOxzG=s64", 82 | "userId": "04602594913862593282" 83 | }, 84 | "user_tz": -60 85 | }, 86 | "id": "YpwZ3mrC3dHO", 87 | "scrolled": true 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "# URL to ROR API\n", 92 | "ROR_API_ENDPOINT = \"https://api.ror.org/organizations\"\n", 93 | "\n", 94 | "# query ROR API for organization's metadata\n", 95 | "def query_ror_api(ror):\n", 96 | " complete_url=requests.utils.requote_uri(ROR_API_ENDPOINT + \"/\" + ror)\n", 97 | " response = requests.get(url=complete_url,\n", 98 | " headers={'Accept': 'application/json'})\n", 99 | " response.raise_for_status()\n", 100 | " result=response.json()\n", 101 | " return result\n", 102 | "\n", 103 | "\n", 104 | "#---- example execution\n", 105 | "# uncomment following lines to see the metadata for specified example_ror\n", 106 | "#import pprint\n", 107 | "#organization_data = query_ror_api(example_ror)\n", 108 | "#pprint.pprint(organization_data)" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": { 114 | "id": "T25jtRBf3c1T" 115 | }, 116 | "source": [ 117 | "but in this notebook we use it as a starting point to recursively query the ROR API using the relationship type \"`Child`\" to construct the organizational hierarchy below it." 
118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 4, 123 | "metadata": { 124 | "colab": { 125 | "base_uri": "https://localhost:8080/" 126 | }, 127 | "executionInfo": { 128 | "elapsed": 6559, 129 | "status": "ok", 130 | "timestamp": 1643279175634, 131 | "user": { 132 | "displayName": "Sandra M", 133 | "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjjehryRcYlqHNFf_9Q6slGN_VZPE0y5QkvOxzG=s64", 134 | "userId": "04602594913862593282" 135 | }, 136 | "user_tz": -60 137 | }, 138 | "id": "4jh1liI56A4x", 139 | "outputId": "ea1cbf18-6c90-4aa3-bfcb-10addcaa9e8f", 140 | "pycharm": { 141 | "is_executing": true 142 | } 143 | }, 144 | "outputs": [ 145 | { 146 | "name": "stdout", 147 | "output_type": "stream", 148 | "text": [ 149 | "Node('/Harvard University')\n", 150 | "├── Node('/Harvard University/Athinoula A. Martinos Center for Biomedical Imaging')\n", 151 | "├── Node('/Harvard University/Berenson Allen Center for Noninvasive Brain Stimulation')\n", 152 | "├── Node('/Harvard University/Center for Astrophysics Harvard & Smithsonian')\n", 153 | "│ ├── Node('/Harvard University/Center for Astrophysics Harvard & Smithsonian/Harvard College Observatory')\n", 154 | "│ └── Node('/Harvard University/Center for Astrophysics Harvard & Smithsonian/Smithsonian Astrophysical Observatory')\n", 155 | "├── Node('/Harvard University/Center for Systems Biology')\n", 156 | "├── Node('/Harvard University/Center for Vascular Biology Research')\n", 157 | "├── Node('/Harvard University/Gordon Center for Medical Imaging')\n", 158 | "├── Node('/Harvard University/Harvard Stem Cell Institute')\n", 159 | "├── Node('/Harvard University/Harvard University Press')\n", 160 | "├── Node('/Harvard University/MIT-Harvard Center for Ultracold Atoms')\n", 161 | "├── Node('/Harvard University/Ragon Institute of MGH, MIT and Harvard')\n", 162 | "├── Node('/Harvard University/Sleep and Human Health Institute')\n", 163 | "└── Node('/Harvard University/The NSF AI Institute for 
Artificial Intelligence and Fundamental Interactions')\n" 164 | ] 165 | } 166 | ], 167 | "source": [ 168 | "# construct organizational tree recursively starting at given ROR\n", 169 | "def construct_tree(ror, parent=None):\n", 170 | " organization = query_ror_api(ror)\n", 171 | " current_node = Node(organization[\"name\"], parent=parent)\n", 172 | "\n", 173 | " for rel in organization['relationships']:\n", 174 | " if rel[\"type\"]==\"Child\":\n", 175 | " construct_tree(rel[\"id\"], current_node)\n", 176 | "\n", 177 | " return current_node\n", 178 | "\n", 179 | "\n", 180 | "#---- example execution\n", 181 | "organigram = construct_tree(example_ror)\n", 182 | "print(RenderTree(organigram))" 183 | ] 184 | } 185 | ], 186 | "metadata": { 187 | "colab": { 188 | "authorship_tag": "ABX9TyPEl1r8wtGqaTN4CJj87Lso", 189 | "name": "ror-organigram.ipynb", 190 | "provenance": [ 191 | { 192 | "file_id": "1yJn6R6ixeEZFU47XyeZsTiqhU-W7AS2H", 193 | "timestamp": 1643279199327 194 | } 195 | ] 196 | }, 197 | "kernelspec": { 198 | "display_name": "Python 3 (ipykernel)", 199 | "language": "python", 200 | "name": "python3" 201 | }, 202 | "language_info": { 203 | "codemirror_mode": { 204 | "name": "ipython", 205 | "version": 3 206 | }, 207 | "file_extension": ".py", 208 | "mimetype": "text/x-python", 209 | "name": "python", 210 | "nbconvert_exporter": "python", 211 | "pygments_lexer": "ipython3", 212 | "version": "3.9.6" 213 | } 214 | }, 215 | "nbformat": 4, 216 | "nbformat_minor": 1 217 | } 218 | -------------------------------------------------------------------------------- /organization-people/README.md: -------------------------------------------------------------------------------- 1 | ## organization-people 2 | 3 | A collection of Jupyter notebooks showing examples of using a persistent identifier for an organization (here ROR ID) as input for different APIs of PID providers or PID Graphs and retrieving all people (identified by an ORCID iD) connected to it. 
4 | 5 | Currently available PID Graphs: 6 | * [FREYA PID Graph](https://blog.datacite.org/powering-the-pid-graph/) 7 | * [OpenAlex](https://openalex.org/about) 8 | * [ORCID](https://orcid.org/) -------------------------------------------------------------------------------- /organization-people/freya_get_people_by_organization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "etxiXTW668ZD" 7 | }, 8 | "source": [ 9 | "### Query the FREYA PID Graph for people affiliated with an organization\n", 10 | "\n", 11 | "This notebook queries the [FREYA PID Graph](https://blog.datacite.org/powering-the-pid-graph/) via [Datacite's GraphQL API](https://api.datacite.org/graphql) to retrieve people affiliated with an organization. It takes a ROR URL as input which is used internally to retrieve the Grid ID from the ROR API and Ringgold ID from Wikidata and use these identifiers to [find ORCID record holders at the institution](https://info.orcid.org/faq/how-do-i-find-orcid-record-holders-at-my-institution/). From the resulting list of people we output the ORCID iDs." 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "metadata": { 18 | "id": "8Mk7-aYc7x3A", 19 | "scrolled": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "# Prerequisites:\n", 24 | "import requests # dependency to make HTTP calls\n", 25 | "from benedict import benedict # dependency for dealing with json" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": { 31 | "id": "J31_ejB6bWqd" 32 | }, 33 | "source": [ 34 | "The input for this notebook is a ROR URL, e.g. 
'`https://ror.org/021k10z87`'" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 2, 40 | "metadata": { 41 | "executionInfo": { 42 | "elapsed": 15, 43 | "status": "ok", 44 | "timestamp": 1643208788232, 45 | "user": { 46 | "displayName": "", 47 | "photoUrl": "", 48 | "userId": "" 49 | }, 50 | "user_tz": -60 51 | }, 52 | "id": "UwYUsbnMbZnI" 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "# input parameter\n", 57 | "example_ror=\"https://ror.org/021k10z87\"" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": { 63 | "id": "ba_A3Anpbl4P" 64 | }, 65 | "source": [ 66 | "We use it to query Datacite's GraphQL API for the organization's metadata and all people connected to it.\n", 67 | "Since the API uses pagination, we need to loop through all pages to get the complete result set.\n" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 3, 73 | "metadata": { 74 | "executionInfo": { 75 | "elapsed": 226, 76 | "status": "ok", 77 | "timestamp": 1643208819281, 78 | "user": { 79 | "displayName": "", 80 | "photoUrl": "", 81 | "userId": "" 82 | }, 83 | "user_tz": -60 84 | }, 85 | "id": "7FAu2l388OeD" 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "# Datacite's GraphQL endpoint for the FREYA PID Graph\n", 90 | "DATACITE_GRAPHQL_API = \"https://api.datacite.org/graphql\"\n", 91 | "\n", 92 | "# Query to retrieve an organization and all its affiliated people\n", 93 | "QUERY_ORGA2PEOPLE = \"\"\"query organization($ror :ID!, $after:String){\n", 94 | "organization(id: $ror) {\n", 95 | " people(first: 1000, after: $after) {\n", 96 | " totalCount\n", 97 | " pageInfo {\n", 98 | " endCursor\n", 99 | " hasNextPage\n", 100 | " }\n", 101 | "\n", 102 | " nodes {\n", 103 | " id\n", 104 | " name\n", 105 | " }\n", 106 | " }\n", 107 | " }\n", 108 | "}\"\"\"\n", 109 | "\n", 110 | "# query the freya pid graph for all people connected to given ROR\n", 111 | "def query_freya_for_orga2people(ror):\n", 112 | " continue_paginating = True\n", 
113 | " cursor=\"\"\n", 114 | " \n", 115 | " while continue_paginating:\n", 116 | " vars = {'ror': ror, 'after': cursor}\n", 117 | " response = requests.post(url=DATACITE_GRAPHQL_API,\n", 118 | " json={'query': QUERY_ORGA2PEOPLE, 'variables': vars},\n", 119 | " headers={'Accept': 'application/json'})\n", 120 | " response.raise_for_status()\n", 121 | " result=response.json()\n", 122 | " if 'errors' in result:\n", 123 | " raise requests.exceptions.HTTPError(result)\n", 124 | "\n", 125 | " # check if next page exists and set cursor to next page\n", 126 | " continue_paginating = has_next_page(result)\n", 127 | " cursor = next_cursor(result)\n", 128 | " yield result\n", 129 | "\n", 130 | "# check if there is another page with results to query\n", 131 | "def has_next_page(response_data):\n", 132 | " resp_dict = benedict.from_json(response_data)\n", 133 | " has_next_page = resp_dict.get(\"data.organization.people.pageInfo.hasNextPage\")\n", 134 | " return has_next_page\n", 135 | "\n", 136 | "# set cursor to next value\n", 137 | "def next_cursor(response_data):\n", 138 | " resp_dict = benedict.from_json(response_data)\n", 139 | " cursor = resp_dict.get(\"data.organization.people.pageInfo.endCursor\")\n", 140 | " return cursor\n", 141 | "\n", 142 | "\n", 143 | "#--- example execution\n", 144 | "list_of_pages=query_freya_for_orga2people(example_ror)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": { 150 | "id": "2lR-J8vUcI5-" 151 | }, 152 | "source": [ 153 | "From the returned pages we extract the list of people and for each person we extract and print out their name and ORCID iD." 
154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 4, 159 | "metadata": { 160 | "executionInfo": { 161 | "elapsed": 261, 162 | "status": "ok", 163 | "timestamp": 1643208827139, 164 | "user": { 165 | "displayName": "", 166 | "photoUrl": "", 167 | "userId": "" 168 | }, 169 | "user_tz": -60 170 | }, 171 | "id": "lQqnqydz2hUh" 172 | }, 173 | "outputs": [ 174 | { 175 | "name": "stdout", 176 | "output_type": "stream", 177 | "text": [ 178 | "0000-0002-3783-6130, Irene Weipert-Fenner\n", 179 | "0000-0002-5452-0488, Hans-Joachim Spanger\n", 180 | "0000-0002-4621-9687, Simone Schnabel\n", 181 | "0000-0001-6731-5304, Julia Eckert\n", 182 | "0000-0001-6746-1248, Anton Peez\n", 183 | "0000-0003-1575-9688, Hendrik Simon\n", 184 | "0000-0002-1712-2624, Julian Junk\n", 185 | "0000-0003-0035-5840, Raphael Oidtmann\n", 186 | "0000-0002-5925-043X, Ariadne Natal\n", 187 | "0000-0002-7012-6739, Peter Kreuzer\n", 188 | "0000-0001-7843-4480, Dirk Peters\n", 189 | "0000-0001-6823-6819, Janna Lisa Chalmovsky\n", 190 | "0000-0003-1940-8877, Mikhail Polianskii\n", 191 | "0000-0001-7302-444X, Katja Freistein\n", 192 | "0000-0002-8739-2486, Elvira Rosert\n", 193 | "0000-0001-7286-3575, Paul Chambers\n", 194 | "0000-0003-0039-9827, Eldad Ben Aharon\n", 195 | "0000-0002-4259-6071, Felix S. 
Bethke\n" 196 | ] 197 | } 198 | ], 199 | "source": [ 200 | "# from the result pages we get from the GraphQL API, extract the data about the people\n", 201 | "def extract_people_from_page(page):\n", 202 | " page_dict=benedict.from_json(page)\n", 203 | " return [person for person in page_dict.get('data.organization.people.nodes') or []]\n", 204 | "\n", 205 | "# extract ORCID from person\n", 206 | "def extract_orcid(person):\n", 207 | " person_dict = benedict.from_json(person)\n", 208 | " orcid = person_dict.get('id').replace(\"https://orcid.org/\", \"\")\n", 209 | " name = person_dict.get('name')\n", 210 | " return orcid, name\n", 211 | "\n", 212 | "\n", 213 | "#--- example execution\n", 214 | "for page in list_of_pages or []:\n", 215 | " people=extract_people_from_page(page)\n", 216 | " for person in people or []:\n", 217 | " orcid, name = extract_orcid(person)\n", 218 | " print(f\"{orcid}, {name}\")" 219 | ] 220 | } 221 | ], 222 | "metadata": { 223 | "colab": { 224 | "authorship_tag": "ABX9TyOPyixqZithrfY0TncA4o1K", 225 | "name": "freya_get_people_by_organization.ipynb", 226 | "provenance": [ 227 | { 228 | "file_id": "https://github.com/Project-TAPIR/pidgraph-notebooks/blob/organization-people/organization-people/freya_get_people_by_organization.ipynb", 229 | "timestamp": 1643208926409 230 | } 231 | ] 232 | }, 233 | "kernelspec": { 234 | "display_name": "Python 3 (ipykernel)", 235 | "language": "python", 236 | "name": "python3" 237 | }, 238 | "language_info": { 239 | "codemirror_mode": { 240 | "name": "ipython", 241 | "version": 3 242 | }, 243 | "file_extension": ".py", 244 | "mimetype": "text/x-python", 245 | "name": "python", 246 | "nbconvert_exporter": "python", 247 | "pygments_lexer": "ipython3", 248 | "version": "3.9.6" 249 | } 250 | }, 251 | "nbformat": 4, 252 | "nbformat_minor": 1 253 | } 254 | -------------------------------------------------------------------------------- /organization-people/openalex_get_people_by_organization.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "ac7bedaf-05fb-4eb0-9bf5-e4d1d68a08c3", 6 | "metadata": { 7 | "id": "ac7bedaf-05fb-4eb0-9bf5-e4d1d68a08c3" 8 | }, 9 | "source": [ 10 | "### Query OpenAlex for people affiliated with an organization\n", 11 | "This notebook queries the [OpenAlex API](https://docs.openalex.org/api) via its '`/authors`' endpoint for all authors affiliated with an organization.\n", 12 | "It takes a ROR URL as input which is used to retrieve authors with the specified ROR URL in their metadata field '`last_known_institution.ror`'. From the resulting list of people we output the ORCID iDs." 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 1, 18 | "id": "IUqshUWKwSk2", 19 | "metadata": { 20 | "executionInfo": { 21 | "elapsed": 8, 22 | "status": "ok", 23 | "timestamp": 1643210415322, 24 | "user": { 25 | "displayName": "", 26 | "photoUrl": "", 27 | "userId": "" 28 | }, 29 | "user_tz": -60 30 | }, 31 | "id": "IUqshUWKwSk2" 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "# Prerequisites:\n", 36 | "import requests # dependency to make HTTP calls" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "id": "nSJjdkxGdWll", 42 | "metadata": { 43 | "id": "nSJjdkxGdWll" 44 | }, 45 | "source": [ 46 | "The input for the notebook is a ROR URL, e.g. 
'`https://ror.org/021k10z87`'" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 2, 52 | "id": "7EryzPledIp6", 53 | "metadata": { 54 | "executionInfo": { 55 | "elapsed": 6, 56 | "status": "ok", 57 | "timestamp": 1643210415322, 58 | "user": { 59 | "displayName": "", 60 | "photoUrl": "", 61 | "userId": "" 62 | }, 63 | "user_tz": -60 64 | }, 65 | "id": "7EryzPledIp6" 66 | }, 67 | "outputs": [], 68 | "source": [ 69 | "# input parameter\n", 70 | "example_ror=\"https://ror.org/021k10z87\"" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "id": "MiXVDKXid9tq", 76 | "metadata": { 77 | "id": "MiXVDKXid9tq" 78 | }, 79 | "source": [ 80 | "We use it to query the OpenAlex API endpoint for `'authors'` with the specified ROR URL in their metadata field '`last_known_institution.ror`'. Since the API uses [pagination](https://docs.openalex.org/api/get-lists-of-entities#pagination), we need to loop through all pages to get the complete result set." 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 3, 86 | "id": "8b608640-96a8-47d1-9de7-b7d3f6fd5a47", 87 | "metadata": { 88 | "executionInfo": { 89 | "elapsed": 5, 90 | "status": "ok", 91 | "timestamp": 1643210415323, 92 | "user": { 93 | "displayName": "", 94 | "photoUrl": "", 95 | "userId": "" 96 | }, 97 | "user_tz": -60 98 | }, 99 | "id": "8b608640-96a8-47d1-9de7-b7d3f6fd5a47" 100 | }, 101 | "outputs": [], 102 | "source": [ 103 | "# OpenAlex endpoint to query for authors\n", 104 | "OPENALEX_API_AUTHORS = \"https://api.openalex.org/authors\"\n", 105 | "\n", 106 | "# query all people that are connected to given ROR\n", 107 | "def query_openalex_for_orga2people(ror):\n", 108 | " page = 1\n", 109 | " max_page = 1\n", 110 | " \n", 111 | " while page <= max_page:\n", 112 | " params = {'filter': 'last_known_institution.ror:' + ror, 'page': page}\n", 113 | " response = requests.get(url=OPENALEX_API_AUTHORS,\n", 114 | " params=params,\n", 115 | " headers= {'Accept': 'application/json'})\n", 
116 | " response.raise_for_status()\n", 117 | " result=response.json()\n", 118 | "\n", 119 | " # calculate max page number in first loop\n", 120 | " if max_page == 1:\n", 121 | " max_page = determine_max_page(result)\n", 122 | " page = page + 1\n", 123 | " yield result\n", 124 | "\n", 125 | "# calculate max number of result pages\n", 126 | "def determine_max_page(response_data):\n", 127 | " item_count = response_data['meta']['count']\n", 128 | " items_per_page = response_data['meta']['per_page']\n", 129 | " max_page_ceil = item_count // items_per_page + bool(item_count % items_per_page)\n", 130 | " return max_page_ceil\n", 131 | "\n", 132 | "\n", 133 | "#--- example execution\n", 134 | "list_of_pages=query_openalex_for_orga2people(example_ror)" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "id": "CwRzvAQweuoW", 140 | "metadata": { 141 | "id": "CwRzvAQweuoW" 142 | }, 143 | "source": [ 144 | "From the resulting list of people we extract and print out each ORCID iD and name.\n", 145 | "\n", 146 | "*Note: people that do not have an ORCID within their `ids` metadata, will not be printed.*" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 4, 152 | "id": "1c36737c-4dcf-42d5-80e2-802f0a7a8326", 153 | "metadata": { 154 | "colab": { 155 | "base_uri": "https://localhost:8080/" 156 | }, 157 | "executionInfo": { 158 | "elapsed": 3186, 159 | "status": "ok", 160 | "timestamp": 1643210418504, 161 | "user": { 162 | "displayName": "", 163 | "photoUrl": "", 164 | "userId": "" 165 | }, 166 | "user_tz": -60 167 | }, 168 | "id": "1c36737c-4dcf-42d5-80e2-802f0a7a8326", 169 | "outputId": "5efd986b-0b92-4b0d-e5cc-a65aeae2785e" 170 | }, 171 | "outputs": [ 172 | { 173 | "name": "stdout", 174 | "output_type": "stream", 175 | "text": [ 176 | "0000-0002-3824-5375, Nicole Deitelhoff\n", 177 | "0000-0002-7348-7206, Jonas Wolff\n", 178 | "0000-0002-3536-8898, Felix Anderl\n", 179 | "0000-0002-4259-6071, Felix S. 
Bethke\n", 180 | "0000-0002-3136-0901, Thorsten Gromes\n", 181 | "0000-0001-9698-2616, Annika Elena Poppe\n", 182 | "0000-0002-3783-6130, Irene Weipert-Fenner\n", 183 | "0000-0002-4793-9010, Arvid Bell\n", 184 | "0000-0002-7012-6739, Peter Kreuzer\n", 185 | "0000-0002-1185-8673, Daniel Mullis\n", 186 | "0000-0001-6746-1248, Anton Peez\n" 187 | ] 188 | } 189 | ], 190 | "source": [ 191 | "# from the result pages we get from the OpenAlex API, extract the data about authors\n", 192 | "def extract_authors_from_page(page):\n", 193 | " return [author for author in page.get('results') or []]\n", 194 | "\n", 195 | "# extract ORCID from author\n", 196 | "def extract_orcid(author):\n", 197 | " orcid=author.get('ids', {}).get('orcid') or \"\"\n", 198 | " orcid_id=orcid.replace(\"https://orcid.org/\", \"\") if orcid else orcid\n", 199 | " name=author.get('display_name', \"\")\n", 200 | " return orcid_id, name\n", 201 | "\n", 202 | "\n", 203 | "#--- example execution\n", 204 | "for page in list_of_pages or []:\n", 205 | " authors=extract_authors_from_page(page)\n", 206 | " for author in authors or []:\n", 207 | " orcid, name=extract_orcid(author)\n", 208 | " if orcid:\n", 209 | " print(f\"{orcid}, {name}\")" 210 | ] 211 | } 212 | ], 213 | "metadata": { 214 | "colab": { 215 | "collapsed_sections": [], 216 | "name": "openalex_get_people_by_organization.ipynb", 217 | "provenance": [ 218 | { 219 | "file_id": "https://github.com/Project-TAPIR/pidgraph-notebooks/blob/organization-people/organization-people/openalex_get_people_by_organization.ipynb", 220 | "timestamp": 1643210429142 221 | } 222 | ] 223 | }, 224 | "kernelspec": { 225 | "display_name": "Python 3 (ipykernel)", 226 | "language": "python", 227 | "name": "python3" 228 | }, 229 | "language_info": { 230 | "codemirror_mode": { 231 | "name": "ipython", 232 | "version": 3 233 | }, 234 | "file_extension": ".py", 235 | "mimetype": "text/x-python", 236 | "name": "python", 237 | "nbconvert_exporter": "python", 238 | "pygments_lexer": 
"ipython3", 239 | "version": "3.9.6" 240 | } 241 | }, 242 | "nbformat": 4, 243 | "nbformat_minor": 5 244 | } 245 | -------------------------------------------------------------------------------- /organization-people/orcid_get_people_by_organization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "u4HQPvDxKyjs" 7 | }, 8 | "source": [ 9 | "### Query ORCID for people affiliated with an organization and filter for current employees only\n", 10 | "\n", 11 | "This notebook queries the [ORCID Public API](https://api.orcid.org/v3.0/) for all [people affiliated with an organization](https://info.orcid.org/faq/how-do-i-find-orcid-record-holders-at-my-institution/) and additionally narrows down the affiliation to people **currently employed** by the organization. From the resulting list of people we output the ORCID iDs.\n", 12 | "\n", 13 | "*Disclosure:\n", 14 | "The process of querying the ROR API for additional identifiers and using them to query the ORCID API for affiliated people is the same as used by the [FREYA PID Graph](https://blog.datacite.org/powering-the-pid-graph/) and is implemented in [DataCite Application API](https://doi.org/10.5438/8gb0-v673).*" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": { 21 | "id": "5s5h9I5OKefn", 22 | "scrolled": true 23 | }, 24 | "outputs": [], 25 | "source": [ 26 | "# Prerequisites:\n", 27 | "import requests # dependency to make HTTP calls\n", 28 | "from benedict import benedict # dependency for dealing with json" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": { 34 | "id": "JqXxTHg026Tk" 35 | }, 36 | "source": [ 37 | "The input for this notebook is a ROR URL or ID, e.g. '`https://ror.org/04aj4c181`' or '`04aj4c181`'." 
38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 2, 43 | "metadata": { 44 | "executionInfo": { 45 | "elapsed": 8, 46 | "status": "ok", 47 | "timestamp": 1643285312923, 48 | "user": { 49 | "displayName": "", 50 | "photoUrl": "", 51 | "userId": "" 52 | }, 53 | "user_tz": -60 54 | }, 55 | "id": "tAoAtVZP25JT" 56 | }, 57 | "outputs": [], 58 | "source": [ 59 | "example_ror=\"https://ror.org/04aj4c181\"" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": { 65 | "id": "mmV5ar17CiSO" 66 | }, 67 | "source": [ 68 | "### Organization metadata\n", 69 | "The first step is to call the [ROR API](https://ror.readme.io/) for the organization's metadata." 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 3, 75 | "metadata": { 76 | "executionInfo": { 77 | "elapsed": 763, 78 | "status": "ok", 79 | "timestamp": 1643285313679, 80 | "user": { 81 | "displayName": "", 82 | "photoUrl": "", 83 | "userId": "" 84 | }, 85 | "user_tz": -60 86 | }, 87 | "id": "FKpMNpLLLYaZ", 88 | "pycharm": { 89 | "is_executing": true 90 | } 91 | }, 92 | "outputs": [], 93 | "source": [ 94 | "# URL to ROR API\n", 95 | "ROR_API_ENDPOINT = \"https://api.ror.org/organizations\"\n", 96 | "\n", 97 | "# query ROR API for organization's metadata\n", 98 | "def query_ror_api(ror):\n", 99 | " complete_url=requests.utils.requote_uri(ROR_API_ENDPOINT + \"/\" + ror)\n", 100 | " response = requests.get(url=complete_url,\n", 101 | " headers={'Accept': 'application/json'})\n", 102 | " response.raise_for_status()\n", 103 | " result=response.json()\n", 104 | " return result\n", 105 | "\n", 106 | "#--- example execution\n", 107 | "ror_data=query_ror_api(example_ror)\n", 108 | "organization_ror_id=example_ror.replace(\"https://ror.org/\", \"\")\n", 109 | "# if you want to see the retrieved metadata, uncomment next lines\n", 110 | "#import pprint\n", 111 | "#pprint.pprint(ror_data)" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": { 117 | "id": 
"mxOGfasxMAaA" 118 | }, 119 | "source": [ 120 | "In particular we are interested in the organization's Grid ID and Wikidata ID." 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 4, 126 | "metadata": { 127 | "colab": { 128 | "base_uri": "https://localhost:8080/" 129 | }, 130 | "executionInfo": { 131 | "elapsed": 6, 132 | "status": "ok", 133 | "timestamp": 1643285313679, 134 | "user": { 135 | "displayName": "", 136 | "photoUrl": "", 137 | "userId": "" 138 | }, 139 | "user_tz": -60 140 | }, 141 | "id": "wSwVZgSELik3", 142 | "outputId": "c668459d-9115-4070-a28e-decab0d1d623", 143 | "pycharm": { 144 | "is_executing": true 145 | } 146 | }, 147 | "outputs": [ 148 | { 149 | "name": "stdout", 150 | "output_type": "stream", 151 | "text": [ 152 | "Grid ID: grid.461819.3\n", 153 | "Wikidata ID: Q2399120\n" 154 | ] 155 | } 156 | ], 157 | "source": [ 158 | "def extract_grid_from_ror_data(ror_data):\n", 159 | " orga_dict = benedict.from_json(ror_data)\n", 160 | " path_to_grid_id = \"external_ids.GRID.all\"\n", 161 | " grid_id = orga_dict.get(path_to_grid_id)\n", 162 | " return grid_id\n", 163 | "\n", 164 | "def extract_wikidata_from_ror_data(ror_data):\n", 165 | " orga_dict = benedict.from_json(ror_data)\n", 166 | " path_to_wikidata_id = \"external_ids.Wikidata.all[0]\"\n", 167 | " wikidata_id = orga_dict.get(path_to_wikidata_id)\n", 168 | " return wikidata_id\n", 169 | "\n", 170 | "\n", 171 | "#--- example execution\n", 172 | "organization_grid_id=extract_grid_from_ror_data(ror_data)\n", 173 | "print(\"Grid ID: \" + str(organization_grid_id or ''))\n", 174 | "organization_wikidata_id=extract_wikidata_from_ror_data(ror_data)\n", 175 | "print(\"Wikidata ID: \" + str(organization_wikidata_id or ''))" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": { 181 | "id": "oLZD9V1lzyQ4" 182 | }, 183 | "source": [ 184 | "We use the Wikidata ID of the organization to query Wikidata for the Ringgold ID of the organization." 
185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 5, 190 | "metadata": { 191 | "colab": { 192 | "base_uri": "https://localhost:8080/" 193 | }, 194 | "executionInfo": { 195 | "elapsed": 543, 196 | "status": "ok", 197 | "timestamp": 1643285314219, 198 | "user": { 199 | "displayName": "", 200 | "photoUrl": "", 201 | "userId": "" 202 | }, 203 | "user_tz": -60 204 | }, 205 | "id": "v8S4dwfM0A-1", 206 | "outputId": "79360115-9b93-4ac3-9472-711a0df3a700", 207 | "pycharm": { 208 | "is_executing": true 209 | } 210 | }, 211 | "outputs": [ 212 | { 213 | "name": "stdout", 214 | "output_type": "stream", 215 | "text": [ 216 | "Ringgold ID: 28359\n" 217 | ] 218 | } 219 | ], 220 | "source": [ 221 | "WIKIDATA_API = \"https://www.wikidata.org/w/api.php\"\n", 222 | "\n", 223 | "# query Wikidata with an organization's Wikidata ID\n", 224 | "def query_wikidata_api(wikidata_id):\n", 225 | " response = requests.get(url=WIKIDATA_API,\n", 226 | " params={'action': 'wbgetentities', 'ids': wikidata_id, 'props':'claims', 'format':'json'},\n", 227 | " headers={'Accept': 'application/json'})\n", 228 | " response.raise_for_status()\n", 229 | " result=response.json()\n", 230 | " return result\n", 231 | "\n", 232 | "def extract_ringgold_from_wikidata_data(wikidata, wikidata_id):\n", 233 | " wikidata_dict = benedict.from_json(wikidata)\n", 234 | " path_to_ringgold_id = f\"entities.{wikidata_id}.claims.P3500[0].mainsnak.datavalue.value\"\n", 235 | " ringgold_id = wikidata_dict.get(path_to_ringgold_id)\n", 236 | " return ringgold_id\n", 237 | "\n", 238 | "\n", 239 | "#--- example execution\n", 240 | "wikidata_data = query_wikidata_api(organization_wikidata_id)\n", 241 | "organization_ringgold_id = extract_ringgold_from_wikidata_data(wikidata_data, organization_wikidata_id)\n", 242 | "print(\"Ringgold ID: \" + str(organization_ringgold_id or ''))" 243 | ] 244 | }, 245 | { 246 | "cell_type": "markdown", 247 | "metadata": { 248 | "id": "Tx8YWNLZ7_fx" 249 | }, 250 | "source": [ 
251 | "To sum up the process up until now:\n", 252 | "1. We used an organization's ROR ID to query the ROR API for an organization's Grid ID and Wikidata ID. \n", 253 | "2. We use Wikidata as intermediary to retrieve the Ringgold ID of the organization.\n", 254 | "\n", 255 | "![organization_data.png]()" 256 | ] 257 | }, 258 | { 259 | "cell_type": "markdown", 260 | "metadata": { 261 | "id": "tQ0ZhMZk_Wcz" 262 | }, 263 | "source": [ 264 | "### Connection organization -> people\n", 265 | "The second part of the process is to query for the people affiliated with the organization. For this we use the ORCID API and search for people affiliated with an organization like it is explained in the ORCID tutorial [\"How do I find ORCID record holders at my institution?\"](https://info.orcid.org/faq/how-do-i-find-orcid-record-holders-at-my-institution/). As parameters for the query we use the ROR ID, Grid ID and Ringgold ID for the organization.\n" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": 6, 271 | "metadata": { 272 | "colab": { 273 | "base_uri": "https://localhost:8080/" 274 | }, 275 | "executionInfo": { 276 | "elapsed": 246, 277 | "status": "ok", 278 | "timestamp": 1643285314461, 279 | "user": { 280 | "displayName": "", 281 | "photoUrl": "", 282 | "userId": "" 283 | }, 284 | "user_tz": -60 285 | }, 286 | "id": "LwhzE2Nc_J-x", 287 | "outputId": "c0804214-1ca6-4600-df92-841e00303237", 288 | "pycharm": { 289 | "is_executing": true 290 | } 291 | }, 292 | "outputs": [ 293 | { 294 | "name": "stdout", 295 | "output_type": "stream", 296 | "text": [ 297 | "Number of affiliated people: 90\n", 298 | "0000-0001-8258-2603, Noreen Krause\n", 299 | "0000-0002-8913-9011, Sandra Mierz\n", 300 | "0000-0002-2013-6920, Gazi Yücel\n", 301 | "0000-0001-6836-1193, Marleen Burger\n", 302 | "0000-0001-9924-9153, Allard Oelen\n", 303 | "0000-0001-5232-9236, Sebastian Becker\n", 304 | "0000-0003-3975-5374, Hassan Hussein\n", 305 | "0000-0002-0687-5460, Britta 
Dreyer\n", 306 | "0000-0001-8777-2780, Mohamad Yaser Jaradeh\n", 307 | "0000-0003-2237-7725, Olga Lezhnina\n", 308 | "0000-0003-3922-8638, Mathias Begoin\n", 309 | "0000-0002-5610-9908, Evelyn Navarrete None\n", 310 | "0000-0003-2749-7988, Clarissa Schnabel\n", 311 | "0000-0001-9758-904X, Susanne Nicolai\n", 312 | "0000-0003-2718-0913, Sven Strobel\n", 313 | "0000-0002-2614-1253, Oleg Nekhayenko\n", 314 | "0000-0002-1266-4029, Anke Tina Krüger\n", 315 | "0000-0003-0929-7528, Markus Prinzhorn\n", 316 | "0000-0002-4311-5620, Mitra Safaei\n", 317 | "0000-0002-1595-3213, Philip Strömert\n", 318 | "0000-0001-5135-5758, Katrin Leinweber\n", 319 | "0000-0002-3680-2086, Jens Kösters\n", 320 | "0000-0002-6347-5666, David Morris\n", 321 | "0000-0002-2874-4832, Matti Stöhr\n", 322 | "0000-0001-5375-3063, Nicola Bieg\n", 323 | "0000-0003-1574-4865, Alexander Pöche\n", 324 | "0000-0001-5322-0478, Judith Ludwig\n", 325 | "0000-0002-1407-7362, Erhun Giray TUNCAY\n", 326 | "0000-0003-1668-3304, Wolfgang Gritz\n", 327 | "0000-0002-6802-1241, Eric Müller-Budack\n", 328 | "0000-0001-9248-5444, Junaid Ahmed Ghauri\n", 329 | "0000-0003-2257-0517, Peter Löwe\n", 330 | "0000-0001-5693-4708, Cedrik Zellmann\n", 331 | "0000-0002-0938-0340, Dana Vosberg\n", 332 | "0000-0002-0698-2864, Sören Auer\n", 333 | "0000-0003-2510-0529, Xenia van Edig\n", 334 | "0000-0002-3060-7052, Felix Engel\n", 335 | "0000-0002-2342-0636, Anja Mielke\n", 336 | "0000-0002-0021-9729, Salomon KABONGO KABENAMUALU\n", 337 | "0000-0003-0524-1834, Bernhard Tempel\n", 338 | "0000-0001-8824-8390, Jessika Rücknagel\n", 339 | "0000-0002-7760-5708, Nils Nommensen\n", 340 | "0000-0002-0719-5440, Katharina Schulz\n", 341 | "0000-0003-1537-2862, Svantje Lilienthal\n", 342 | "0000-0001-6260-7578, Linna Lu\n", 343 | "0000-0002-5320-0220, Martin Nissen\n", 344 | "0000-0003-1132-7220, Silvia Czerwinski\n", 345 | "0000-0001-5336-6899, Oliver Karras\n", 346 | "0000-0001-9133-4978, Julia Maria Struß\n", 347 | "0000-0001-5492-3212, 
Markus Stocker\n", 348 | "0000-0002-3278-0422, Helene Brinken\n", 349 | "0000-0002-3447-0575, Anna Beer\n", 350 | "0000-0002-1851-0442, Abiodun Ogunyemi\n", 351 | "0000-0002-1442-335X, Disha Purohit\n", 352 | "0000-0001-7408-0611, Rigo Wenning\n", 353 | "0000-0002-7917-3101, Elzbieta Gabrys-Deutscher\n", 354 | "0000-0002-3075-7640, Ina Blümel\n", 355 | "0000-0002-7839-3698, André Castro\n", 356 | "0000-0002-9649-7829, Anette Cordts\n", 357 | "0000-0003-1702-8707, Samaneh Jozashoori\n", 358 | "0000-0001-8080-5308, Luis Ramos\n", 359 | "0000-0002-9362-4968, Anna-Karina Renziehausen\n", 360 | "0000-0003-1043-4964, Anette Ganske\n", 361 | "0000-0003-4081-9646, Pia Rudnik\n", 362 | "0000-0002-1452-9509, Anett Hoppe\n", 363 | "0000-0002-8579-9717, Simon Worthington\n", 364 | "0000-0003-4040-9073, Gelareh Mofakhamsanie\n", 365 | "0000-0001-8920-7515, David Hopf\n", 366 | "0000-0002-2593-8754, Lucia Sohmen\n", 367 | "0000-0003-0226-3608, Christian Otto\n", 368 | "0000-0003-3320-5187, Robert Strötgen\n", 369 | "0000-0001-5712-1565, Shahi DOST\n", 370 | "0000-0003-1800-0351, Esther Tobschall\n", 371 | "0000-0003-0232-7085, Lambert Heller\n", 372 | "0000-0002-3557-9345, Abdolali Faraji\n", 373 | "0000-0002-7992-5668, Nelli Taller\n", 374 | "0000-0002-0310-5831, Sarah Dellmann\n", 375 | "0000-0002-9767-3257, Inken Feldsien-Sudhaus\n", 376 | "0000-0001-5839-0177, Andreas Lütjen\n", 377 | "0000-0002-7325-5114, Brian Cahill\n", 378 | "0000-0003-3184-5930, Anita Eppelin\n", 379 | "0000-0002-0474-2410, Stephanie Hagemann-Wilholt\n", 380 | "0000-0003-2499-7741, Christian Hauschke\n", 381 | "0000-0002-5124-0165, Benjamin Kampe\n", 382 | "0000-0001-7086-6211, Franziska Altemeier\n", 383 | "0000-0002-1019-9151, Susanne Arndt\n", 384 | "0000-0002-1019-3606, Anna Kasprzik\n", 385 | "0000-0003-3709-5608, Michelle Lindlar\n", 386 | "0000-0002-4450-349X, José Alberto Benítez-Andrades\n", 387 | "0000-0001-7460-7794, Paloma Marín-Arraiza\n" 388 | ] 389 | } 390 | ], 391 | "source": [ 392 | "# 
URL for ORCID search API\n", 393 | "ORCID_SEARCH_API = \"https://pub.orcid.org/v3.0/expanded-search/\"\n", 394 | "\n", 395 | "# query ORCID with an organization's ROR, Grid and Ringgold ID\n", 396 | "def query_orcid_for_affiliations(ror_id, grid_id, ringgold_id):\n", 397 | " grid_search = f\"grid-org-id:{grid_id}\" if grid_id else \"\"\n", 398 | " ringgold_search = f\"ringgold-org-id:{ringgold_id}\" if ringgold_id else \"\"\n", 399 | " ror_search = f\"ror-org-id:{ror_id}\" if ror_id else \"\"\n", 400 | " orga_search_ids = [ror_search, grid_search, ringgold_search]\n", 401 | " query = ' OR '.join(filter(None, orga_search_ids))\n", 402 | "\n", 403 | " response = requests.get(url=ORCID_SEARCH_API,\n", 404 | " params={'q': query},\n", 405 | " headers={'Accept': 'application/json'})\n", 406 | " response.raise_for_status()\n", 407 | " result=response.json()\n", 408 | " return result\n", 409 | "\n", 410 | "def extract_orcids_from_affiliated_people(affiliated_people):\n", 411 | " people_dict = benedict.from_json(affiliated_people)\n", 412 | " for person in people_dict.get('expanded-result'):\n", 413 | " orcid=benedict(person).get('orcid-id')\n", 414 | " given_name=benedict(person).get('given-names') \n", 415 | " family_name=benedict(person).get('family-names')\n", 416 | " name = f\"{given_name} {family_name}\"\n", 417 | " yield orcid,name\n", 418 | "\n", 419 | "\n", 420 | "#-- example execution\n", 421 | "affiliated_people = query_orcid_for_affiliations(organization_ror_id, organization_grid_id, organization_ringgold_id)\n", 422 | "affiliated_count = affiliated_people.get('num-found','')\n", 423 | "print(f\"Number of affiliated people: {affiliated_count}\")\n", 424 | "\n", 425 | "affiliated_orcids= extract_orcids_from_affiliated_people(affiliated_people)\n", 426 | "for orcid,name in affiliated_orcids:\n", 427 | " print(f\"{orcid}, {name}\")" 428 | ] 429 | }, 430 | { 431 | "cell_type": "markdown", 432 | "metadata": { 433 | "id": "DUMnQM62MXns" 434 | }, 435 | "source": [ 436 
| "### Filter for current employees\n", 437 | "The connection between organization and people via their affiliation as defined by the ORCID API is quite broad: \n", 438 | "\n", 439 | "* It contains each person that used the organization identifier in one of the sections [employment, education & qualifications, membership & service, invited positions & distinctions](https://info.orcid.org/documentation/integration-guide/working-with-organization-identifiers/) in their ORCID record.\n", 440 | "* Furthermore the connection is not limited to the current affiliation but also contains people that were affiliated with the organization years ago.\n", 441 | "\n", 442 | "--> \n", 443 | "\n", 444 | "That's why we decided to use the ORCID iDs we retrieve via the search API to query the ORCID API for each of their detailed records to narrow the result set down to only people who \n", 445 | "* use one of the organization's IDs in the employment section\n", 446 | "* and that are currently employed (end-date of employment is empty)" 447 | ] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "execution_count": 7, 452 | "metadata": { 453 | "colab": { 454 | "base_uri": "https://localhost:8080/" 455 | }, 456 | "executionInfo": { 457 | "elapsed": 30794, 458 | "status": "ok", 459 | "timestamp": 1643285345253, 460 | "user": { 461 | "displayName": "", 462 | "photoUrl": "", 463 | "userId": "" 464 | }, 465 | "user_tz": -60 466 | }, 467 | "id": "6Ac2mk4vOz1A", 468 | "outputId": "7ef40f11-fffd-447c-80e1-0fb28f6ae767", 469 | "pycharm": { 470 | "is_executing": true 471 | } 472 | }, 473 | "outputs": [ 474 | { 475 | "name": "stdout", 476 | "output_type": "stream", 477 | "text": [ 478 | "Number of current employees: 59\n", 479 | "0000-0002-8913-9011, Sandra Mierz\n", 480 | "0000-0002-2013-6920, Gazi Yücel\n", 481 | "0000-0001-6836-1193, Marleen Burger\n", 482 | "0000-0003-3975-5374, Hassan Hussein\n", 483 | "0000-0003-3922-8638, Mathias Begoin\n", 484 | "0000-0002-5610-9908, Evelyn Navarrete 
None\n", 485 | "0000-0003-2749-7988, Clarissa Schnabel\n", 486 | "0000-0001-9758-904X, Susanne Nicolai\n", 487 | "0000-0003-2718-0913, Sven Strobel\n", 488 | "0000-0002-2614-1253, Oleg Nekhayenko\n", 489 | "0000-0003-0929-7528, Markus Prinzhorn\n", 490 | "0000-0002-1595-3213, Philip Strömert\n", 491 | "0000-0001-5135-5758, Katrin Leinweber\n", 492 | "0000-0002-3680-2086, Jens Kösters\n", 493 | "0000-0002-6347-5666, David Morris\n", 494 | "0000-0002-2874-4832, Matti Stöhr\n", 495 | "0000-0001-5375-3063, Nicola Bieg\n", 496 | "0000-0003-1574-4865, Alexander Pöche\n", 497 | "0000-0001-5322-0478, Judith Ludwig\n", 498 | "0000-0002-1407-7362, Erhun Giray TUNCAY\n", 499 | "0000-0002-6802-1241, Eric Müller-Budack\n", 500 | "0000-0001-9248-5444, Junaid Ahmed Ghauri\n", 501 | "0000-0002-0938-0340, Dana Vosberg\n", 502 | "0000-0002-0698-2864, Sören Auer\n", 503 | "0000-0003-2510-0529, Xenia van Edig\n", 504 | "0000-0002-3060-7052, Felix Engel\n", 505 | "0000-0002-2342-0636, Anja Mielke\n", 506 | "0000-0001-8824-8390, Jessika Rücknagel\n", 507 | "0000-0002-0719-5440, Katharina Schulz\n", 508 | "0000-0001-6260-7578, Linna Lu\n", 509 | "0000-0003-1132-7220, Silvia Czerwinski\n", 510 | "0000-0001-5336-6899, Oliver Karras\n", 511 | "0000-0001-5492-3212, Markus Stocker\n", 512 | "0000-0002-3278-0422, Helene Brinken\n", 513 | "0000-0002-3447-0575, Anna Beer\n", 514 | "0000-0002-1851-0442, Abiodun Ogunyemi\n", 515 | "0000-0002-1442-335X, Disha Purohit\n", 516 | "0000-0002-7917-3101, Elzbieta Gabrys-Deutscher\n", 517 | "0000-0003-1702-8707, Samaneh Jozashoori\n", 518 | "0000-0001-8080-5308, Luis Ramos\n", 519 | "0000-0002-9362-4968, Anna-Karina Renziehausen\n", 520 | "0000-0003-1043-4964, Anette Ganske\n", 521 | "0000-0003-4081-9646, Pia Rudnik\n", 522 | "0000-0002-1452-9509, Anett Hoppe\n", 523 | "0000-0002-8579-9717, Simon Worthington\n", 524 | "0000-0003-4040-9073, Gelareh Mofakhamsanie\n", 525 | "0000-0001-8920-7515, David Hopf\n", 526 | "0000-0002-2593-8754, Lucia Sohmen\n", 527 
| "0000-0003-0232-7085, Lambert Heller\n", 528 | "0000-0002-7992-5668, Nelli Taller\n", 529 | "0000-0002-0310-5831, Sarah Dellmann\n", 530 | "0000-0001-5839-0177, Andreas Lütjen\n", 531 | "0000-0002-7325-5114, Brian Cahill\n", 532 | "0000-0003-3184-5930, Anita Eppelin\n", 533 | "0000-0002-0474-2410, Stephanie Hagemann-Wilholt\n", 534 | "0000-0003-2499-7741, Christian Hauschke\n", 535 | "0000-0002-5124-0165, Benjamin Kampe\n", 536 | "0000-0002-1019-9151, Susanne Arndt\n", 537 | "0000-0003-3709-5608, Michelle Lindlar\n" 538 | ] 539 | } 540 | ], 541 | "source": [ 542 | "# URL for ORCID API\n", 543 | "ORCID_RECORD_API = \"https://pub.orcid.org/v3.0/\"\n", 544 | "\n", 545 | "# query ORCID for an ORCID record\n", 546 | "def query_orcid_for_record(orcid_id):\n", 547 | "\n", 548 | " response = requests.get(url=requests.utils.requote_uri(ORCID_RECORD_API + orcid_id),\n", 549 | " headers={'Accept': 'application/json'})\n", 550 | " response.raise_for_status()\n", 551 | " result=response.json()\n", 552 | " return result\n", 553 | "\n", 554 | "# check if affiliated person is a current employee\n", 555 | "def is_current_employee(orcid_id, grid_id, ringgold_id):\n", 556 | " # get orcid record\n", 557 | " orcid_record = query_orcid_for_record(orcid_id)\n", 558 | "\n", 559 | " #filter for current employees only\n", 560 | " record_dict = benedict.from_json(orcid_record)\n", 561 | " path_to_employments = \"activities-summary.employments.affiliation-group\"\n", 562 | " for employment in record_dict.get(path_to_employments):\n", 563 | " employment_dict = benedict(employment)\n", 564 | " path_to_orga_id = \"summaries[0].employment-summary.organization.disambiguated-organization.disambiguated-organization-identifier\"\n", 565 | " path_to_end_date = \"summaries[0].employment-summary.end-date\"\n", 566 | "\n", 567 | " orga_id = employment_dict.get(path_to_orga_id)\n", 568 | " end_date = employment_dict.get(path_to_end_date)\n", 569 | "\n", 570 | " return not end_date and (orga_id == 
grid_id or orga_id == ringgold_id)\n", 571 | "\n", 572 | "\n", 573 | "#--- example execution\n", 574 | "affiliated_orcids = extract_orcids_from_affiliated_people(affiliated_people)\n", 575 | "employee_orcids = [t for t in affiliated_orcids if is_current_employee(t[0], organization_grid_id, organization_ringgold_id)]\n", 576 | "employee_count = len(employee_orcids)\n", 577 | "print(f\"Number of current employees: {employee_count}\")\n", 578 | "for orcid,name in employee_orcids:\n", 579 | " print(f\"{orcid}, {name}\")" 580 | ] 581 | }, 582 | { 583 | "cell_type": "code", 584 | "execution_count": 8, 585 | "metadata": { 586 | "colab": { 587 | "base_uri": "https://localhost:8080/" 588 | }, 589 | "executionInfo": { 590 | "elapsed": 18, 591 | "status": "ok", 592 | "timestamp": 1643285345254, 593 | "user": { 594 | "displayName": "", 595 | "photoUrl": "", 596 | "userId": "" 597 | }, 598 | "user_tz": -60 599 | }, 600 | "id": "_n-0Ac4KbgpG", 601 | "outputId": "22cd7397-b5d4-4e44-ef76-5daf8ff76028", 602 | "pycharm": { 603 | "is_executing": true 604 | } 605 | }, 606 | "outputs": [ 607 | { 608 | "name": "stdout", 609 | "output_type": "stream", 610 | "text": [ 611 | "--> For this example we were able to narrow down the result from 90 affiliated people to 59 currently employed people.\n" 612 | ] 613 | } 614 | ], 615 | "source": [ 616 | "print(f\"--> For this example we were able to narrow down the result from {affiliated_count} affiliated people to {employee_count} currently employed people.\")" 617 | ] 618 | } 619 | ], 620 | "metadata": { 621 | "colab": { 622 | "authorship_tag": "ABX9TyMv/ZeG2/OIfjQKmxDLHbdH", 623 | "collapsed_sections": [], 624 | "name": "orcid_get_people_by_organization.ipynb", 625 | "provenance": [ 626 | { 627 | "file_id": "https://github.com/Project-TAPIR/pidgraph-notebooks/blob/main/organization-people/orcid_get_people_by_organization.ipynb", 628 | "timestamp": 1643285403772 629 | }, 630 | { 631 | "file_id": 
"https://github.com/Project-TAPIR/pidgraph-notebooks/blob/organization-people/organization-people/orcid_get_people_by_organization.ipynb", 632 | "timestamp": 1643211404649 633 | } 634 | ] 635 | }, 636 | "kernelspec": { 637 | "display_name": "Python 3 (ipykernel)", 638 | "language": "python", 639 | "name": "python3" 640 | }, 641 | "language_info": { 642 | "codemirror_mode": { 643 | "name": "ipython", 644 | "version": 3 645 | }, 646 | "file_extension": ".py", 647 | "mimetype": "text/x-python", 648 | "name": "python", 649 | "nbconvert_exporter": "python", 650 | "pygments_lexer": "ipython3", 651 | "version": "3.9.6" 652 | } 653 | }, 654 | "nbformat": 4, 655 | "nbformat_minor": 1 656 | } 657 | -------------------------------------------------------------------------------- /organization-works/Readme.md: -------------------------------------------------------------------------------- 1 | ## Organization-works 2 | 3 | A collection of Jupyter notebooks showing examples of using a persistent identifier for an organization (here ROR ID) as input for different APIs of PID providers or PID Graphs and retrieving all works (identified by a DOI) connected to it. 4 | 5 | Currently available PID Graphs: 6 | 7 | * [OpenAlex](https://openalex.org/about) 8 | * [Crossref](https://www.crossref.org/) 9 | -------------------------------------------------------------------------------- /organization-works/crossref_get_works_by_organization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Query Crossref for works and their funders authored by an organization\n", 8 | "\n", 9 | "This notebook queries the [Crossref API](http://api.crossref.org) via its `'/works'` endpoint for works authored by an institution. 
It takes a ROR ID as input, which is used to filter for all works where one of the authors' `'ror'` fields matches the given ROR ID. From the resulting list of works we output all DOIs and their titles.\n" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "#### Examples of ror_ids\n", 17 | "\n", 18 | "University of Osnabrück = https://ror.org/04qmmjx98\n", 19 | "\n", 20 | "TIB Hannover = https://ror.org/04aj4c181" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 1, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "# Prerequisites:\n", 30 | "import requests # dependency to make HTTP calls\n", 31 | "from habanero import Crossref # lib for querying crossref api" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 2, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "# input ror\n", 41 | "ror_id=\"https://ror.org/04qmmjx98\"" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "We use it to query the Crossref API via its '`/works`' endpoint and set a filter for the `ror` field to match the given ROR ID. 
" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 3, 54 | "metadata": { 55 | "executionInfo": { 56 | "elapsed": 485, 57 | "status": "ok", 58 | "timestamp": 1643057866151, 59 | "user": { 60 | "displayName": "Sandra M", 61 | "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjjehryRcYlqHNFf_9Q6slGN_VZPE0y5QkvOxzG=s64", 62 | "userId": "04602594913862593282" 63 | }, 64 | "user_tz": -60 65 | }, 66 | "id": "AIuILzlS6zv9", 67 | "scrolled": false 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "# we are using an existing library for querying the api crossref \n", 72 | "\n", 73 | "def searchror(ror_id):\n", 74 | " list_of_pages=Crossref().works(filter = {'ror-id': ror_id}, cursor = \"*\", select = \"DOI,title\")\n", 75 | " for page in list_of_pages: \n", 76 | " for item in page['message']['items']:\n", 77 | " print(f\"{item['DOI']},{item['title']}\")" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "*Note: Searching with an ror id can take a long time*" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 4, 90 | "metadata": { 91 | "colab": { 92 | "base_uri": "https://localhost:8080/" 93 | }, 94 | "executionInfo": { 95 | "elapsed": 6, 96 | "status": "ok", 97 | "timestamp": 1643057866152, 98 | "user": { 99 | "displayName": "Sandra M", 100 | "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjjehryRcYlqHNFf_9Q6slGN_VZPE0y5QkvOxzG=s64", 101 | "userId": "04602594913862593282" 102 | }, 103 | "user_tz": -60 104 | }, 105 | "id": "0bW0T-cv25wN", 106 | "outputId": "247e242b-c97a-47aa-dd5b-a80334ea1c42", 107 | "scrolled": true 108 | }, 109 | "outputs": [ 110 | { 111 | "name": "stdout", 112 | "output_type": "stream", 113 | "text": [ 114 | "10.3762/bjnano.13.53,['Quantitative dynamic force microscopy with inclined tip oscillation']\n", 115 | "10.7554/elife.77227.sa2,['Author response: Transition to siblinghood causes a substantial and long-lasting increase in urinary cortisol levels in 
wild bonobos']\n", 116 | "10.7554/elife.80901.sa2,['Author response: Structure of the HOPS tethering complex, a lysosomal membrane fusion machinery']\n", 117 | "10.7554/elife.80901,['Structure of the HOPS tethering complex, a lysosomal membrane fusion machinery']\n", 118 | "10.7554/elife.79278,['Pathogenic variants of sphingomyelin synthase SMS2 disrupt lipid landscapes in the secretory pathway']\n", 119 | "10.7554/elife.79278.sa2,['Author response: Pathogenic variants of sphingomyelin synthase SMS2 disrupt lipid landscapes in the secretory pathway']\n", 120 | "10.7554/elife.77227,['Transition to siblinghood causes a substantial and long-lasting increase in urinary cortisol levels in wild bonobos']\n" 121 | ] 122 | } 123 | ], 124 | "source": [ 125 | "# main programm \n", 126 | "searchror(ror_id) " 127 | ] 128 | } 129 | ], 130 | "metadata": { 131 | "colab": { 132 | "authorship_tag": "ABX9TyN8GdEZ2emA2pb1j6K+9PwP", 133 | "name": "crossref_get_works_by_person.ipynb", 134 | "provenance": [ 135 | { 136 | "file_id": "1RvDBYtHIK8LG_31cmfKW2PxQ3whxondX", 137 | "timestamp": 1643057922530 138 | } 139 | ] 140 | }, 141 | "kernelspec": { 142 | "display_name": "Python 3 (ipykernel)", 143 | "language": "python", 144 | "name": "python3" 145 | }, 146 | "language_info": { 147 | "codemirror_mode": { 148 | "name": "ipython", 149 | "version": 3 150 | }, 151 | "file_extension": ".py", 152 | "mimetype": "text/x-python", 153 | "name": "python", 154 | "nbconvert_exporter": "python", 155 | "pygments_lexer": "ipython3", 156 | "version": "3.9.13" 157 | } 158 | }, 159 | "nbformat": 4, 160 | "nbformat_minor": 4 161 | } 162 | -------------------------------------------------------------------------------- /person-works/README.md: -------------------------------------------------------------------------------- 1 | ## person-works 2 | 3 | A collection of Jupyter notebooks showing examples of using a persistent identifier for a person (here ORCID URL or iD) as input for different APIs of PID 
providers or PID Graphs and retrieving all works identified by a DOI connected to it. 4 | 5 | Currently available PID Graphs: 6 | * [Crossref](https://www.crossref.org/) 7 | * [FREYA PID Graph](https://blog.datacite.org/powering-the-pid-graph/) 8 | * [OpenAlex](https://openalex.org/about) 9 | * [ORCID](https://orcid.org/) 10 | * [OpenAIRE](https://www.openaire.eu/) -------------------------------------------------------------------------------- /person-works/crossref_get_works_by_person.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "k6HfuEZe48LV" 7 | }, 8 | "source": [ 9 | "### Query Crossref for works authored by a person\n", 10 | "This notebook queries the [Crossref API](http://api.crossref.org) via its `'/works'` endpoint for works authored by a person. It takes an ORCID URL or ORCID iD as input which is used to filter for all works where one of the authors' `orcid` field matches the given ORCID iD.\n", 11 | "From the resulting list of works we output all DOIs." 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "metadata": { 18 | "id": "aV_HkXxJ4pVB", 19 | "scrolled": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "# Prerequisites:\n", 24 | "import requests # dependency to make HTTP calls\n", 25 | "from habanero import Crossref # lib for querying crossref api" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": { 31 | "id": "4z7u62G76IBh" 32 | }, 33 | "source": [ 34 | "The input for this notebook is an ORCID URL or ORCID iD, e.g. 
'`https://orcid.org/0000-0003-2499-7741`' or '`0000-0003-2499-7741`'" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 2, 40 | "metadata": { 41 | "executionInfo": { 42 | "elapsed": 14, 43 | "status": "ok", 44 | "timestamp": 1643057865676, 45 | "user": { 46 | "displayName": "Sandra M", 47 | "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjjehryRcYlqHNFf_9Q6slGN_VZPE0y5QkvOxzG=s64", 48 | "userId": "04602594913862593282" 49 | }, 50 | "user_tz": -60 51 | }, 52 | "id": "rbQX-rGA6M6B" 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "# input parameter\n", 57 | "example_orcid=\"https://orcid.org/0000-0003-2499-7741\"" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": { 63 | "id": "uYK5apGy6i4f" 64 | }, 65 | "source": [ 66 | "We use it to query the Crossref API via its '`/works`' endpoint and set a filter for the `orcid` field to match the given ORCID iD." 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 3, 72 | "metadata": { 73 | "executionInfo": { 74 | "elapsed": 485, 75 | "status": "ok", 76 | "timestamp": 1643057866151, 77 | "user": { 78 | "displayName": "Sandra M", 79 | "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjjehryRcYlqHNFf_9Q6slGN_VZPE0y5QkvOxzG=s64", 80 | "userId": "04602594913862593282" 81 | }, 82 | "user_tz": -60 83 | }, 84 | "id": "AIuILzlS6zv9" 85 | }, 86 | "outputs": [], 87 | "source": [ 88 | "# we are using an existing library for querying the api crossref\n", 89 | "orcid_id= example_orcid.replace(\"https://orcid.org/\", \"\")\n", 90 | "list_of_pages=Crossref().works(filter = {'orcid': orcid_id}, cursor = \"*\", select = \"DOI,title\")" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": { 96 | "id": "CpHKzkCu281O" 97 | }, 98 | "source": [ 99 | "Since the Crossref API uses pagination, we need to loop through all pages to get the complete result set. We print out title and DOI of each work in the result set." 
100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 4, 105 | "metadata": { 106 | "colab": { 107 | "base_uri": "https://localhost:8080/" 108 | }, 109 | "executionInfo": { 110 | "elapsed": 6, 111 | "status": "ok", 112 | "timestamp": 1643057866152, 113 | "user": { 114 | "displayName": "Sandra M", 115 | "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjjehryRcYlqHNFf_9Q6slGN_VZPE0y5QkvOxzG=s64", 116 | "userId": "04602594913862593282" 117 | }, 118 | "user_tz": -60 119 | }, 120 | "id": "0bW0T-cv25wN", 121 | "outputId": "247e242b-c97a-47aa-dd5b-a80334ea1c42" 122 | }, 123 | "outputs": [ 124 | { 125 | "name": "stdout", 126 | "output_type": "stream", 127 | "text": [ 128 | "10.3897/rio.7.e66264, OPTIMETA – Strengthening the Open Access publishing system through open citations and spatiotemporal metadata \n", 129 | "10.31263/voebm.v72i2.2808, Open Science und die Bibliothek – Aktionsfelder und Berufsbild\n", 130 | "10.1080/19386389.2021.1999156, Roadmap to FAIR Research Information in Open Infrastructures\n", 131 | "10.21105/joss.01182, VIVO: a system for research discovery\n", 132 | "10.3897/rio.4.e31656, Reference implementation for open scientometric indicators (ROSI)\n" 133 | ] 134 | } 135 | ], 136 | "source": [ 137 | "#-- example execution\n", 138 | "for page in list_of_pages:\n", 139 | " for item in page['message']['items']:\n", 140 | " print(f\"{item['DOI']}, {item['title'][0]}\")" 141 | ] 142 | } 143 | ], 144 | "metadata": { 145 | "colab": { 146 | "authorship_tag": "ABX9TyN8GdEZ2emA2pb1j6K+9PwP", 147 | "name": "crossref_get_works_by_person.ipynb", 148 | "provenance": [ 149 | { 150 | "file_id": "1RvDBYtHIK8LG_31cmfKW2PxQ3whxondX", 151 | "timestamp": 1643057922530 152 | } 153 | ] 154 | }, 155 | "kernelspec": { 156 | "display_name": "Python 3 (ipykernel)", 157 | "language": "python", 158 | "name": "python3" 159 | }, 160 | "language_info": { 161 | "codemirror_mode": { 162 | "name": "ipython", 163 | "version": 3 164 | }, 165 | 
"file_extension": ".py", 166 | "mimetype": "text/x-python", 167 | "name": "python", 168 | "nbconvert_exporter": "python", 169 | "pygments_lexer": "ipython3", 170 | "version": "3.9.6" 171 | } 172 | }, 173 | "nbformat": 4, 174 | "nbformat_minor": 1 175 | } 176 | -------------------------------------------------------------------------------- /person-works/freya_get_works_by_person.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "YDW3V56iL0gO" 7 | }, 8 | "source": [ 9 | "### Query the FREYA PID Graph for works authored by a person\n", 10 | "\n", 11 | "This notebook queries the [FREYA PID Graph](https://blog.datacite.org/powering-the-pid-graph/) via [Datacite's GraphQL API](https://api.datacite.org/graphql) to retrieve works created by a person. It takes an ORCID URL as input which is used to filter for all works registered at Datacite and some registered at Crossref where '`creator.nameIdentifier`' matches the given ORCID URL. From the resulting list of works we output all DOIs." 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "metadata": { 18 | "id": "CCRX2-tC_ijb", 19 | "scrolled": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "# Prerequisites:\n", 24 | "import requests # dependency to make HTTP calls\n", 25 | "from benedict import benedict # dependency for dealing with json" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": { 31 | "id": "9J5pNonyPXzZ" 32 | }, 33 | "source": [ 34 | "The input for this notebook is an ORCID URL, e.g. '`https://orcid.org/0000-0003-2499-7741`'." 
35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 2, 40 | "metadata": { 41 | "executionInfo": { 42 | "elapsed": 10, 43 | "status": "ok", 44 | "timestamp": 1643038887890, 45 | "user": { 46 | "displayName": "Sandra M", 47 | "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjjehryRcYlqHNFf_9Q6slGN_VZPE0y5QkvOxzG=s64", 48 | "userId": "04602594913862593282" 49 | }, 50 | "user_tz": -60 51 | }, 52 | "id": "HKtYXm9XQiGB" 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "# input parameter\n", 57 | "example_orcid=\"https://orcid.org/0000-0003-2499-7741\"" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": { 63 | "id": "s00nilOOQs5m" 64 | }, 65 | "source": [ 66 | "We use it to query Datacite's GraphQL API for the person's metadata and all works connected to them. Since the API uses pagination, we need to loop through all pages to get the complete result set." 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 3, 72 | "metadata": { 73 | "executionInfo": { 74 | "elapsed": 7, 75 | "status": "ok", 76 | "timestamp": 1643038887890, 77 | "user": { 78 | "displayName": "Sandra M", 79 | "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjjehryRcYlqHNFf_9Q6slGN_VZPE0y5QkvOxzG=s64", 80 | "userId": "04602594913862593282" 81 | }, 82 | "user_tz": -60 83 | }, 84 | "id": "V0PWy15LPfZ_" 85 | }, 86 | "outputs": [], 87 | "source": [ 88 | "# Datacite's GraphQL endpoint for the FREYA PID Graph\n", 89 | "DATACITE_GRAPHQL_API = \"https://api.datacite.org/graphql\"\n", 90 | "\n", 91 | "# GraphQL query to retrieve a person and all their created works\n", 92 | "QUERY_PERSON2WORKS = \"\"\"query person($orcid :ID!, $after:String){\n", 93 | " person(id: $orcid) {\n", 94 | " works(first:1000, after: $after) {\n", 95 | " pageInfo {\n", 96 | " endCursor\n", 97 | " hasNextPage\n", 98 | " }\n", 99 | "\n", 100 | " nodes {\n", 101 | " doi\n", 102 | " titles {\n", 103 | " title\n", 104 | " }\n", 105 | " versions {\n", 106 | " nodes {\n", 107 
| " doi\n", 108 | " }\n", 109 | " }\n", 110 | " }\n", 111 | " }\n", 112 | " }\n", 113 | "}\"\"\"\n", 114 | "\n", 115 | "# query for all works connected to given ORCID\n", 116 | "def query_freya_for_person2works(orcid):\n", 117 | " continue_paginating = True\n", 118 | " cursor=\"\"\n", 119 | " \n", 120 | " while continue_paginating:\n", 121 | " vars = {'orcid': orcid, 'after': cursor}\n", 122 | " response = requests.post(url=DATACITE_GRAPHQL_API,\n", 123 | " json={'query': QUERY_PERSON2WORKS, 'variables': vars},\n", 124 | " headers={'Accept': 'application/json'})\n", 125 | " response.raise_for_status()\n", 126 | " result=response.json()\n", 127 | " if 'errors' in result:\n", 128 | " raise requests.exceptions.HTTPError(result)\n", 129 | "\n", 130 | " # check if next page exists and set cursor to next page\n", 131 | " cursor = next_cursor(result)\n", 132 | " continue_paginating = has_next_page(result)\n", 133 | " yield result\n", 134 | "\n", 135 | "# check if there is another page with results to query\n", 136 | "def has_next_page(response_data):\n", 137 | " resp_dict = benedict.from_json(response_data)\n", 138 | " has_next_page = resp_dict.get(\"data.person.works.pageInfo.hasNextPage\")\n", 139 | " return has_next_page\n", 140 | "\n", 141 | "# set cursor to next value\n", 142 | "def next_cursor(response_data):\n", 143 | " resp_dict = benedict.from_json(response_data)\n", 144 | " cursor = resp_dict.get(\"data.person.works.pageInfo.endCursor\")\n", 145 | " return cursor\n", 146 | "\n", 147 | "\n", 148 | "#--- example execution\n", 149 | "list_of_pages=query_freya_for_person2works(example_orcid)" 150 | ] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "metadata": { 155 | "id": "wmJZvW41Te2e" 156 | }, 157 | "source": [ 158 | "From the returned pages we \n", 159 | "* extract the list of works,\n", 160 | "* remove the ones that are older versions of another work, which is the case if the metadata field for '`versions.nodes.doi`' contains a DOI for the succeeding 
work,\n", 161 | "* extract and print out the title and DOI of each work.\n", 162 | "\n", 163 | "*Note: \n", 164 | "While we are able to filter some versions of a work if they are linked via the metadata field '`versions.nodes.doi`', others would need advanced filters (for example based on name similarity) which is out of scope for our project.*" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 4, 170 | "metadata": { 171 | "executionInfo": { 172 | "elapsed": 8, 173 | "status": "ok", 174 | "timestamp": 1643038887891, 175 | "user": { 176 | "displayName": "Sandra M", 177 | "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjjehryRcYlqHNFf_9Q6slGN_VZPE0y5QkvOxzG=s64", 178 | "userId": "04602594913862593282" 179 | }, 180 | "user_tz": -60 181 | }, 182 | "id": "oNoGVZpbt5cP" 183 | }, 184 | "outputs": [ 185 | { 186 | "name": "stdout", 187 | "output_type": "stream", 188 | "text": [ 189 | "Complete number of works: 105\n", 190 | "Filtered number of works: 91\n", 191 | "10.6084/m9.figshare.647329, Members of Deutscher Bibliotheksverband e. V. (dbv)\n", 192 | "10.2314/coscv1.1, Literatur recherchieren und verwalten\n", 193 | "10.2314/coscv1.2, Organisieren\n", 194 | "10.2314/coscv1.3, Daten sammeln und verarbeiten\n", 195 | "10.2314/coscv1, CoScience - Gemeinsam forschen und publizieren mit dem Netz\n", 196 | "10.2314/coscv2, CoScience - Gemeinsam forschen und publizieren mit dem Netz\n", 197 | "10.2314/coscv2.2, Oganisieren\n", 198 | "10.2314/coscv2.3, Daten sammeln und verarbeiten\n", 199 | "10.6084/m9.figshare.647329.v1, Members of Deutscher Bibliotheksverband e. V. 
(dbv)\n", 200 | "10.6084/m9.figshare.5271943, TIB-FIS-Discovery - VIVO at the German National Library of Science and Technology (TIB)\n", 201 | "10.6084/m9.figshare.5271943.v1, TIB-FIS-DISCOVERY VIVO AT THE GERMAN NATIONAL LIBRARY OF SCIENCE AND TECHNOLOGY (TIB)\n", 202 | "10.6084/m9.figshare.5271943.v2, TIB-FIS-Discovery - VIVO at the German National Library of Science and Technology (TIB)\n", 203 | "10.6084/m9.figshare.5285743, Lost in translation – challenges of tailoring VIVO to the needs of the German scholarly landscape\n", 204 | "10.6084/m9.figshare.5285743.v1, Lost in translation – challenges of tailoring VIVO to the needs of the German scholarly landscape\n", 205 | "10.6084/m9.figshare.6465149, User perceptions, feedback, and stories.pdf\n", 206 | "10.6084/m9.figshare.6465149.v1, User perceptions, feedback, and stories.pdf\n", 207 | "10.5281/zenodo.1287885, Supporting A Vivo Regional Community\n", 208 | "10.6084/m9.figshare.6819971, Information Security Challenges in VIVO - Adapting the BSI IT Security Catalog Standards\n", 209 | "10.6084/m9.figshare.6819971.v1, VIVO_Conference_2018_BSI-IT-Security.pdf\n", 210 | "10.6084/m9.figshare.6820217, Challenges and opportunities of using VIVO as a reporting tool\n", 211 | "10.6084/m9.figshare.6820217.v1, Challenges and opportunities of using VIVO as a reporting tool\n", 212 | "10.6084/m9.figshare.6819971.v2, Information Security Challenges in VIVO - Adapting the BSI IT Security Catalog Standards\n", 213 | "10.6084/m9.figshare.6819722.v2, VIVO-DE: Collaborative ontology editing & management with VoCol\n", 214 | "10.5281/zenodo.1445521, Vivo 2018 - Ein Rückblick\n", 215 | "10.5281/zenodo.1464108, Vivo - Eine Einführung\n", 216 | "10.5281/zenodo.1464897, Reporting / Kdsf Und Vivo\n", 217 | "10.5281/zenodo.1478571, Forschungsevaluation Und Visualisierung Von Zitationsnetzwerken\n", 218 | "10.6084/m9.figshare.6819722, VIVO-DE: Collaborative ontology editing & management with VoCol\n", 219 | 
"10.6084/m9.figshare.6819722.v1, VIVO-DE: Collaborative ontology editing & management with VoCol\n", 220 | "10.5281/zenodo.1689951, Referenzimplementierung für offene szientometrische Indikatoren (ROSI)\n", 221 | "10.5281/zenodo.2613539, ROSI – Reference Implementation for Open Scientometric Indicators\n", 222 | "10.5281/zenodo.2615675, The ROSI Project\n", 223 | "10.5281/zenodo.2639714, VIVO software 1.10.0 release\n", 224 | "10.5281/zenodo.2639713, VIVO software 1.10.0 release\n", 225 | "10.5281/zenodo.3242680, ROSI – Open Metrics for Open Repositories\n", 226 | "10.5281/zenodo.3243485, Registry of [Open] Scientometric Data Sources – Technische Evaluierung von Offenen Datenquellen\n", 227 | "10.6084/m9.figshare.9756770, Identifying Ontological Domains Related to VIVO\n", 228 | "10.6084/m9.figshare.9756770.v1, Identifying Ontological Domains Related to VIVO\n", 229 | "10.6084/m9.figshare.9771701, VIVO Ontology Version 2\n", 230 | "10.6084/m9.figshare.9771701.v1, VIVO Ontology Version 2\n", 231 | "10.5281/zenodo.3407681, Visualising open scientometric data in VIVO\n", 232 | "10.5281/zenodo.3407680, Visualising open scientometric data in VIVO\n", 233 | "10.6084/m9.figshare.9897008, A Library of Queries and Reports: Introducing the Vitro Query Tool\n", 234 | "10.6084/m9.figshare.9897008.v1, A Library of Queries and Reports: Introducing the Vitro Query Tool\n", 235 | "10.5281/zenodo.3518713, confIDent - for FAIR conference metadata\n", 236 | "10.5281/zenodo.3518714, confIDent - for FAIR conference metadata\n", 237 | "10.5281/zenodo.3564456, Trends und Entwicklungen rund um VIVO\n", 238 | "10.5281/zenodo.3564457, Trends und Entwicklungen rund um VIVO\n", 239 | "10.21105/joss.01182, VIVO: a system for research discovery\n", 240 | "10.31263/voebm.v72i2.2808, Open Science und die Bibliothek – Aktionsfelder und Berufsbild\n", 241 | "10.3897/rio.4.e31656, Reference implementation for open scientometric indicators (ROSI)\n", 242 | "10.6084/m9.figshare.11961699, Ontological 
Domains for Representing Scholarship\n", 243 | "10.6084/m9.figshare.11961699.v1, Ontological Domains for Representing Scholarship\n", 244 | "10.5281/zenodo.3862804, Bibliometrische Visualisierungen auf dem Prüfstein – Versuch einer bibliothekarischen Perspektive\n", 245 | "10.5281/zenodo.3896517, AEON - Towards an Academic Events Ontology\n", 246 | "10.5281/zenodo.3900193, Research Profile Ownership through User Studies: A Case Study in the German National Research System\n", 247 | "10.25815/cex2-cc69, Virtual Conferences Require Dedicated Time, Too\n", 248 | "10.6084/m9.figshare.13645577.v1, VIVO Ontology Development: Why, What, and How\n", 249 | "10.6084/m9.figshare.13645577, VIVO Ontology Development: Why, What, and How\n", 250 | "10.25968/opus-837, Roving Librarians in der Zentralbibliothek der Hochschule Hannover: ein Experiment\n", 251 | "10.25968/opus-6, Libworld. Biblioblogs global\n", 252 | "10.25968/opus-1, Teaching Information Literacy with the Lerninformationssystem\n", 253 | "10.25968/opus-2, Personalisiertes Lernen in der Bibliothek: das Düsseldorfer Online-Tutorial (DOT) Informationskompetenz\n", 254 | "10.25968/opus-198, LibWorld - library blogging worldwide\n", 255 | "10.25968/opus-521, CoScience : gemeinsam forschen und publizieren mit dem Netz\n", 256 | "10.25968/opus-102, 13 Dinge\n", 257 | "10.25968/opus-330, Die Online-Auskunft der Universitäts- und Landesbibliothek Düsseldorf : erste Evaluation eines Erfolgsmodells\n", 258 | "10.25968/opus-272, Working Paper: Open Government Data\n", 259 | "10.25968/opus-303, Lernen 2.0 : Bericht aus der Praxis\n", 260 | "10.25968/opus-453, Empfehlungen zur Öffnung bibliothekarischer Daten\n", 261 | "10.25798/qsdh-en13, 12th Annual / International VIVO Conference 2021\n", 262 | "10.6084/m9.figshare.14842221, Geospatial information in VIVO -- Thoughts, Ideas, Suggestions\n", 263 | "10.6084/m9.figshare.14842221.v1, Geospatial information in VIVO -- Thoughts, Ideas, Suggestions\n", 264 | "10.5281/zenodo.5031664, 
Der Forschungsatlas - a community-oriented research profile system in the making\n", 265 | "10.5281/zenodo.5031663, Der Forschungsatlas - a community-oriented research profile system in the making\n", 266 | "10.5281/zenodo.5040057, Towards FAIR research information - insights from expert workshops\n", 267 | "10.5281/zenodo.5040056, Towards FAIR research information - insights from expert workshops\n", 268 | "10.5281/zenodo.5060204, Forschungsevaluierung FAIR(ER) gestalten\n", 269 | "10.5281/zenodo.5060203, Forschungsevaluierung FAIR(ER) gestalten\n", 270 | "10.5281/zenodo.5075098, Sharing Queries and Reports with the Reporting Marketplace\n", 271 | "10.5281/zenodo.5075108, Creating a Semantic Catalogue of Architectural Drawings\n", 272 | "10.5281/zenodo.5075107, Creating a Semantic Catalogue of Architectural Drawings\n", 273 | "10.5281/zenodo.5082237, Priorities of a VIVO Community - results of a survey at the 5. VIVO-Workshop 2021\n", 274 | "10.5281/zenodo.5082236, Priorities of a VIVO Community - results of a survey at the 5. 
VIVO-Workshop 2021\n", 275 | "10.5281/zenodo.5082370, Geospatial information in VIVO - thoughts, ideas, suggestions\n", 276 | "10.5281/zenodo.5082369, Geospatial information in VIVO - thoughts, ideas, suggestions\n", 277 | "10.5281/zenodo.5526786, Das Projekt OPTIMETA – Stärkung des Open-Access-Publikationssystems durch offene Zitationen und raumzeitliche Metadaten\n", 278 | "10.5281/zenodo.5526785, Das Projekt OPTIMETA – Stärkung des Open-Access-Publikationssystems durch offene Zitationen und raumzeitliche Metadaten\n", 279 | "10.5281/zenodo.5767060, How even small and independent journals can contribute to the citation commons\n", 280 | "10.5281/zenodo.5767059, How even small and independent journals can contribute to the citation commons\n", 281 | "10.25968/opus-2135, Are Conference Posters Being Cited?\n" 282 | ] 283 | } 284 | ], 285 | "source": [ 286 | "# from the result pages we get from the GraphQL API, extract the data about the works\n", 287 | "def extract_works_from_page(page):\n", 288 | " page_dict=benedict.from_json(page)\n", 289 | " return [work for work in page_dict.get('data.person.works.nodes') or []]\n", 290 | "\n", 291 | "# remove old versions from the list of works\n", 292 | "def filter_older_versions(works):\n", 293 | " return [work for work in works if not benedict.from_json(work).get('versions.nodes[0].doi')]\n", 294 | "\n", 295 | "# extract DOI from work\n", 296 | "def extract_doi(work):\n", 297 | " work_dict = benedict.from_json(work)\n", 298 | " doi = work_dict.get('doi')\n", 299 | " title = work_dict.get('titles[0].title')\n", 300 | " return doi, title\n", 301 | "\n", 302 | "\n", 303 | "#--- example execution\n", 304 | "for page in list_of_pages or []:\n", 305 | " works=extract_works_from_page(page)\n", 306 | " print(f\"Complete number of works: {len(works)}\")\n", 307 | " works_filtered=filter_older_versions(works)\n", 308 | " print(f\"Filtered number of works: {len(works_filtered)}\")\n", 309 | " for work in works_filtered or []:\n", 310 
| " doi, title = extract_doi(work)\n", 311 | " print(f\"{doi}, {title}\")" 312 | ] 313 | } 314 | ], 315 | "metadata": { 316 | "colab": { 317 | "authorship_tag": "ABX9TyPYZxjYgTxRPJcOH8TS6HMv", 318 | "name": "freya_get_works_by_person.ipynb", 319 | "provenance": [ 320 | { 321 | "file_id": "1msooXyRI-0s9kVYKOQqYXoUbPYoW3f4U", 322 | "timestamp": 1643040022714 323 | } 324 | ] 325 | }, 326 | "kernelspec": { 327 | "display_name": "Python 3 (ipykernel)", 328 | "language": "python", 329 | "name": "python3" 330 | }, 331 | "language_info": { 332 | "codemirror_mode": { 333 | "name": "ipython", 334 | "version": 3 335 | }, 336 | "file_extension": ".py", 337 | "mimetype": "text/x-python", 338 | "name": "python", 339 | "nbconvert_exporter": "python", 340 | "pygments_lexer": "ipython3", 341 | "version": "3.9.6" 342 | } 343 | }, 344 | "nbformat": 4, 345 | "nbformat_minor": 1 346 | } 347 | -------------------------------------------------------------------------------- /person-works/openaire_get_publications_by_person.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### Query OpenAIRE for publications authored by a person\n", 8 | "This notebook queries the [OpenAIRE HTTP API](https://graph.openaire.eu/develop/api.html) via its `/publications` endpoint for publications authored by a person. It takes an ORCID iD as input which is used to filter for publications where one of the creators' `orcid` field matches the given ORCID iD. 
From the resulting list of publications we output all DOIs.\n", 9 | "\n", 10 | "*Note:\n", 11 | "The API has several different endpoints for research outputs: they are divided into publications, research data, software metadata and other research products, so to get a full picture of a person's research output, you would have to query all of these endpoints and combine their results.*" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "metadata": { 18 | "pycharm": { 19 | "name": "#%%\n" 20 | } 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "# Prerequisites:\n", 25 | "import requests # dependency for making HTTP calls\n", 26 | "from benedict import benedict # dependency for dealing with json" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": { 32 | "collapsed": true, 33 | "pycharm": { 34 | "name": "#%% md\n" 35 | } 36 | }, 37 | "source": [ 38 | "The input for this notebook is an ORCID iD, e.g. '`0000-0003-2499-7741`'." 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 2, 44 | "metadata": { 45 | "pycharm": { 46 | "name": "#%%\n" 47 | } 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "# input parameter\n", 52 | "example_orcid_id=\"0000-0003-2499-7741\"" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "We use it to query the OpenAIRE HTTP API for publications that specify the ORCID iD within their metadata in one of the creators' `orcid` fields. Since the API uses pagination, we need to loop through all pages to get the complete result set." 
60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 3, 65 | "metadata": { 66 | "pycharm": { 67 | "name": "#%%\n" 68 | } 69 | }, 70 | "outputs": [], 71 | "source": [ 72 | "# OpenAIRE endpoint to query for publications\n", 73 | "OPENAIRE_API_PUBLICATIONS = \"https://api.openaire.eu/search/publications\"\n", 74 | "\n", 75 | "# query OpenAIRE for all publications that are connected to orcid\n", 76 | "def query_openaire_for_person2publications(orcid_id):\n", 77 | " page = 1\n", 78 | " max_page = 1\n", 79 | "\n", 80 | " while page <= max_page:\n", 81 | " params = {'orcid': orcid_id, 'page': page, 'format': \"json\"}\n", 82 | " response = requests.get(url=OPENAIRE_API_PUBLICATIONS,\n", 83 | " params=params)\n", 84 | " response.raise_for_status()\n", 85 | " result=response.json()\n", 86 | "\n", 87 | " # calculate max page number in first loop\n", 88 | " if max_page == 1:\n", 89 | " max_page = determine_max_page(result)\n", 90 | " page = page + 1\n", 91 | " yield result\n", 92 | "\n", 93 | "# calculate max number of result pages\n", 94 | "def determine_max_page(response_data):\n", 95 | " response_dict = benedict.from_json(response_data)\n", 96 | " items_total = response_dict.get('response.header.total.$')\n", 97 | " items_per_page = response_dict.get('response.header.size.$')\n", 98 | " max_page_ceil = items_total // items_per_page + bool(items_total % items_per_page)\n", 99 | " return max_page_ceil\n", 100 | "\n", 101 | "\n", 102 | "# ---- example execution\n", 103 | "list_of_pages=query_openaire_for_person2publications(example_orcid_id)" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "From the resulting list of publications we extract and print out each title and DOI. 
\n", 111 | "\n", 112 | "*Note: publications that do not have a DOI assigned will not be printed.*" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 4, 118 | "metadata": {}, 119 | "outputs": [ 120 | { 121 | "name": "stdout", 122 | "output_type": "stream", 123 | "text": [ 124 | "Number of publications found: 6\n", 125 | "\n", 126 | "10.15488/11463, Roadmap to FAIR Research Information in Open Infrastructures\n", 127 | "10.1515/bd.2006.40.4.466, Informationsvermittlung: Personalisiertes Lernen in der Bibliothek: das Düsseldorfer Online-Tutorial (DOT) Informationskompetenz\n", 128 | "10.1080/00048623.2006.10755322, Teaching Information Literacy with the Lerninformationssystem\n", 129 | "10.3389/frma.2021.694307, Enhancing Knowledge Graph Extraction and Validation From Scholarly Publications Using Bibliographic Metadata\n", 130 | "10.3897/rio.7.e66264, OPTIMETA – Strengthening the Open Access publishing system through open citations and spatiotemporal metadata\n", 131 | "10.1016/j.procs.2019.01.074, The Research Core Dataset (KDSF) in the Linked Data context\n" 132 | ] 133 | } 134 | ], 135 | "source": [ 136 | "# from the result pages, extract the data about each publication\n", 137 | "def extract_publications_from_page(page):\n", 138 | " return [pub for pub in benedict.from_json(page).get('response.results.result') or []]\n", 139 | "\n", 140 | "# extract DOI from publication\n", 141 | "def extract_doi(pub):\n", 142 | " oaf_result=benedict.from_json(pub).get('metadata.oaf:entity.oaf:result')\n", 143 | "\n", 144 | " # unfortunately the json data is inconsistently modeled:\n", 145 | " # if there is one pid/title for a publication, it is a json object\n", 146 | " # if there are multiple pids/titles for a publication, they form a json list\n", 147 | " pids=oaf_result.get('pid') or []\n", 148 | " is_doi = lambda pid: pid.get('@classid')==\"doi\"\n", 149 | " if isinstance(pids, list):\n", 150 | " dois=[pid['$'] for pid in pids if is_doi(pid)]\n", 
151 | " else:\n", 152 | " dois= [pids['$']] if is_doi(pids) else []\n", 153 | " doi=dois[0] if dois else None # pick the first one\n", 154 | " \n", 155 | " titles=oaf_result.get('title') or []\n", 156 | " is_main_title = lambda title: title.get('@classid')==\"main title\"\n", 157 | " if isinstance(titles, list):\n", 158 | " main_titles=[title['$'] for title in titles if is_main_title(title)]\n", 159 | " else:\n", 160 | " main_titles=[titles['$']] if is_main_title(titles) else []\n", 161 | " title=main_titles[0] if main_titles else None # pick the first one\n", 162 | "\n", 163 | " return doi, title\n", 164 | "\n", 165 | "\n", 166 | "#--- example execution\n", 167 | "for page in list_of_pages or []:\n", 168 | " publications=extract_publications_from_page(page)\n", 169 | " print(f\"Number of publications found: {len(publications)}\\n\")\n", 170 | " for pub in publications:\n", 171 | " doi,title = extract_doi(pub)\n", 172 | " if doi:\n", 173 | " print(f\"{doi}, {title}\")" 174 | ] 175 | } 176 | ], 177 | "metadata": { 178 | "kernelspec": { 179 | "display_name": "Python 3 (ipykernel)", 180 | "language": "python", 181 | "name": "python3" 182 | }, 183 | "language_info": { 184 | "codemirror_mode": { 185 | "name": "ipython", 186 | "version": 3 187 | }, 188 | "file_extension": ".py", 189 | "mimetype": "text/x-python", 190 | "name": "python", 191 | "nbconvert_exporter": "python", 192 | "pygments_lexer": "ipython3", 193 | "version": "3.9.6" 194 | } 195 | }, 196 | "nbformat": 4, 197 | "nbformat_minor": 1 198 | } -------------------------------------------------------------------------------- /person-works/openalex_get_works_by_person.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "ac7bedaf-05fb-4eb0-9bf5-e4d1d68a08c3", 6 | "metadata": { 7 | "id": "ac7bedaf-05fb-4eb0-9bf5-e4d1d68a08c3" 8 | }, 9 | "source": [ 10 | "### Query OpenAlex for works authored by a person\n", 11 | 
"This notebook queries the [OpenAlex API](https://docs.openalex.org/api) via its `/works` endpoint for works authored by a person. It takes an ORCID URL as input which is used to filter for works where '`authorships.author.orcid`' matches the given ORCID URL.\n", 12 | "From the resulting list of works we output all DOIs." 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 1, 18 | "id": "W8sB1ZF6aKG2", 19 | "metadata": { 20 | "executionInfo": { 21 | "elapsed": 5, 22 | "status": "ok", 23 | "timestamp": 1643058187428, 24 | "user": { 25 | "displayName": "", 26 | "photoUrl": "", 27 | "userId": "" 28 | }, 29 | "user_tz": -60 30 | }, 31 | "id": "W8sB1ZF6aKG2", 32 | "scrolled": false 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "# Prerequisites:\n", 37 | "import requests # dependency to make HTTP calls" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "id": "Rt7GUbFcaNxi", 43 | "metadata": { 44 | "id": "Rt7GUbFcaNxi" 45 | }, 46 | "source": [ 47 | "The input for this notebook is an ORCID URL, e.g. 
'`https://orcid.org/0000-0003-2499-7741`'" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 2, 53 | "id": "GUMzx_sPaTsH", 54 | "metadata": { 55 | "executionInfo": { 56 | "elapsed": 259, 57 | "status": "ok", 58 | "timestamp": 1643058187684, 59 | "user": { 60 | "displayName": "", 61 | "photoUrl": "", 62 | "userId": "" 63 | }, 64 | "user_tz": -60 65 | }, 66 | "id": "GUMzx_sPaTsH" 67 | }, 68 | "outputs": [], 69 | "source": [ 70 | "# input parameter\n", 71 | "example_orcid=\"https://orcid.org/0000-0003-2499-7741\"" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "id": "nWOX9dkvaZ97", 77 | "metadata": { 78 | "id": "nWOX9dkvaZ97" 79 | }, 80 | "source": [ 81 | "We use it to query the OpenAlex API for works that specified the ORCID URL within their metadata in the field '`authorships.author.orcid`'.\n", 82 | " Since the API uses [pagination](https://docs.openalex.org/api/get-lists-of-entities#pagination), we need to loop through all pages to get the complete result set." 
83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 3, 88 | "id": "8b608640-96a8-47d1-9de7-b7d3f6fd5a47", 89 | "metadata": { 90 | "executionInfo": { 91 | "elapsed": 5, 92 | "status": "ok", 93 | "timestamp": 1643058187685, 94 | "user": { 95 | "displayName": "", 96 | "photoUrl": "", 97 | "userId": "" 98 | }, 99 | "user_tz": -60 100 | }, 101 | "id": "8b608640-96a8-47d1-9de7-b7d3f6fd5a47" 102 | }, 103 | "outputs": [], 104 | "source": [ 105 | "# OpenAlex endpoint to query for works\n", 106 | "OPENALEX_API_WORKS = \"https://api.openalex.org/works\"\n", 107 | "\n", 108 | "# query all works that are connected to orcid\n", 109 | "def query_openalex_for_person2works(orcid):\n", 110 | " page = 1\n", 111 | " max_page = 1\n", 112 | " \n", 113 | " while page <= max_page:\n", 114 | " params = {'filter': 'authorships.author.orcid:'+orcid, 'page': page}\n", 115 | " response = requests.get(url=OPENALEX_API_WORKS,\n", 116 | " params=params,\n", 117 | " headers= {'Accept': 'application/json'})\n", 118 | " response.raise_for_status()\n", 119 | " result=response.json()\n", 120 | "\n", 121 | " # calculate max page number in first loop\n", 122 | " if max_page == 1:\n", 123 | " max_page = determine_max_page(result)\n", 124 | " page = page + 1\n", 125 | " yield result\n", 126 | "\n", 127 | "# calculate max number of result pages\n", 128 | "def determine_max_page(response_data):\n", 129 | " item_count = response_data['meta']['count']\n", 130 | " items_per_page = response_data['meta']['per_page']\n", 131 | " max_page_ceil = item_count // items_per_page + bool(item_count % items_per_page)\n", 132 | " return max_page_ceil\n", 133 | "\n", 134 | "\n", 135 | "# ---- example execution\n", 136 | "list_of_pages=query_openalex_for_person2works(example_orcid)" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "id": "kHRO_LiJr-u_", 142 | "metadata": { 143 | "id": "kHRO_LiJr-u_" 144 | }, 145 | "source": [ 146 | "From the resulting list of works we extract and print out 
title and DOI. \n", 147 | "\n", 148 | "*Note: works that do not have a DOI assigned will not be printed.*" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 4, 154 | "id": "1c36737c-4dcf-42d5-80e2-802f0a7a8326", 155 | "metadata": { 156 | "colab": { 157 | "base_uri": "https://localhost:8080/" 158 | }, 159 | "executionInfo": { 160 | "elapsed": 516, 161 | "status": "ok", 162 | "timestamp": 1643058188197, 163 | "user": { 164 | "displayName": "", 165 | "photoUrl": "", 166 | "userId": "" 167 | }, 168 | "user_tz": -60 169 | }, 170 | "id": "1c36737c-4dcf-42d5-80e2-802f0a7a8326", 171 | "outputId": "fb849105-45c1-4abd-a6be-b6d4e3c567f4" 172 | }, 173 | "outputs": [ 174 | { 175 | "name": "stdout", 176 | "output_type": "stream", 177 | "text": [ 178 | "10.3897/rio.7.e66264, OPTIMETA – Strengthening the Open Access publishing system through open citations and spatiotemporal metadata\n", 179 | "10.11588/ip.2020.2.73938, Bericht vom 4. VIVO-Workshop 2019\n", 180 | "10.15488/9424, ConfIDent – An Open Platform for FAIR Conference Metadata\n", 181 | "10.31263/voebm.v72i2.2808, Open Science und die Bibliothek – Aktionsfelder und Berufsbild\n", 182 | "10.21105/joss.01182, VIVO: a system for research discovery\n", 183 | "10.11588/ip.2019.1.61729, Bericht vom 3. 
VIVO-Workshop 2018\n", 184 | "10.11588/ip.2019.1.49609, Problematische Aspekte bibliometrie-basierter Forschungsevaluierung\n", 185 | "10.1016/j.procs.2019.01.074, The Research Core Dataset (KDSF) in the Linked Data context\n", 186 | "10.11588/ip.2018.1.49357, Vitro - ein universell einsetzbarer Editor für Ontologien und Instanzen\n", 187 | "10.3897/rio.4.e31656, Reference implementation for open scientometric indicators (ROSI)\n", 188 | "10.5281/zenodo.1464108, VIVO - eine Einführung\n", 189 | "10.11588/ip.2018.1.46819, Anforderungen an Forschungsinformationssysteme in Deutschland durch Forschende und Forschungsadministration – Zusammenfassung zweier Studien\n", 190 | "10.5281/zenodo.1287885, Supporting a VIVO Regional Community\n", 191 | "10.15488/4087, Referenzimplementierung für offene szientometrische Indikatoren (ROSI)\n", 192 | "10.15488/3952, Forschungsevaluation und Visualisierung von Zitationsnetzwerken\n", 193 | "10.15488/3951, Reporting mit VIVO und Kibana\n", 194 | "10.11588/ip.2017.2.41926, Tagungsbericht VIVO-Workshop 2017 - “Forschungsinformationen in der Praxis”\n", 195 | "10.11588/ip.2016.2.32678, Third-Party-Elemente in deutschen Bibliothekswebseiten\n", 196 | "10.11588/ip.2016.1.31963, Editorial – Rückblick und Call for Call for Papers\n", 197 | "10.25968/opus-837, Roving Librarians in der Zentralbibliothek der Hochschule Hannover: ein Experiment\n", 198 | "10.5281/zenodo.50969, URLs von Webseiten mit Typ Bibliothek aus Lobid.org\n", 199 | "10.11588/ip.2015.2.23784, Erstellung wiederverwendbarer RDF-Geodaten mit Google Refine\n", 200 | "10.5281/zenodo.30992, VIVO an der Hochschule Hannover - Beispiel: Forschungsprojekte\n", 201 | "10.11588/ip.2015.1.18489, Editorial - Willkommen zur ersten Ausgabe der Informationspraxis!\n", 202 | "10.5281/zenodo.13101, geodata: Populated places for VIVO\n", 203 | "10.2314/coscv2, CoScience : gemeinsam forschen und publizieren mit dem Netz\n", 204 | "10.25968/opus-330, Die Online-Auskunft der Universitäts- und 
Landesbibliothek Düsseldorf : erste Evaluation eines Erfolgsmodells\n", 205 | "10.25968/opus-272, Working Paper: Open Government Data\n", 206 | "10.25968/opus-303, Lernen 2.0 : Bericht aus der Praxis\n", 207 | "10.17877/de290r-8755, Web 2.0 in Bibliotheken - Bibliotheken im Web 2.0\n", 208 | "10.18452/8872, Libworld. Biblioblogs global\n", 209 | "10.1080/00048623.2006.10755322, Teaching Information Literacy with the Lerninformationssystem\n", 210 | "10.1515/bd.2006.40.4.466, Informationsvermittlung: Personalisiertes Lernen in der Bibliothek: das Düsseldorfer Online-Tutorial (DOT) Informationskompetenz\n" 211 | ] 212 | } 213 | ], 214 | "source": [ 215 | "# from the result pages we get from the OpenAlex API, extract the data about works\n", 216 | "def extract_works_from_page(page):\n", 217 | " return [work for work in page.get('results') or []]\n", 218 | "\n", 219 | "# extract DOI from work\n", 220 | "def extract_doi(work):\n", 221 | " doi=work.get('ids', {}).get('doi') or \"\"\n", 222 | " doi_id=doi.replace(\"https://doi.org/\", \"\") if doi else doi\n", 223 | " title=work.get('display_name', \"\")\n", 224 | " return doi_id, title\n", 225 | "\n", 226 | "\n", 227 | "#--- example execution\n", 228 | "for page in list_of_pages or []:\n", 229 | " works=extract_works_from_page(page)\n", 230 | " for work in works or []:\n", 231 | " doi,title=extract_doi(work)\n", 232 | " if doi:\n", 233 | " print(f\"{doi}, {title}\")" 234 | ] 235 | } 236 | ], 237 | "metadata": { 238 | "colab": { 239 | "collapsed_sections": [], 240 | "name": "openalex_get_works_by_person.ipynb", 241 | "provenance": [ 242 | { 243 | "file_id": "https://github.com/TAPIR-TIB/pidgraph-notebooks/blob/person-works/person-works/openalex_get_works_by_person.ipynb", 244 | "timestamp": 1643058224827 245 | }, 246 | { 247 | "file_id": "https://github.com/TAPIR-TIB/pidgraph-notebooks/blob/person-works/person-works/openalex_get_works_by_person.ipynb", 248 | "timestamp": 1643040475251 249 | }, 250 | { 251 | "file_id": 
"1neSGbKlkQwjOYX77kpGK14BqT4KpCei0", 252 | "timestamp": 1643025949695 253 | } 254 | ] 255 | }, 256 | "kernelspec": { 257 | "display_name": "Python 3 (ipykernel)", 258 | "language": "python", 259 | "name": "python3" 260 | }, 261 | "language_info": { 262 | "codemirror_mode": { 263 | "name": "ipython", 264 | "version": 3 265 | }, 266 | "file_extension": ".py", 267 | "mimetype": "text/x-python", 268 | "name": "python", 269 | "nbconvert_exporter": "python", 270 | "pygments_lexer": "ipython3", 271 | "version": "3.9.6" 272 | } 273 | }, 274 | "nbformat": 4, 275 | "nbformat_minor": 5 276 | } 277 | -------------------------------------------------------------------------------- /person-works/orcid_get_works_by_person.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "zXRmiQOY9GRv" 7 | }, 8 | "source": [ 9 | "### Query ORCID for works authored by a person\n", 10 | "\n", 11 | "This notebook queries the [ORCID Public API](https://pub.orcid.org/v3.0/) to retrieve works listed in a person's ORCID record. It takes an ORCID URL or iD as input to retrieve the ORCID record of a person and the works listed on it. From the resulting list of works we output all DOIs." 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "metadata": { 18 | "id": "8OqUUX037qB4", 19 | "scrolled": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "# Prerequisites:\n", 24 | "import requests # dependency to make HTTP calls\n", 25 | "from benedict import benedict # dependency for dealing with json" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": { 31 | "id": "G3oSBrNo8rb-" 32 | }, 33 | "source": [ 34 | "The input for this notebook is an ORCID URL or iD, e.g. '`https://orcid.org/0000-0003-2499-7741`' or '`0000-0003-2499-7741`'." 
35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 2, 40 | "metadata": { 41 | "executionInfo": { 42 | "elapsed": 20, 43 | "status": "ok", 44 | "timestamp": 1643202545770, 45 | "user": { 46 | "displayName": "Sandra M", 47 | "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjjehryRcYlqHNFf_9Q6slGN_VZPE0y5QkvOxzG=s64", 48 | "userId": "04602594913862593282" 49 | }, 50 | "user_tz": -60 51 | }, 52 | "id": "I_KWAcmq98no" 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "# input parameter\n", 57 | "example_orcid=\"https://orcid.org/0000-0003-2499-7741\"" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": { 63 | "id": "sD-zpjgT9_rn" 64 | }, 65 | "source": [ 66 | "We use it to query ORCID's Public API for the person's metadata and all works connected to them." 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 3, 72 | "metadata": { 73 | "executionInfo": { 74 | "elapsed": 685, 75 | "status": "ok", 76 | "timestamp": 1643202546441, 77 | "user": { 78 | "displayName": "Sandra M", 79 | "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjjehryRcYlqHNFf_9Q6slGN_VZPE0y5QkvOxzG=s64", 80 | "userId": "04602594913862593282" 81 | }, 82 | "user_tz": -60 83 | }, 84 | "id": "qH6LfmWH-HLs" 85 | }, 86 | "outputs": [], 87 | "source": [ 88 | "# URL for ORCID API\n", 89 | "ORCID_RECORD_API = \"https://pub.orcid.org/v3.0/\"\n", 90 | "\n", 91 | "# query ORCID for an ORCID record\n", 92 | "def query_orcid_for_record(orcid_id):\n", 93 | "\n", 94 | " response = requests.get(url=requests.utils.requote_uri(ORCID_RECORD_API + orcid_id),\n", 95 | " headers={'Accept': 'application/json'})\n", 96 | " response.raise_for_status()\n", 97 | " result=response.json()\n", 98 | " return result\n", 99 | "\n", 100 | "\n", 101 | "#-- example execution\n", 102 | "orcid_id=example_orcid.replace(\"https://orcid.org/\", \"\")\n", 103 | "orcid_record=query_orcid_for_record(orcid_id)\n", 104 | "# uncomment next lines to see complete metadata for given 
ORCID\n", 105 | "#import pprint\n", 106 | "#pprint.pprint(orcid_record)" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": { 112 | "id": "gZ43Wyp7_qVu" 113 | }, 114 | "source": [ 115 | "From the complete ORCID metadata we extract the works section and print out the title and DOI of the first `work-summary` of each work (the first item in a personal information section has the highest [display index](https://info.orcid.org/documentation/integration-guide/orcid-record/#Display_index)).\n", 116 | "\n", 117 | "*Note: works that do not have a DOI assigned will not be printed.*" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 4, 123 | "metadata": { 124 | "colab": { 125 | "base_uri": "https://localhost:8080/" 126 | }, 127 | "executionInfo": { 128 | "elapsed": 12, 129 | "status": "ok", 130 | "timestamp": 1643202546444, 131 | "user": { 132 | "displayName": "Sandra M", 133 | "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GjjehryRcYlqHNFf_9Q6slGN_VZPE0y5QkvOxzG=s64", 134 | "userId": "04602594913862593282" 135 | }, 136 | "user_tz": -60 137 | }, 138 | "id": "Xn1ttd1L__vA", 139 | "outputId": "7e726c92-df32-43db-c3d7-800c4160d9ea" 140 | }, 141 | "outputs": [ 142 | { 143 | "name": "stdout", 144 | "output_type": "stream", 145 | "text": [ 146 | "10.3389/frma.2021.694307, Enhancing Knowledge Graph Extraction and Validation From Scholarly Publications Using Bibliographic Metadata\n", 147 | "10.3897/rio.7.e66264, OPTIMETA – Strengthening the Open Access publishing system through open citations and spatiotemporal metadata \n", 148 | "10.1080/19386389.2021.1999156, Roadmap to FAIR Research Information in Open Infrastructures\n", 149 | "10.5281/zenodo.3900193, Research Profile Ownership through User Studies: A Case Study in the German National Research System\n", 150 | "10.5281/zenodo.3896517, AEON - Towards an Academic Events Ontology\n", 151 | "10.31263/voebm.v72i2.2808, Open Science und die Bibliothek – Aktionsfelder und Berufsbild\n", 152 
| "10.5281/zenodo.3242680, ROSI – Open Metrics for Open Repositories\n", 153 | "10.5281/zenodo.3243485, Registry of [Open] Scientometric Data Sources – Technische Evaluierung von Offenen Datenquellen\n", 154 | "10.1016/j.procs.2019.01.074, The Research Core Dataset (KDSF) in the Linked Data context\n", 155 | "10.3897/rio.4.e31656, Reference implementation for open scientometric indicators (ROSI)\n", 156 | "10.5281/ZENODO.1287885, Supporting A Vivo Regional Community\n", 157 | "10.6084/m9.figshare.6465149.v1, User perceptions, feedback, and stories\n", 158 | "10.5281/zenodo.321651, Umsetzung Des Kdsf-Datenmodells In Vivo\n", 159 | "10.5446/30808, VIVO - eine Einführung\n", 160 | "10.5281/ZENODO.60515, Output of webXray analysis of German library websites\n", 161 | "10.11588/IP.2016.1.28559, Roving Librarians in der Zentralbibliothek der Hochschule Hannover: ein Experiment\n", 162 | "10.11588/IP.2016.2.32678, Third-Party-Elemente in deutschen Bibliothekswebseiten\n", 163 | "10.5281/ZENODO.50969, URLs von Webseiten mit Typ Bibliothek aus Lobid.org\n", 164 | "10.11588/IP.2015.1.18489, Editorial - Willkommen bei der ersten Ausgabe der Informationspraxis!\n", 165 | "10.11588/IP.2015.2.23784, Erstellung wiederverwendbarer RDF-Geodaten mit Google Refine\n", 166 | "10.5281/ZENODO.30992, VIVO an der Hochschule Hannover - Beispiel: Forschungsprojekte\n", 167 | "http://dx.doi.org/10.5281/zenodo.13101, Geodata\n", 168 | "10.2314/COSCV1, CoScience - Gemeinsam forschen und publizieren mit dem Netz\n", 169 | "10.6084/M9.FIGSHARE.647329, Members of Deutscher Bibliotheksverband e. V. (dbv)\n", 170 | "10.18452/8872, Libworld. Biblioblogs global\n", 171 | "10.1080/00048623.2006.10755322, Teaching Information Literacy with the Lerninformationssystem\n", 172 | "10.1515/9783110278736.232, Empfehlungen zur Öffnung bibliothekarischer Daten v.1.0 veröffentlicht am 31. 
Oktober 2011\n", 173 | "10.1515/bd.2006.40.4.466, Informationsvermittlung: Personalisiertes Lernen in der Bibliothek: das Düsseldorfer Online-Tutorial (DOT) Informationskompetenz\n" 174 | ] 175 | } 176 | ], 177 | "source": [ 178 | "# extract works section from ORCID profile\n", 179 | "def extract_works_section(orcid_record):\n", 180 | " orcid_dict=benedict.from_json(orcid_record)\n", 181 | " works=orcid_dict.get('activities-summary.works.group') or []\n", 182 | " return works\n", 183 | "\n", 184 | "# for each work in the work section: extract title and DOI\n", 185 | "def extract_doi(work):\n", 186 | " work_dict=benedict.from_json(work)\n", 187 | " title=work_dict.get('work-summary[0].title.title.value')\n", 188 | " dois= [doi['external-id-value'] for doi in work_dict.get('work-summary[0].external-ids.external-id', []) if doi['external-id-type']==\"doi\"]\n", 189 | " # if there is a DOI assigned to the work, the list of dois is not empty and we can extract the first one\n", 190 | " doi=dois[0] if dois else None\n", 191 | " return doi, title\n", 192 | "\n", 193 | "\n", 194 | "# ---- example execution\n", 195 | "works=extract_works_section(orcid_record)\n", 196 | "for work in works:\n", 197 | " doi,title = extract_doi(work)\n", 198 | " if doi:\n", 199 | " print(f\"{doi}, {title}\")" 200 | ] 201 | } 202 | ], 203 | "metadata": { 204 | "colab": { 205 | "authorship_tag": "ABX9TyODvRNkLSMqVZfZ+mPUc8CJ", 206 | "name": "orcid_get_works_by_person.ipynb", 207 | "provenance": [ 208 | { 209 | "file_id": "1KzKd-wQe1zzvp8-tkukdrpXOhEy3on2t", 210 | "timestamp": 1643202582749 211 | } 212 | ] 213 | }, 214 | "kernelspec": { 215 | "display_name": "Python 3 (ipykernel)", 216 | "language": "python", 217 | "name": "python3" 218 | }, 219 | "language_info": { 220 | "codemirror_mode": { 221 | "name": "ipython", 222 | "version": 3 223 | }, 224 | "file_extension": ".py", 225 | "mimetype": "text/x-python", 226 | "name": "python", 227 | "nbconvert_exporter": "python", 228 | "pygments_lexer": 
"ipython3", 229 | "version": "3.9.6" 230 | } 231 | }, 232 | "nbformat": 4, 233 | "nbformat_minor": 1 234 | } 235 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests==2.27.1 2 | anytree==2.8.0 3 | python-benedict==0.24.3 4 | habanero==1.0.0 5 | -------------------------------------------------------------------------------- /work-projects/README.md: -------------------------------------------------------------------------------- 1 | ## work-projects 2 | 3 | A Jupyter notebook showing an example of using a persistent identifier for a publication (DOI) 4 | as input for retrieving the project a work was produced in (identified by its OpenAIRE project ID). 5 | 6 | * [OpenAIRE](https://www.openaire.eu/) -------------------------------------------------------------------------------- /work-projects/openaire_get_projects_by_work.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": true, 7 | "pycharm": { 8 | "is_executing": true 9 | } 10 | }, 11 | "source": [ 12 | "### Query OpenAIRE for the project(s) a publication was produced in\n", 13 | "This notebook queries the [OpenAIRE HTTP API](https://graph.openaire.eu/develop/api.html) for the project(s) a publication was produced in. It takes a DOI as input which is used to retrieve the publication's metadata via the API's `/publications` endpoint and checks if there is an `'isProducedBy'` relation to a project. If that is the case, the project's ID is used to query the API via its `/projects` endpoint and the title, code, call identifier and funded amount of the project are printed." 
14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 1, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "# Prerequisites:\n", 23 | "import requests # dependency for making HTTP calls\n", 24 | "from benedict import benedict # dependency for dealing with json" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "The input for this notebook is a DOI, e.g. '`10.1007/978-3-030-74296-6_19`'." 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 2, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "# input parameter\n", 41 | "example_doi=\"10.1007/978-3-030-74296-6_19\"" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "We use it to query the OpenAIRE HTTP API for the specified publication and its metadata. " 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 3, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "# OpenAIRE endpoint to query for publications\n", 58 | "OPENAIRE_API_PUBLICATIONS = \"https://api.openaire.eu/search/publications\"\n", 59 | "\n", 60 | "# query OpenAIRE for a specific publication\n", 61 | "def query_openaire_for_publication(doi):\n", 62 | " params = {'doi': doi, 'format': \"json\"}\n", 63 | " response = requests.get(url=OPENAIRE_API_PUBLICATIONS,\n", 64 | " params=params)\n", 65 | " response.raise_for_status()\n", 66 | " result=response.json()\n", 67 | " return result\n", 68 | "\n", 69 | "\n", 70 | "# ---- example execution\n", 71 | "pub_response=query_openaire_for_publication(example_doi)" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "From the complete response we get from the API, we extract the metadata for the specified publication.\n", 79 | "If the metadata contains a reference to a project within the list of relations (`'rels'`), then extract the project's ID." 
80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 4, 85 | "metadata": {}, 86 | "outputs": [ 87 | { 88 | "name": "stdout", 89 | "output_type": "stream", 90 | "text": [ 91 | "['corda__h2020::c6af905285a4bcd97a2fdf7cadc3cf3a']\n" 92 | ] 93 | } 94 | ], 95 | "source": [ 96 | "# extract the metadata about the publication from the response\n", 97 | "path_to_result='response.results.result[0].metadata.oaf:entity.oaf:result'\n", 98 | "oaf_result=benedict.from_json(pub_response).get(path_to_result, {})\n", 99 | "\n", 100 | "# extract the metadata about relations\n", 101 | "# and check for each rel, if it is pointing to a project\n", 102 | "rels=oaf_result.get('rels.rel') or []\n", 103 | "is_rel_to_project = lambda rel: rel['to']['@class']==\"isProducedBy\" and rel['to']['@type']==\"project\"\n", 104 | "\n", 105 | "# unfortunately the json data is inconsistently modeled:\n", 106 | "# if there is one rel for a publication, it is a json object\n", 107 | "# if there are multiple rels for a publication, they form a json list\n", 108 | "if isinstance(rels, list):\n", 109 | " project_ids=[rel['to']['$'] for rel in rels if is_rel_to_project(rel)]\n", 110 | "else:\n", 111 | " project_ids= [rels['to']['$']] if is_rel_to_project(rels) else []\n", 112 | "\n", 113 | "print(project_ids)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "For each project ID, we query the OpenAIRE HTTP API via its `/projects` endpoint for the project's metadata." 
121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 5, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "# OpenAIRE endpoint to query for projects\n", 130 | "OPENAIRE_API_PROJECTS = \"https://api.openaire.eu/search/projects\"\n", 131 | "\n", 132 | "# query OpenAIRE for a specific project\n", 133 | "def query_openaire_for_project(openaire_project_id):\n", 134 | " params = {'openaireProjectID': openaire_project_id, 'format': \"json\"}\n", 135 | " response = requests.get(url=OPENAIRE_API_PROJECTS,\n", 136 | " params=params)\n", 137 | " response.raise_for_status()\n", 138 | " result=response.json()\n", 139 | " return result\n", 140 | "\n", 141 | "\n", 142 | "# ---- example execution\n", 143 | "project_responses=[query_openaire_for_project(project_id) for project_id in project_ids]" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "Let's extract and print each project's title, code, call identifier and funded amount." 
151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 6, 156 | "metadata": {}, 157 | "outputs": [ 158 | { 159 | "name": "stdout", 160 | "output_type": "stream", 161 | "text": [ 162 | "Project data:\n", 163 | " code: 819536\n", 164 | " title: Knowledge Graph based Representation, Augmentation and Exploration of Scholarly Communication\n", 165 | " callidentifier: ERC-2018-COG\n", 166 | " fundedamount:1996250.0 EUR\n", 167 | "\n" 168 | ] 169 | } 170 | ], 171 | "source": [ 172 | "def extract_data_from_project(project_response):\n", 173 | " path_to_project='response.results.result[0].metadata.oaf:entity.oaf:project'\n", 174 | " oaf_project=benedict.from_json(project_response).get(path_to_project, {})\n", 175 | " \n", 176 | " title=oaf_project.get('title.$')\n", 177 | " code=oaf_project.get('code.$')\n", 178 | " callidentifier=oaf_project.get('callidentifier.$')\n", 179 | " fundedamount=oaf_project.get('fundedamount.$')\n", 180 | " currency=oaf_project.get('currency.$')\n", 181 | " return title, code, callidentifier, f\"{fundedamount} {currency}\"\n", 182 | "\n", 183 | "\n", 184 | "# ---- example execution\n", 185 | "if (not project_responses):\n", 186 | " print(\"No projects associated with publication\")\n", 187 | "for project in project_responses:\n", 188 | " title, code, callidentifier, fundedamount = extract_data_from_project(project)\n", 189 | " print(\"Project data:\")\n", 190 | " print(f\" code: {code}\\n title: {title}\\n callidentifier: {callidentifier}\\n fundedamount:{fundedamount}\\n\")" 191 | ] 192 | } 193 | ], 194 | "metadata": { 195 | "kernelspec": { 196 | "display_name": "Python 3 (ipykernel)", 197 | "language": "python", 198 | "name": "python3" 199 | }, 200 | "language_info": { 201 | "codemirror_mode": { 202 | "name": "ipython", 203 | "version": 3 204 | }, 205 | "file_extension": ".py", 206 | "mimetype": "text/x-python", 207 | "name": "python", 208 | "nbconvert_exporter": "python", 209 | "pygments_lexer": "ipython3", 210 | 
"version": "3.9.6" 211 | } 212 | }, 213 | "nbformat": 4, 214 | "nbformat_minor": 1 215 | } --------------------------------------------------------------------------------